[轉]Python之基於十六進位制判斷檔案型別
阿新 • • 發佈:2021-12-16
核心程式碼:
#!/usr/bin/env python# -*- coding: utf-8 -*-# @Author : sukimport structfrom io import BytesIO# 支援檔案型別# 用16進位制字串的目的是可以知道檔案頭是多少位元組# 各種檔案頭的長度不一樣,少則2字元,長則8字元def typeList(types): type_dict = {'jpg': ['FFD8FFE000104A464946'], 'png': ['89504E470D0A1A0A0000'], 'gif': ['47494638396126026F01is_file_type.py'], 'tif': ['49492A00227105008037'], 'bmp': ['424D8E1B030000000000'], 'dwg': ['41433130313500000000'], 'html': ['3C21444F435459504520'], 'htm': ['3C21646F637479706520'], 'css': ['48544D4C207B0D0A0942'], 'js': ['696B2E71623D696B2E71'], 'rtf': ['7B5C727466315C616E73'], 'psd': ['38425053000100000000'], 'eml': ['46726F6D3A203D3F6762'], 'wps': ['D0CF11E0A1B11AE10000'], 'mdb': ['5374616E64617264204A'], 'ps': '[252150532D41646F6265]', 'pdf': ['255044462D312E'], 'rmvb': ['2E524D46000000120001'], 'flv': ['464C5601050000000900'], 'mp4': ['00000020667479706D70'], 'mp3': ['49443303000000002176'], 'mpg': ['000001BA210001000180'], 'wmv': ['3026B2758E66CF11A6D9'], 'wav': ['52494646E27807005741'], 'avi': ['52494646D07D60074156'], 'mid': ['4D546864000000060001'], 'zip': ['504B0304140000000800', '504B0304140000080800', '504B03040A0000080000'], 'rar': ['526172211A0700CF9073'], 'ini': ['235468697320636F6E66'], 'jar': ['504B03040A0000000000'], 'exe': ['4D5A9000030000000400'], 'jsp': ['3C25402070616765206C'], 'mf': ['4D616E69666573742D56'], 'xml': ['3C3F786D6C2076657273'], 'sql': ['494E5345525420494E54'], 'java': ['7061636B616765207765'], 'bat': ['406563686F206F66660D'], 'gz': ['1F8B0800000000000000'], 'properties': ['6C6F67346A2E726F6F74'], 'class': ['CAFEBABE0000002E0041'], 'chm': ['49545346030000006000'], 'mxp': ['04000000010000001300'], 'docx': ['504B0304140006000800', '504B03040A0000000000'], 'torrent': ['6431303A637265617465'], 'mov': ['6D6F6F76'], 'wpd': ['FF575043'], 'dbx': ['CFAD12FEC5FD746F'], 'pst': ['2142444E'], 'qdf': ['AC9EBD8F'], 'pwl': ['E3828596'], 'ram': ['2E7261FD'] } ret = {} for k_hex, v_prefix in type_dict.items(): if k_hex in types: ret[k_hex] = v_prefix return ret# 位元組碼轉16進位制字串def bytes2hex(bytes): num = len(bytes) hexstr = u"" for i in range(num): t = u"%x" % bytes[i] if len(t) % 2: hexstr += u"0" hexstr += t return hexstr.upper()# 獲取檔案型別def file_type(filename): binfile = open(filename, 'rb') # 必需二制字讀取 tl = typeList(types=["jpg", "zip", "docx"]) ftype = None for type_name, hcode_list in tl.items(): flag = False for hcode in hcode_list: numOfBytes = int(len(hcode) / 2) # 需要讀多少位元組 binfile.seek(0) # 每次讀取都要回到檔案頭,不然會一直往後讀取 hbytes = struct.unpack_from("B" * numOfBytes, binfile.read(numOfBytes)) # 一個 "B"表示一個位元組 f_hcode = bytes2hex(hbytes) # 如果判斷不出來,打印出這個值,往字典增加即可 # print("上傳資料流hex", s_hcode, '=', "程式碼字典hex", hcode) # 如果判斷不出來,打印出這個值,往字典增加即可 if f_hcode == hcode: flag = True break if flag: ftype = type_name break binfile.close() return ftype# 獲取位元組流型別def stream_type(stream, types): """ :param stream:流資料 :param types:需要判斷檔案型別,格式:["jpg","jpn"] :return: """ tl = typeList(types=types) ftype = None for type_name, hcode_list in tl.items(): flag = False for hcode in hcode_list: numOfBytes = int(len(hcode) / 2) # 需要讀多少位元組 hbytes = struct.unpack_from("B" * numOfBytes, stream[0:numOfBytes]) # 一個 "B"表示一個位元組 s_hcode = bytes2hex(hbytes) # print("上傳資料流hex", s_hcode, '=', "程式碼字典hex", hcode) # 如果判斷不出來,打印出這個值,往字典增加即可 if s_hcode == hcode: flag = True break if flag: ftype = type_name break return ftypedef stream_split(stream, count=3): """ 主要處理流是分段獲取的資料 :param stream: 塊流 :param count: 取多少段合成來判斷型別,預設三段 :return: """ block_stream = BytesIO() temp = 1 for block in stream: block_stream.write(block) if temp == count: break temp += 1 return block_stream.getvalue()
type_dict字典,根據自己上傳的檔案,來填寫,資料來自網際網路。
基於Flask的上傳示例
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
@index .route( '/upload' , methods = [ 'GET' , 'POST' ])
def upload():
if request.method = = 'GET' :
return render_template( 'upload.html' )
upload_obj = request.files.get( 'code_file' )
if not upload_obj:
return '沒有選擇檔案上傳'
ret = stream_type(stream_split(upload_obj.stream), [ "jpg" , "png" , "pdf" ])
if not ret:
return '上傳失敗,檔案型別不匹配,型別必須 "jpg" or "png" or "pdf"'
file_name = upload_obj.filename
upload_obj.save(os.path.join( 'files' , file_name))
return '上傳檔案成功'
|
upload.html
1 2 3 4 5 6 7 8 |
{% extends 'layout.html' %}
{% block content %}
< h1 >上傳程式碼</ h1 >
< form action="" method="post" enctype="multipart/form-data">
< input type="file" name="code_file">
< input type="submit" value="上傳"></ input >
</ form >
{% endblock %}
|
開始上傳檔案:
上傳不在列表中的檔案型別
上傳在列表中的檔案型別
---------------------
作者:小粉優化大師
來源:CNBLOGS
原文:https://www.cnblogs.com/ygbh/p/11918876.html
版權宣告:本文為作者原創文章,轉載請附上博文連結!