1. 程式人生 > 其它 >[轉]Python之基於十六進位制判斷檔案型別

[轉]Python之基於十六進位制判斷檔案型別

核心程式碼:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author  : suk
"""Guess a file's type from the magic number at the start of its content."""
import struct  # no longer used by this module; retained so the import is not dropped
from io import BytesIO

# Supported file types.
# Signatures are written as hexadecimal strings so the number of header bytes
# to compare follows from the string length (two hex digits per byte).
# Signature lengths differ from format to format.
# NOTE(review): several entries (e.g. 'gif', 'jpg', 'wav') look longer than the
# format's generic magic number and may only match particular files -- confirm
# against real samples and shorten where appropriate.
_TYPE_SIGNATURES = {
    'jpg': ['FFD8FFE000104A464946'],
    'png': ['89504E470D0A1A0A0000'],
    'gif': ['47494638396126026F01'],
    'tif': ['49492A00227105008037'],
    'bmp': ['424D8E1B030000000000'],
    'dwg': ['41433130313500000000'],
    'html': ['3C21444F435459504520'],
    'htm': ['3C21646F637479706520'],
    'css': ['48544D4C207B0D0A0942'],
    'js': ['696B2E71623D696B2E71'],
    'rtf': ['7B5C727466315C616E73'],
    'psd': ['38425053000100000000'],
    'eml': ['46726F6D3A203D3F6762'],
    'wps': ['D0CF11E0A1B11AE10000'],
    'mdb': ['5374616E64617264204A'],
    # Was the plain string '[2521...]', which is iterated character by
    # character and therefore could never match; it must be a list.
    'ps': ['252150532D41646F6265'],
    'pdf': ['255044462D312E'],
    'rmvb': ['2E524D46000000120001'],
    'flv': ['464C5601050000000900'],
    'mp4': ['00000020667479706D70'],
    'mp3': ['49443303000000002176'],
    'mpg': ['000001BA210001000180'],
    'wmv': ['3026B2758E66CF11A6D9'],
    'wav': ['52494646E27807005741'],
    'avi': ['52494646D07D60074156'],
    'mid': ['4D546864000000060001'],
    'zip': ['504B0304140000000800', '504B0304140000080800', '504B03040A0000080000'],
    'rar': ['526172211A0700CF9073'],
    'ini': ['235468697320636F6E66'],
    'jar': ['504B03040A0000000000'],
    'exe': ['4D5A9000030000000400'],
    'jsp': ['3C25402070616765206C'],
    'mf': ['4D616E69666573742D56'],
    'xml': ['3C3F786D6C2076657273'],
    'sql': ['494E5345525420494E54'],
    'java': ['7061636B616765207765'],
    'bat': ['406563686F206F66660D'],
    'gz': ['1F8B0800000000000000'],
    'properties': ['6C6F67346A2E726F6F74'],
    'class': ['CAFEBABE0000002E0041'],
    'chm': ['49545346030000006000'],
    'mxp': ['04000000010000001300'],
    'docx': ['504B0304140006000800', '504B03040A0000000000'],
    'torrent': ['6431303A637265617465'],
    'mov': ['6D6F6F76'],
    'wpd': ['FF575043'],
    'dbx': ['CFAD12FEC5FD746F'],
    'pst': ['2142444E'],
    'qdf': ['AC9EBD8F'],
    'pwl': ['E3828596'],
    'ram': ['2E7261FD'],
}

# Longest signature in the table, in bytes; file_type() reads this much.
_MAX_SIGNATURE_BYTES = max(
    len(hcode) for hcode_list in _TYPE_SIGNATURES.values() for hcode in hcode_list
) // 2

# Types file_type() checks when the caller does not pass any.
_DEFAULT_FILE_TYPES = ("jpg", "zip", "docx")


def typeList(types):
    """Return the signature table restricted to the requested types.

    :param types: container of type names to keep, e.g. ["jpg", "png"]
    :return: dict mapping each known requested type name to a new list of its
        hexadecimal signature strings, in the order of the built-in table;
        unknown names are ignored
    """
    return {
        type_name: list(hcode_list)
        for type_name, hcode_list in _TYPE_SIGNATURES.items()
        if type_name in types
    }


def bytes2hex(bytes):
    """Convert a sequence of byte values to an uppercase hexadecimal string.

    :param bytes: iterable of integers (a ``bytes`` object or a tuple of ints)
    :return: hexadecimal text in which every value is left-padded with "0"
        to an even number of digits
    """
    pieces = []
    for value in bytes:
        text = u"%X" % value
        if len(text) % 2:
            text = u"0" + text
        pieces.append(text)
    return u"".join(pieces)


def _match_signature(header, types):
    """Return the first requested type whose signature starts *header*, else None."""
    for type_name, hcode_list in typeList(types).items():
        for hcode in hcode_list:
            signature = bytes.fromhex(hcode)
            # Compare raw bytes; a header shorter than the signature simply
            # does not match instead of raising struct.error.
            if bytes(header[:len(signature)]) == signature:
                return type_name
    return None


def file_type(filename, types=None):
    """Detect the type of the file at *filename* from its leading bytes.

    :param filename: path of the file to inspect
    :param types: type names to test for; defaults to jpg, zip and docx
    :return: the matching type name, or None when nothing matches
    """
    if types is None:
        types = _DEFAULT_FILE_TYPES
    # Must be opened in binary mode; one read of the longest possible
    # signature is enough for every comparison.
    with open(filename, 'rb') as binfile:
        header = binfile.read(_MAX_SIGNATURE_BYTES)
    # If a file is not recognised, print bytes2hex(header) and add that value
    # to the signature table.
    return _match_signature(header, types)


def stream_type(stream, types):
    """Detect the type of in-memory data from its leading bytes.

    :param stream: the data as a bytes-like object
    :param types: type names to test for, e.g. ["jpg", "png"]
    :return: the matching type name, or None when nothing matches
    """
    # If data is not recognised, print bytes2hex(stream[:10]) and add that
    # value to the signature table.
    return _match_signature(stream, types)


def stream_split(stream, count=3):
    """Join the first blocks of a stream that delivers its data in pieces.

    :param stream: iterable yielding bytes blocks
    :param count: number of blocks to combine for type detection, default 3
    :return: the concatenated blocks as bytes
    """
    block_stream = BytesIO()
    for index, block in enumerate(stream, 1):
        block_stream.write(block)
        if index == count:
            break
    return block_stream.getvalue()
is_file_type.py
type_dict 字典可依照自己需要上傳的檔案型別自行增補;其中的檔案頭特徵碼資料來自網際網路。

基於Flask的上傳示例

@index.route('/upload', methods=['GET', 'POST'])
def upload():
    """Show the upload form on GET; on POST validate and store the uploaded file.

    The upload is accepted only when its leading bytes identify it as
    jpg, png or pdf; it is then saved under the 'files' directory.

    :return: the rendered form for GET, otherwise a status message string
    """
    if request.method == 'GET':
        return render_template('upload.html')
    upload_obj = request.files.get('code_file')
    if not upload_obj:
        return '沒有選擇檔案上傳'
    ret = stream_type(stream_split(upload_obj.stream), ["jpg", "png", "pdf"])
    if not ret:
        return '上傳失敗,檔案型別不匹配,型別必須 "jpg" or "png" or "pdf"'
    # stream_split() consumed the start of the stream; rewind so that save()
    # writes the whole file rather than only the unread remainder.
    upload_obj.stream.seek(0)
    # The filename comes from the client: keep only its final path component
    # so it cannot point outside 'files'.
    # NOTE(review): werkzeug.utils.secure_filename is the stricter option.
    file_name = os.path.basename((upload_obj.filename or '').replace('\\', '/'))
    if file_name in ('', '.', '..'):
        return '沒有選擇檔案上傳'
    upload_obj.save(os.path.join('files', file_name))
    return '上傳檔案成功'

upload.html

{# Upload form: posts the chosen file as the multipart field "code_file". #}
{% extends 'layout.html' %}
{% block content %}
<h1>上傳程式碼</h1>
<form action="" method="post" enctype="multipart/form-data">
    <input type="file" name="code_file">
    <input type="submit" value="上傳">
</form>
{% endblock %}

開始上傳檔案:

上傳不在列表中的檔案型別


上傳在列表中的檔案型別


---------------------
作者:小粉優化大師
來源:CNBLOGS
原文:https://www.cnblogs.com/ygbh/p/11918876.html
版權宣告:本文為作者原創文章,轉載請附上博文連結!