python3 通过头文件判断文件类型

python

CODE_TYPE_DIC = {

'68746D6C3E': 'html',

'd0cf11e0a1b11ae10000': 'xls',

'44656C69766572792D64': 'eml',

'ffd8ffe000104a464946': 'jpg',

'89504e470d0a1a0a0000': 'png',

'47494638396126026f01': 'gif',

'49492a00227105008037': 'tif',

'424d228c010000000000': 'bmp',

'424d8240090000000000': 'bmp',

'424d8e1b030000000000': 'bmp',

'41433130313500000000': 'dwg',

'3c21444f435459504520': 'html',

'3c21646f637479706520': 'htm',

'48544d4c207b0d0a0942': 'css',

'696b2e71623d696b2e71': 'js',

'7b5c727466315c616e73': 'rtf',

'38425053000100000000': 'psd',

'46726f6d3a203d3f6762': 'eml',

'd0cf11e0a1b11ae10000': 'doc',

'd0cf11e0a1b11ae10000': 'vsd',

'5374616E64617264204A': 'mdb',

'252150532D41646F6265': 'ps',

'255044462d312e350d0a': 'pdf',

'2e524d46000000120001': 'rmvb',

'464c5601050000000900': 'flv',

'00000020667479706d70': 'mp4',

'49443303000000002176': 'mp3',

'000001ba210001000180': 'mpg',

'3026b2758e66cf11a6d9': 'wmv',

'52494646e27807005741': 'wav',

'52494646d07d60074156': 'avi',

'4d546864000000060001': 'mid',

'504b0304140000080044': 'zip',

'504b03040a0000080000': 'zip',

'504b03040a0000000000': 'zip',

'526172211a0700cf9073': 'rar',

'235468697320636f6e66': 'ini',

'504b03040a0000000000': 'jar',

'4d5a9000030000000400': 'exe',

'3c25402070616765206c': 'jsp',

'4d616e69666573742d56': 'mf',

'3c3f786d6c2076657273': 'xml',

'494e5345525420494e54': 'sql',

'7061636b616765207765': 'java',

'406563686f206f66660d': 'bat',

'1f8b0800000000000000': 'gz',

'6c6f67346a2e726f6f74': 'properties',

'cafebabe0000002e0041': 'class',

'49545346030000006000': 'chm',

'04000000010000001300': 'mxp',

'504b0304140006000800': 'docx',

'd0cf11e0a1b11ae10000': 'wps',

'6431303a637265617465': 'torrent',

}

# 字节码转16进制字符串

def bytes2hex(b):

num = len(b)

hex_str = u''

for i in range(num):

t = u'%x' % b[i]

if len(t) % 2:

hex_str += u'0'

hex_str += t

return hex_str.upper()

# 获取文件类型

def file_type(filename, n=20):

with open(filename, 'rb') as f:

bins = f.read(n)

bins = bytes2hex(bins) # 转码

bins = bins.lower() # 小写

f_type = 'unknown'

for hcode in CODE_TYPE_DIC.keys():

lens = len(hcode) # 需要的长度

if bins[0:lens] == hcode:

f_type = CODE_TYPE_DIC[hcode]

break

if f_type == 'unknown': # 全码未找到,优化处理,码表取5位验证

bins = bins[0:5]

for hcode in CODE_TYPE_DIC.keys():

if len(hcode) > 5 and bins == hcode[0:5]:

f_type = CODE_TYPE_DIC[hcode]

break

return f_type

if __name__ == '__main__':

print(file_type('01.jpg'))

参考链接:https://blog.csdn.net/privateobject/article/details/78069500

以上是 python3 通过头文件判断文件类型 的全部内容, 来源链接: utcz.com/z/388865.html

回到顶部