Python 遍历文件夹,统计所有不同后缀的文件数量与比例
在Github上经常看到项目里不同语言的占比,突然想写一个试试。
自己写完一个残缺的,感觉肯定不是最优解,所以来请教大家的写法。谢谢。
#!/usr/bin/env python
# coding: utf-8
"""Count the files under the current directory by language suffix.

Walks the tree rooted at ``./`` and prints, for Java, JavaScript, Python,
``.we`` and all remaining files, how many there are and what share of the
total each group represents.
"""
import os

# (label, suffix) pairs that get their own counter; every file matching
# none of them is counted under OTHER_LABEL.
KNOWN_SUFFIXES = (
    ('py', '.py'),
    ('js', '.js'),
    ('java', '.java'),
    ('we', '.we'),
)
OTHER_LABEL = 'others'


def classify(file_name):
    """Return the counter label for *file_name* based on how its name ends."""
    for label, suffix in KNOWN_SUFFIXES:
        if file_name.endswith(suffix):
            return label
    return OTHER_LABEL


def count_languages(file_names):
    """Return a dict mapping each label to the number of matching names.

    Every label (including ``'others'``) is present in the result, with 0
    for labels that matched nothing.
    """
    labels = [label for label, _ in KNOWN_SUFFIXES] + [OTHER_LABEL]
    counts = dict.fromkeys(labels, 0)
    for file_name in file_names:
        counts[classify(file_name)] += 1
    return counts


def percent(count, total):
    """Return *count* as a percentage of *total*, or 0.0 when *total* is 0."""
    if not total:
        # An empty tree would otherwise raise ZeroDivisionError.
        return 0.0
    return count / float(total) * 100


if __name__ == '__main__':
    work_space = os.getcwd()
    print(work_space)
    print(u'正在计算比例,请稍候.....')

    counts = count_languages(
        file_name
        for path, dirs, file_arr in os.walk('./')
        for file_name in file_arr
    )
    total = sum(counts.values())
    order = ('java', 'js', 'py', 'we', OTHER_LABEL)

    print(u'计算完成: 总文件数 : %d ' % total)
    print(u'文件数 Java: %d js: %d py: %d we: %d others: %d ' % tuple(
        counts[label] for label in order))
    print(u'所占比例 Java: %.2f%% js:%.2f%% py:%.2f%% we:%.2f%% others:%.2f%%' % tuple(
        percent(counts[label], total) for label in order))
回答:
# coding: utf-8
"""Print every file suffix found under ./ with its share of all files."""
import os
from itertools import groupby


def suffix_of(file_name):
    """Return the extension of *file_name* ('' when it has none)."""
    return os.path.splitext(file_name)[1]


def suffix_ratios(file_names):
    """Return ``[(suffix, ratio), ...]`` sorted by suffix.

    *ratio* is the fraction of *file_names* carrying that suffix, rounded
    to two decimal places. An empty input yields an empty list.
    """
    file_names = list(file_names)
    total = len(file_names) * 1.0
    # groupby only merges ADJACENT items with equal keys, so the names
    # must be sorted by the same key first; otherwise a suffix shows up
    # once per run with a partial ratio.
    ordered = sorted(file_names, key=suffix_of)
    return [
        (suffix, round(len(list(group)) / total, 2))
        for suffix, group in groupby(ordered, key=suffix_of)
    ]


file_lst = []
for path, dirs, files in os.walk('./'):
    file_lst += files

for key, ratio in suffix_ratios(file_lst):
    print('%s %s' % (key, ratio))
看第二种:
# coding: utf-8
"""Print every file suffix found under ./ with its percentage of all files."""
import os
from collections import defaultdict


def count_suffixes(file_names):
    """Return a mapping ``suffix -> number of names with that suffix``.

    Names without an extension are counted under the empty string.
    """
    counts = defaultdict(int)
    for file_name in file_names:
        counts[os.path.splitext(file_name)[1]] += 1
    return counts


def percentage(count, total):
    """Return *count* as a percentage of *total*, rounded to 2 decimals."""
    # Scale before rounding: round(x, 4) * 100 leaves float artifacts
    # such as 33.330000000000005.
    return round(count * 100.0 / total, 2)


res = count_suffixes(
    file_name
    for path, dirs, files in os.walk('./')
    for file_name in files
)
# The total is the real number of files; starting the counter at 1.0
# would make every percentage too small.
file_count = sum(res.values())

for k, v in sorted(res.items()):
    print('%s %s' % (k, percentage(v, file_count)))
回答:
改进后的代码:
# coding: utf-8
"""Print the count and percentage of every file suffix found under ./."""
import os
from collections import Counter


def count_suffixes(file_names):
    """Return a Counter mapping each suffix to how many names carry it.

    Names without an extension are counted under the empty string.
    """
    return Counter(os.path.splitext(file_name)[1] for file_name in file_names)


def getpercent(num, total=None):
    """Return *num* as a percentage of *total*, formatted as a string.

    The value is rounded to two decimal places. When *total* is omitted
    the module-level ``file_count`` set by the ``__main__`` section is
    used. A zero total yields ``'0.0'`` instead of raising
    ZeroDivisionError.
    """
    if total is None:
        total = file_count
    if not total:
        return '0.0'
    return str(round(num / float(total) * 100, 2))


def getresult(counts=None, total=None):
    """Print one ``suffix count:N percent:P%`` line per suffix.

    *counts* defaults to the module-level ``language_dict``; *total* is
    passed through to ``getpercent`` and defaults to ``file_count`` there.
    """
    if counts is None:
        counts = language_dict
    for key in counts:
        print('%s count:%d percent:%s%%' % (
            key, counts[key], getpercent(counts[key], total)))


if __name__ == '__main__':
    file_lst = []
    for path, dirs, files in os.walk('./'):
        file_lst += files
    file_count = len(file_lst) * 1.0
    # Counter accumulates per-suffix totals directly, so no sorting or
    # manual merging of repeated groups is needed.
    language_dict = count_suffixes(file_lst)
    getresult()
以上是 Python 遍历文件夹,统计所有不同后缀的文件数量与比例 的全部内容, 来源链接: utcz.com/a/156545.html