UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1

iOS查找工程中未使用方法的脚本报错: 这是叫脚本源码

# coding:utf-8

import os

import re

import sys

import getopt

reserved_prefixs = ["-[", "+["]

# 获取入参参数

def input_parameter():

opts, args = getopt.getopt(sys.argv[1:], '-a:-p:-w:-b:',

['app_path=', 'project_path=', 'black_list_Str', 'white_list_str'])

black_list_str = ''

white_list_str = ''

white_list = []

black_list = []

# 入参判断

for opt_name, opt_value in opts:

if opt_name in ('-a', '--app_path'):

# .app文件路径

app_path = opt_value

if opt_name in ('-p', '--project_path'):

# 项目文件路径

project_path = opt_value

if opt_name in ('-b', '--black_list_Str'):

# 检测黑名单前缀,不检测谁

black_list_Str = opt_value

if opt_name in ('-w', '--white_list_str'):

# 检测白名单前缀,只检测谁

white_list_str = opt_value

if len(black_list_str) > 0:

black_list = black_list_str.split(",")

if len(white_list_str) > 0:

white_list = white_list_str.split(",")

if len(white_list) > 0 and len(black_list) > 0:

print("\033[0;31;40m白名单【-w】和黑名单【-b】不能同时存在\033[0m")

exit(1)

# 判断文件路径存不存在

if not os.path.exists(project_path):

print("\033[0;31;40m输入的项目文件路径【-p】不存在\033[0m")

exit(1)

app_path = verified_app_path(app_path)

if not app_path:

exit('输入的app路径不存在,停止运行')

return app_path, project_path, black_list, white_list

def verified_app_path(path):

if path.endswith('.app'):

appname = path.split('/')[-1].split('.')[0]

path = os.path.join(path, appname)

if appname.endswith('-iPad'):

path = path.replace(appname, appname[:-5])

if not os.path.isfile(path):

return None

if not os.popen('file -b ' + path).read().startswith('Mach-O'):

return None

return path

# 获取protocol中所有的方法

def header_protocol_selectors(file_path):

# 删除路径前后的空格

file_path = file_path.strip()

if not os.path.isfile(file_path):

return None

protocol_sels = set()

file = open(file_path, 'r')

is_protocol_area = False

# 开始遍历文件内容

for line in file.readlines():

# 删除注释信息

# delete description

line = re.sub('\".*\"', '', line)

# delete annotation

line = re.sub('//.*', '', line)

# 检测是否是 @protocol

# match @protocol

if re.compile('\s*@protocol\s*\w+').findall(line):

is_protocol_area = True

# match @end

if re.compile('\s*@end').findall(line):

is_protocol_area = False

# match sel

if is_protocol_area and re.compile('\s*[-|+]\s*\(').findall(line):

sel_content_match_result = None

# - (CGPoint)convertPoint:(CGPoint)point toCoordinateSpace:(id <UICoordinateSpace>)coordinateSpace

if ':' in line:

# match sel with parameters

# 【"convertPoint:","toCoordinateSpace:"]

sel_content_match_result = re.compile('\w+\s*:').findall(line)

else:

# - (void)invalidate;

# match sel without parameters

# invalidate;

sel_content_match_result = re.compile('\w+\s*;').findall(line)

if sel_content_match_result:

# 方法参数拼接

# convertPoint:toCoordinateSpace:

funcList = ''.join(sel_content_match_result).replace(';', '')

protocol_sels.add(funcList)

file.close()

return protocol_sels

# 获取所有protocol定义的方法

def protocol_selectors(path, project_path):

print('获取所有的protocol中的方法...')

header_files = set()

protocol_sels = set()

# 获取当前引用的系统库中的方法列表

system_base_dir = '/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk'

# get system librareis

lines = os.popen('otool -L ' + path).readlines()

for line in lines:

# 去除首尾空格

line = line.strip()

# /System/Library/Frameworks/MediaPlayer.framework/MediaPlayer (compatibility version 1.0.0, current version 1.0.0)

# /System/Library/Frameworks/MediaPlayer.framework/MediaPlayer

# delete description,

line = re.sub('\(.*\)', '', line).strip()

if line.startswith('/System/Library/'):

# [0:-1],获取数组的左起第一个,到倒数最后一个,不包含最后一个,[1,-1)左闭右开

library_dir = system_base_dir + '/'.join(line.split('/')[0:-1])

if os.path.isdir(library_dir):

# 获取当前系统架构中所有的类

# 获取合集

header_files = header_files.union(os.popen('find %s -name \"*.h\"' % library_dir).readlines())

if not os.path.isdir(project_path):

exit('Error: project path error')

# 获取当前路径下面所有的.h文件路径

header_files = header_files.union(os.popen('find %s -name \"*.h\"' % project_path).readlines())

for header_path in header_files:

# 获取所有查找到的文件下面的protocol方法,这些方法,不能用来统计

header_protocol_sels = header_protocol_selectors(header_path)

if header_protocol_sels:

protocol_sels = protocol_sels.union(header_protocol_sels)

return protocol_sels

def imp_selectors(path):

print('获取所有的方法,除了setter and getter方法...')

# return struct: {'setupHeaderShadowView':['-[TTBaseViewController setupHeaderShadowView]']}

# imp 0x100001260 -[AppDelegate setWindow:] ==>> -[AppDelegate setWindow:],setWindow:

re_sel_imp = re.compile('\s*imp\s*0x\w+ ([+|-]\[.+\s(.+)\])')

re_properties_start = re.compile('\s*baseProperties 0x\w{9}')

re_properties_end = re.compile('\w{16} 0x\w{9} _OBJC_CLASS_\$_(.+)')

re_property = re.compile('\s*name\s*0x\w+ (.+)')

imp_sels = {}

is_properties_area = False

# “otool - ov”将输出Objective - C类结构及其定义的方法。

for line in os.popen('/usr/bin/otool -oV %s' % path).xreadlines():

results = re_sel_imp.findall(line)

if results:

# imp 0x100001260 -[AppDelegate setWindow:] ==>> [-[AppDelegate setWindow:],setWindow:]

(class_sel, sel) = results[0]

if sel in imp_sels:

imp_sels[sel].add(class_sel)

else:

imp_sels[sel] = set([class_sel])

else:

# delete setter and getter methods as ivar assignment will not trigger them

# 删除相关的set方法

if re_properties_start.findall(line):

is_properties_area = True

if re_properties_end.findall(line):

is_properties_area = False

if is_properties_area:

property_result = re_property.findall(line)

if property_result:

property_name = property_result[0]

if property_name and property_name in imp_sels:

# properties layout in mach-o is after func imp

imp_sels.pop(property_name)

# 拼接set方法

setter = 'set' + property_name[0].upper() + property_name[1:] + ':'

# 干掉set方法

if setter in imp_sels:

imp_sels.pop(setter)

return imp_sels

def ref_selectors(path):

print('获取所有被调用的方法...')

re_selrefs = re.compile('__TEXT:__objc_methname:(.+)')

ref_sels = set()

lines = os.popen('/usr/bin/otool -v -s __DATA __objc_selrefs %s' % path).readlines()

for line in lines:

results = re_selrefs.findall(line)

if results:

ref_sels.add(results[0])

return ref_sels

def ignore_selectors(sel):

if sel == '.cxx_destruct':

return True

if sel == 'load':

return True

return False

def filter_selectors(sels):

filter_sels = set()

for sel in sels:

for prefix in reserved_prefixs:

if sel.startswith(prefix):

filter_sels.add(sel)

return filter_sels

def unref_selectors(path, project_path):

# 获取所有类的protocol的方法集合

protocol_sels = protocol_selectors(path, project_path)

# 获取项目所有的引用方法

ref_sels = ref_selectors(path)

if len(ref_sels) == 0:

exit('获取项目所有的引用方法为空....')

# 获取所有的方法,除了set方法

imp_sels = imp_selectors(path)

print("\n")

if len(imp_sels) == 0:

exit('Error: imp selectors count null')

unref_sels = set()

for sel in imp_sels:

# 所有的方法,忽略白名单

if ignore_selectors(sel):

continue

# 如果当前的方法不在protocol中,也不再引用的方法中,那么认为这个方法没有被用到

# protocol sels will not apppear in selrefs section

if sel not in ref_sels and sel not in protocol_sels:

unref_sels = unref_sels.union(filter_selectors(imp_sels[sel]))

return unref_sels

# 黑白名单过滤

def filtration_list(unref_sels, black_list, white_list):

# 黑名单过滤

temp_unref_sels = list(unref_sels)

if len(black_list) > 0:

# 如果黑名单存在,那么将在黑名单中的前缀都过滤掉

for unref_sel in temp_unref_sels:

for black_prefix in black_list:

class_method = "+[%s" % black_prefix

instance_method = "-[%s" % black_prefix

if (unref_sel.startswith(class_method) or unref_sel.startswith(

instance_method)) and unref_sel in unref_sels:

unref_sels.remove(unref_sel)

break

# 白名单过滤

temp_array = []

if len(white_list) > 0:

# 如果白名单存在,只留下白名单中的部分

for unref_sel in unref_sels:

for white_prefix in white_list:

class_method = "+[%s" % white_prefix

instance_method = "-[%s" % white_prefix

if unref_sel.startswith(class_method) or unref_sel.startswith(instance_method):

temp_array.append(unref_sel)

break

unref_sels = temp_array

return unref_sels

# 整理结果,写入文件

def write_to_file(unref_sels):

file_name = 'selector_unrefs.txt'

f = open(os.path.join(sys.path[0].strip(), file_name), 'w')

unref_sels_num_str = '查找到未被使用的方法: %d个\n' % len(unref_sels)

print(unref_sels_num_str)

f.write(unref_sels_num_str)

num = 1

for unref_sel in unref_sels:

unref_sels_str = '%d : %s' % (num, unref_sel)

print(unref_sels_str)

f.write(unref_sels_str + '\n')

num = num + 1

f.close()

print('\n项目中未使用方法检测完毕,相关结果存储到当前目录 %s 中' % file_name)

print('请在项目中进行二次确认后处理')

if __name__ == '__main__':

# 获取入参

app_path, project_path, black_list, white_list = input_parameter()

# 获取未使用方法

unref_sels = unref_selectors(app_path, project_path)

# 黑白名单过滤

unref_sels = filtration_list(unref_sels, black_list, white_list)

# 打印写入文件

write_to_file(unref_sels)

获取所有的protocol中的方法...
Traceback (most recent call last):
File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 309, in <module>

unref_sels = unref_selectors(app_path, project_path)

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 231, in unref_selectors

protocol_sels = protocol_selectors(path, project_path)

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 151, in protocol_selectors

header_protocol_sels = header_protocol_selectors(header_path)

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 80, in header_protocol_selectors

for line in file.readlines():

File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/codecs.py", line 322, in decode

(result, consumed) = self._buffer_decode(data, self.errors, final)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1 in position 2340: invalid start byte


回答:

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 80, in header_protocol_selectors

for line in file.readlines():

已经指出具体错误的位置了啊。在

file = open(file_path, 'r')

这里没指定编码,而且是文本模式打开,python 3 默认用 utf-8 解码文件内容。而你打开的文件里包含非 UTF-8 编码的内容,所以在 readlines 出现解码错误。

最简单的办法是在 readlines 这里 try except 一下抓住 UnicodeDecodeError,把有问题的文件名字打出来,编码统一成 utf-8 在处理。如果有问题的文件很多,可以自己包装一个 readlines 函数,尝试用不同编码打开文件。

又或者不需要str的话,用rb模式打开文件,直接读bytes,也不会有 UnicodeDecodeError。

以上是 UnicodeDecodeError: &#x27;utf-8&#x27; codec can&#x27;t decode byte 0xb1 的全部内容, 来源链接: utcz.com/p/938227.html

回到顶部