UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1

Z时代
2024-03-09
分类：IT

iOS查找工程中未使用方法的脚本报错：这是叫脚本源码

# coding:utf-8
import os
import re
import sys
import getopt
reserved_prefixs = ["-[", "+["]
# 获取入参参数
def input_parameter():
    opts, args = getopt.getopt(sys.argv[1:], '-a:-p:-w:-b:',
                               ['app_path=', 'project_path=', 'black_list_Str', 'white_list_str'])
    black_list_str = ''
    white_list_str = ''
    white_list = []
    black_list = []
    # 入参判断
    for opt_name, opt_value in opts:
        if opt_name in ('-a', '--app_path'):
            # .app文件路径
            app_path = opt_value
        if opt_name in ('-p', '--project_path'):
            # 项目文件路径
            project_path = opt_value
        if opt_name in ('-b', '--black_list_Str'):
            # 检测黑名单前缀，不检测谁
            black_list_Str = opt_value
        if opt_name in ('-w', '--white_list_str'):
            # 检测白名单前缀，只检测谁
            white_list_str = opt_value
    if len(black_list_str) > 0:
        black_list = black_list_str.split(",")
    if len(white_list_str) > 0:
        white_list = white_list_str.split(",")
    if len(white_list) > 0 and len(black_list) > 0:
        print("\033[0;31;40m白名单【-w】和黑名单【-b】不能同时存在\033[0m")
        exit(1)
    # 判断文件路径存不存在
    if not os.path.exists(project_path):
        print("\033[0;31;40m输入的项目文件路径【-p】不存在\033[0m")
        exit(1)
    app_path = verified_app_path(app_path)
    if not app_path:
        exit('输入的app路径不存在，停止运行')
    return app_path, project_path, black_list, white_list
def verified_app_path(path):
    if path.endswith('.app'):
        appname = path.split('/')[-1].split('.')[0]
        path = os.path.join(path, appname)
        if appname.endswith('-iPad'):
            path = path.replace(appname, appname[:-5])
    if not os.path.isfile(path):
        return None
    if not os.popen('file -b ' + path).read().startswith('Mach-O'):
        return None
    return path
# 获取protocol中所有的方法
def header_protocol_selectors(file_path):
    # 删除路径前后的空格
    file_path = file_path.strip()
    if not os.path.isfile(file_path):
        return None
    protocol_sels = set()
    file = open(file_path, 'r')
    is_protocol_area = False
    # 开始遍历文件内容
    for line in file.readlines():
        # 删除注释信息
        # delete description
        line = re.sub('\".*\"', '', line)
        # delete annotation
        line = re.sub('//.*', '', line)
        # 检测是否是 @protocol
        # match @protocol
        if re.compile('\s*@protocol\s*\w+').findall(line):
            is_protocol_area = True
        # match @end
        if re.compile('\s*@end').findall(line):
            is_protocol_area = False
        # match sel
        if is_protocol_area and re.compile('\s*[-|+]\s*\(').findall(line):
            sel_content_match_result = None
            # - (CGPoint)convertPoint:(CGPoint)point toCoordinateSpace:(id <UICoordinateSpace>)coordinateSpace
            if ':' in line:
                # match sel with parameters
                # 【"convertPoint:"，"toCoordinateSpace:"]
                sel_content_match_result = re.compile('\w+\s*:').findall(line)
            else:
                # - (void)invalidate;
                # match sel without parameters
                # invalidate;
                sel_content_match_result = re.compile('\w+\s*;').findall(line)
            if sel_content_match_result:
                # 方法参数拼接
                # convertPoint:toCoordinateSpace:
                funcList = ''.join(sel_content_match_result).replace(';', '')
                protocol_sels.add(funcList)
    file.close()
    return protocol_sels
# 获取所有protocol定义的方法
def protocol_selectors(path, project_path):
    print('获取所有的protocol中的方法...')
    header_files = set()
    protocol_sels = set()
    # 获取当前引用的系统库中的方法列表
    system_base_dir = '/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk'
    # get system librareis
    lines = os.popen('otool -L ' + path).readlines()
    for line in lines:
        # 去除首尾空格
        line = line.strip()
        # /System/Library/Frameworks/MediaPlayer.framework/MediaPlayer (compatibility version 1.0.0, current version 1.0.0)
        # /System/Library/Frameworks/MediaPlayer.framework/MediaPlayer
        # delete description,
        line = re.sub('\(.*\)', '', line).strip()
        if line.startswith('/System/Library/'):
            # [0:-1],获取数组的左起第一个，到倒数最后一个,不包含最后一个,[1,-1)左闭右开
            library_dir = system_base_dir + '/'.join(line.split('/')[0:-1])
            if os.path.isdir(library_dir):
                # 获取当前系统架构中所有的类
                # 获取合集
                header_files = header_files.union(os.popen('find %s -name \"*.h\"' % library_dir).readlines())
    if not os.path.isdir(project_path):
        exit('Error: project path error')
    # 获取当前路径下面所有的.h文件路径
    header_files = header_files.union(os.popen('find %s -name \"*.h\"' % project_path).readlines())
    for header_path in header_files:
        # 获取所有查找到的文件下面的protocol方法,这些方法，不能用来统计
        header_protocol_sels = header_protocol_selectors(header_path)
        if header_protocol_sels:
            protocol_sels = protocol_sels.union(header_protocol_sels)
    return protocol_sels
def imp_selectors(path):
    print('获取所有的方法，除了setter and getter方法...')
    # return struct: {'setupHeaderShadowView':['-[TTBaseViewController setupHeaderShadowView]']}
    #  imp     0x100001260 -[AppDelegate setWindow:] ==>> -[AppDelegate setWindow:],setWindow:
    re_sel_imp = re.compile('\s*imp\s*0x\w+ ([+|-]\[.+\s(.+)\])')
    re_properties_start = re.compile('\s*baseProperties 0x\w{9}')
    re_properties_end = re.compile('\w{16} 0x\w{9} _OBJC_CLASS_\$_(.+)')
    re_property = re.compile('\s*name\s*0x\w+ (.+)')
    imp_sels = {}
    is_properties_area = False
    # “otool - ov”将输出Objective - C类结构及其定义的方法。
    for line in os.popen('/usr/bin/otool -oV %s' % path).xreadlines():
        results = re_sel_imp.findall(line)
        if results:
            #  imp     0x100001260 -[AppDelegate setWindow:] ==>> [-[AppDelegate setWindow:],setWindow:]
            (class_sel, sel) = results[0]
            if sel in imp_sels:
                imp_sels[sel].add(class_sel)
            else:
                imp_sels[sel] = set([class_sel])
        else:
            # delete setter and getter methods as ivar assignment will not trigger them
            # 删除相关的set方法
            if re_properties_start.findall(line):
                is_properties_area = True
            if re_properties_end.findall(line):
                is_properties_area = False
            if is_properties_area:
                property_result = re_property.findall(line)
                if property_result:
                    property_name = property_result[0]
                    if property_name and property_name in imp_sels:
                        # properties layout in mach-o is after func imp
                        imp_sels.pop(property_name)
                        # 拼接set方法
                        setter = 'set' + property_name[0].upper() + property_name[1:] + ':'
                        # 干掉set方法
                        if setter in imp_sels:
                            imp_sels.pop(setter)
    return imp_sels
def ref_selectors(path):
    print('获取所有被调用的方法...')
    re_selrefs = re.compile('__TEXT:__objc_methname:(.+)')
    ref_sels = set()
    lines = os.popen('/usr/bin/otool -v -s __DATA __objc_selrefs %s' % path).readlines()
    for line in lines:
        results = re_selrefs.findall(line)
        if results:
            ref_sels.add(results[0])
    return ref_sels
def ignore_selectors(sel):
    if sel == '.cxx_destruct':
        return True
    if sel == 'load':
        return True
    return False
def filter_selectors(sels):
    filter_sels = set()
    for sel in sels:
        for prefix in reserved_prefixs:
            if sel.startswith(prefix):
                filter_sels.add(sel)
    return filter_sels
def unref_selectors(path, project_path):
    # 获取所有类的protocol的方法集合
    protocol_sels = protocol_selectors(path, project_path)
    # 获取项目所有的引用方法
    ref_sels = ref_selectors(path)
    if len(ref_sels) == 0:
        exit('获取项目所有的引用方法为空....')
    # 获取所有的方法，除了set方法
    imp_sels = imp_selectors(path)
    print("\n")
    if len(imp_sels) == 0:
        exit('Error: imp selectors count null')
    unref_sels = set()
    for sel in imp_sels:
        # 所有的方法，忽略白名单
        if ignore_selectors(sel):
            continue
        # 如果当前的方法不在protocol中，也不再引用的方法中，那么认为这个方法没有被用到
        # protocol sels will not apppear in selrefs section
        if sel not in ref_sels and sel not in protocol_sels:
            unref_sels = unref_sels.union(filter_selectors(imp_sels[sel]))
    return unref_sels
# 黑白名单过滤
def filtration_list(unref_sels, black_list, white_list):
    # 黑名单过滤
    temp_unref_sels = list(unref_sels)
    if len(black_list) > 0:
        # 如果黑名单存在，那么将在黑名单中的前缀都过滤掉
        for unref_sel in temp_unref_sels:
            for black_prefix in black_list:
                class_method = "+[%s" % black_prefix
                instance_method = "-[%s" % black_prefix
                if (unref_sel.startswith(class_method) or unref_sel.startswith(
                        instance_method)) and unref_sel in unref_sels:
                    unref_sels.remove(unref_sel)
                    break
    # 白名单过滤
    temp_array = []
    if len(white_list) > 0:
        # 如果白名单存在，只留下白名单中的部分
        for unref_sel in unref_sels:
            for white_prefix in white_list:
                class_method = "+[%s" % white_prefix
                instance_method = "-[%s" % white_prefix
                if unref_sel.startswith(class_method) or unref_sel.startswith(instance_method):
                    temp_array.append(unref_sel)
                    break
        unref_sels = temp_array
    return unref_sels
# 整理结果，写入文件
def write_to_file(unref_sels):
    file_name = 'selector_unrefs.txt'
    f = open(os.path.join(sys.path[0].strip(), file_name), 'w')
    unref_sels_num_str = '查找到未被使用的方法: %d个\n' % len(unref_sels)
    print(unref_sels_num_str)
    f.write(unref_sels_num_str)
    num = 1
    for unref_sel in unref_sels:
        unref_sels_str = '%d : %s' % (num, unref_sel)
        print(unref_sels_str)
        f.write(unref_sels_str + '\n')
        num = num + 1
    f.close()
    print('\n项目中未使用方法检测完毕，相关结果存储到当前目录 %s 中' % file_name)
    print('请在项目中进行二次确认后处理')
if __name__ == '__main__':
    # 获取入参
    app_path, project_path, black_list, white_list = input_parameter()
    # 获取未使用方法
    unref_sels = unref_selectors(app_path, project_path)
    # 黑白名单过滤
    unref_sels = filtration_list(unref_sels, black_list, white_list)
    # 打印写入文件
    write_to_file(unref_sels)

获取所有的protocol中的方法...
Traceback (most recent call last):
File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 309, in <module>

unref_sels = unref_selectors(app_path, project_path)

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 231, in unref_selectors

protocol_sels = protocol_selectors(path, project_path)

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 151, in protocol_selectors

header_protocol_sels = header_protocol_selectors(header_path)

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 80, in header_protocol_selectors

for line in file.readlines():

File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/codecs.py", line 322, in decode

(result, consumed) = self._buffer_decode(data, self.errors, final)

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1 in position 2340: invalid start byte

回答：

File "/Users/tonystark/Downloads/Performance-main/FindSelectorsUnrefs.py", line 80, in header_protocol_selectors

for line in file.readlines():

已经指出具体错误的位置了啊。在

file = open(file_path, 'r')

这里没指定编码，而且是文本模式打开，python 3 默认用 utf-8 解码文件内容。而你打开的文件里包含非 UTF-8 编码的内容，所以在 readlines 出现解码错误。

最简单的办法是在 readlines 这里 try except 一下抓住 UnicodeDecodeError，把有问题的文件名字打出来，编码统一成 utf-8 在处理。如果有问题的文件很多，可以自己包装一个 readlines 函数，尝试用不同编码打开文件。

又或者不需要str的话，用rb模式打开文件，直接读bytes，也不会有 UnicodeDecodeError。

以上是 UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1 的全部内容，来源链接： utcz.com/p/938227.html

UnicodeDecodeError: &#x27;utf-8&#x27; codec can&#x27;t decode byte 0xb1

回答：

其他人也看了：

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb1