python3 正则表达式(python3入门)

Z时代
2024-01-10
分类：综合
python
  1 # 一、正则表达式：
  2 # \w    表示数字、字母、下划线
  3 # \W     表示上面描述的内容取反
  4 #
  5 #
  6 # \s     表示任意空白字符（\t \n)
  7 # \S     表示上面描述的内容取反
  8 #
  9 # \d     [0-9]数字
 10 # \D     非数字 [^\d]
 11 #
 12 # ^    表示从开头开始匹配
 13 # $    表示匹配字符串的结尾（要求匹配内容位于末尾）
 14 #
 15 # .    代表任意一个字符（除了\n以外）
 16 # 若要让 . 也能匹配换行符，则需在函数调用中加上参数re.DOTALL
 17 # print(re.findall(\'a.c\',\'abc a c a1c aaaaa a\nc a\tc\',re.DOTALL))
 18 
 19 
 20 # []  表示匹配一个字符，这个字符必须是指定范围内的
 21 #   [0-9]表示0-9任意一个数字，[a-zA-Z]表示大小写a-z中任意一个字母
 22 # print(re.findall(\'a[0-9]c\',\'abc a c a1c aaaaa a\nc a\tc\'))
 23 # print(re.findall(\'a[a-z]\',\'abc a c a1c aaaaa a\nc a\tc\'))
 24 # print(re.findall(\'a[+=#!-]c\',\'a#c a!c a+c a-c aa abc a1c a\nc\'))
 25 
 26 
 27 
 28 #二、重复匹配:
 29 #?  表示?左边的一个字符出现0次或者1次(通俗点来说，如果?左边的字符有，就取，如果没有则不取）
 30 # print(re.findall(\'ab?\',\'ab abb abbbb abbbbbbb a1b a+b\'))    #[\'ab\', \'ab\', \'ab\', \'ab\', \'a\', \'a\']
 31 
 32 #*  表示左边的一个字符出现0次或者无穷次
 33 # print(re.findall(\'ab*\',\'ab abb abbbb abbbbbbb a1b a+b\'))    #[\'ab\', \'abb\', \'abbbb\', \'abbbbbbb\', \'a\', \'a\']
 34 
 35 #.*     表示任意0个或者无穷个非换行字符(贪婪匹配，找到离他最远的字符）
 36 # print(re.findall(\'a.*c\',\'ac a12312131dsssssdddss466668948949239kddddccdsdfacdc 123 a\nc\'))
 37     #[\'ac a12312131dsssssdddss466668948949239kddddccdsdfacdc\']
 38 
 39 #.*?    非贪婪匹配（固定搭配，?在这里不是之前?的意思了）
 40 # print(re.findall(\'a.*?c\',\'ac a1c2312131dsssssdddss466668948949239kddddccdsdfacdc 123 a\nc\'))    #[\'ac\', \'a1c\', \'ac\']
 41 
 42 
 43 #|  表示或者，满足其一都可以
 44 # print(re.findall(\'company|companies\',\'Too many companies have gone bankrupt, and the next one is my company\'))
 45 # print(re.findall(\'compan(?:y|ies)\',\'Too many companies have gone bankrupt, and the next one is my company\'))
 46 #两种方法都可以
 47 
 48 
 49 #() 指定表达式，但是只保留()内所指定的内容，若想要取()外的内容，则可以写成(?:过滤内容) ?:为固定搭配
 50 #可以认为()左边所定义的内容为匹配的条件。
 51 # print(re.findall(\'compan(y|ies)\',\'Too many companies have gone bankrupt, and the next one is my company\'))
 52 # print(re.findall(\'compan(?:y|ies)\',\'Too many companies have gone bankrupt, and the next one is my company\'))
 53 #使用分组+非贪婪 表达式获取字符串中的url地址
 54 # print(re.findall(\'href="(.*?)"\',\'<a href="https://www.baidu.com/1.mp4"><p>"哈哈哈"</p><a href="https://www.baidu.com/2.mp4">\'))
 55 
 56 
 57 #+  代表左边的那个字符出现1次或者无穷次（+ 和 *进行对比)
 58 # print(re.findall(\'ab*\',\'a ab abb abbb abbbbbbb a1b a+b\'))
 59 # print(re.findall(\'ab+\',\'a ab abb abbb abbbbbbb a1b a+b\'))
 60 
 61 
 62 #\  转义符，\后面的字符，取消特殊字符的意义
 63 #正则中的\会被解析两次：先由python字符串语法解析并剥离掉一层\ ，再交给正则引擎解析，此时含义可能已经改变（推荐使用r\'\'原始字符串来避免）
 64 
 65 # print(re.findall(\'a\.c\',\'a.c a1c aac aAc\'))
 66 # print(re.findall(\'a\\.\',\'a\c ac ab2 a. ac\'))     #先经过python语法解析后，正则条件会变成\'a\.\'，然后再给到正则引擎的时候，就认为
 67 #                                                 #是在匹配\'a.\'，此时\.表示普通的点号，不再代表任意字符
 68 #
 69 # print(re.findall(r\'a\\\.\',\'a\c ac ab2 a\. ac\'))     #效果和下面一致
 70 # print(re.findall(\'a\\\\\\.\',\'a\c ac ab2 a\. ac\'))   #效果和上面一致
 71 #
 72 #
 73 # print(re.findall(\'a\\\\c\',\'a\c ac ab2 ac\'))
 74 # print(re.findall(r\'a\\c\',\'a\c ac ab2 ac\'))
 75 
 76 
 77 
 78 #小练习
 79 # print(re.findall(\'\D?(\-?\d+\.?\d*)\',"1-12*(60+(-40.9834898/5)-(-4*3)"))  #取出字符串中所有的数字（包括负数和小数）
 80 
 81 #{} 自定义某个字符出现的次数    #{0,2} => 表示出现的次数为0～2次，{}左边的字符如果有，则至多取两次，如果没有，则不取
 82 # print(re.findall(\'ab{0,2}\',\'a ab abb abbbb\'))
 83 
 84 
 85 
 86 
 87 #re模块其他的使用方法
 88 x = \'myleon leon is DSB\'
 89 
 90 #re.search 从头开始查找，若找到就返回查找到的对象，可以通过.group()提取该值
 91 # print(re.search(\'leon\',x))  #从左往右查找，返回第一个查找到的对象
 92 # print(re.search(\'leon\',x).group())  #根据查找到的对象返回一个最终的值
 93 # print(re.search(\'^leon\',x)) #查找以leon开头的结果，效果等同re.match
 94 
 95 #re.match() 只对开头开始匹配
 96 # print(re.match(\'leon\',x))   #查找以leon开头的值,返回一个对象，若被查找的字符串开头不是这个查询条件，则返回None
 97 # print(re.match(\'leon\',x).group())   #返回匹配到的结果值；此处x不以leon开头，match返回None，对None调用.group()会抛出AttributeError
 98 
 99 #re.split() 可以指定正则来进行切分
100 # info = \'a*-=b->:c\'   #无法使用\'\'.split()来进行分割,每次符合正则条件的时候将左边的放入列表中
101 # print(re.split(\'[*=>-]\',info))  #[\'a\', \'\', \'\', \'b\', \'\', \':c\']
102 # res = [i for i in re.split(\'[*-=>]\',info) if len(i) != 0]   #通过列表推导式+re.split过滤空字符串（注意：[]中的*-=是一个字符范围，其中包含冒号）
103 # print(res)
104 
105 #re.sub()   #类似\'\'.replace的功能
106 # msg1 = \'alex  myalex alex is DSB\'
107 # print(re.sub(\'^alex\',\'ALEX\',msg1))   #ALEX  myalex alex is DSB
108 # print(msg1.replace(\'alex\',\'ALEX\'))   #ALEX  myALEX ALEX is DSB
109 
110 # msg2=\'alex my name is alex Alex AlEx aLeX is hahahah\'   #将不论大小写的alex 都替换成DSB
111 # print(re.sub(\'alex\',\'DSB\',msg2,flags=re.I)) #flags=re.I 意思为忽略大小写
112 
113 
114 # msg3="""
115 # a1231231231231egon
116 # 123123sadfsadf2143egon
117 # sdafljsalfdjerwqregon
118 # """
119 # print(re.findall(\'egon$\',msg3,flags=re.M))  #flags=re.M 为多行模式，$ 会匹配每一行的行尾
120 
121 
122 
123 #了解 re.sub()+分组+序号 进行重新排序
124 # msg4="Name---@#@#--is==++*=Pig"
125 # print(re.sub(\'([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)\',r\'\5\2\3\4\1\',msg4))
126 
127 #re.compile() 将正则表达式存为变量,然后可以重复调用
128 # patten = re.compile(\'href="(.*?)"\')
129 # print(patten.findall(\'<a href="https://www.baidu.com/1.mp4"><p>"哈哈哈"</p><a href="https://www.baidu.com/2.mp4">\'))
130 # print(patten.findall(\'<a href="https://www.kaixin.com/png1"><p>"哈哈哈"</p><a href="https://www.kaixin001.com/png2">\'))
以上是 python3 正则表达式(python3入门) 的全部内容，来源链接： utcz.com/z/387837.html
python3 正则表达式(python3入门)

其他人也看了：