Python 处理数据的实例详解

Python 处理数据的实例详解

最近用python(3.2的版本)写了根据特定规则,处理数据的一个小程序,用到了一些python常用的基础知识,在此总结一下:

1,python读文件

2,python写文件

3,python的流程控制

4,python的for循环

5,python的集合,或字符串里判断是否存在某个元素

6,python的逻辑或,逻辑与

7,python的正则过滤

8,python的字符串去除首尾空格、判断是否以某个字符串开头,以及按某个字符拆分成list

python的打开文件的模式:

关于open 模式:

r     以只读方式打开 (默认模式)

w     以写方式打开 (文件已存在时先清空, 不存在则创建)

a     以追加模式打开 (从 EOF 开始, 必要时创建新文件)

r+     以读写模式打开

w+     以读写模式打开 (参见 w )

a+     以读写模式打开 (参见 a )

rb     以二进制读模式打开

wb     以二进制写模式打开 (参见 w )

ab     以二进制追加模式打开 (参见 a )

rb+    以二进制读写模式打开 (参见 r+ )

wb+    以二进制读写模式打开 (参见 w+ )

ab+    以二进制读写模式打开 (参见 a+ )

处理代码如下:

def showtxt(path,outpathname,detailpath):
    """Filter the tab-separated records in *path* against four keyword rules.

    Each non-blank input line is split on tabs; column 0 is the catalog id
    and column 4 is the keyword (quotes and ``+`` are removed via ``clear``).
    The four rules are checked independently, so one line may be written
    (or logged) more than once.

    Args:
        path: UTF-8 input file, one tab-separated record per line.
        outpathname: File that accepted lines are appended to. Rejected
            lines are appended to ``outpathname + ".log"``.
        detailpath: Name of the sub-directory of the hard-coded result
            directory that holds the per-rule files ``1.txt`` .. ``4.txt``.

    Raises:
        IndexError: If a non-blank line has fewer than five columns.
        OSError: If any of the files cannot be opened.
    """
    greenpath=r"C:\\Users\\qindongliang\\Desktop\\tnstxt\\green.txt"
    redpath=r"C:\\Users\\qindongliang\\Desktop\\tnstxt\\red.txt"
    resultdir=r"C:\Users\qindongliang\Desktop\tnstxt\result\\"+detailpath

    # Catalog id prefixes shared by rules 1, 2 and 3.
    prefixes=("018022","018035","014023003")

    redset=listtxt(redpath)
    greenset=listtxt(greenpath)
    print("红色词数量: ",len(redset))
    print("绿色词数量: ",len(greenset))

    # Number of rejections written to the log; starts at 0 so the reported
    # total is exact.
    delcount=0

    # One with-statement guarantees every handle is closed even if a line
    # fails to parse. The files are opened in the same order as before.
    with open(resultdir+"\\1.txt",encoding="UTF-8",mode="a+") as f1, \
         open(resultdir+"\\2.txt",encoding="UTF-8",mode="a+") as f2, \
         open(resultdir+"\\3.txt",encoding="UTF-8",mode="a+") as f3, \
         open(resultdir+"\\4.txt",encoding="UTF-8",mode="a+") as f4, \
         open(path,encoding="UTF-8",mode="r+") as f, \
         open(outpathname,encoding="UTF-8",mode="a+") as fnew, \
         open(outpathname+".log",encoding="UTF-8",mode="a+") as flog:

        for line in f:
            line=line.strip()
            if not line:
                # A blank line (e.g. a trailing newline) has no columns.
                continue

            fields=line.split("\t")
            catalogid=fields[0]
            keyword=clear(fields[4].strip())
            has_prefix=catalogid.startswith(prefixes)

            # Rule 1: red-list keywords are kept only under the listed prefixes.
            if keyword in redset:
                if has_prefix:
                    f1.write(line+"\n")
                    fnew.write(line+"\n")
                else:
                    flog.write(line+" 不符合条件1 "+"\n")
                    delcount=delcount+1

            # Rule 2: green-list keywords are kept only outside those prefixes.
            # NOTE(review): unlike the other rules, 2.txt receives the
            # rejected lines here - confirm this is intended.
            if keyword in greenset:
                if not has_prefix:
                    fnew.write(line+"\n")
                else:
                    f2.write(line+"\n")
                    flog.write(line+" 不符合条件2"+"\n")
                    delcount=delcount+1

            flist=formatStrList(keyword)
            has_sex_word="sexy" in flist or "sex" in flist

            # Rule 3: keywords containing the token "sexy" or "sex".
            if has_sex_word:
                if has_prefix:
                    f3.write(line+"\n")
                    fnew.write(line+"\n")
                else:
                    flog.write(line+" 不符合条件3"+"\n")
                    delcount=delcount+1

            # Rule 4: "underwear" without "sexy"/"sex" must be in catalog 014032.
            if "underwear" in flist and not has_sex_word:
                if catalogid.startswith("014032"):
                    f4.write(line+"\n")
                    fnew.write(line+"\n")
                else:
                    flog.write(line+" 不符合条件4"+"\n")
                    delcount=delcount+1

        # The log is opened in append mode, so end the summary with a newline
        # to keep the next run's first entry on its own line.
        flog.write("删除总数目: "+str(delcount)+"\n")

import re

def clear(str):
    """Return *str* with every double quote, single quote and ``+`` removed.

    The parameter keeps its original name ``str`` for backward compatibility
    even though it shadows the builtin inside this function; it is no longer
    rebound.
    """
    return re.sub('["\'+]',"",str)

def formatStrList(keyword):
    """Split *keyword* on single spaces and strip whitespace from each token.

    Consecutive spaces produce empty-string tokens, which are kept.

    Returns:
        A list of the stripped tokens, in their original order.
    """
    # str.strip() returns a new string, so the result has to be collected;
    # calling it and discarding the value leaves the tokens unchanged.
    return [token.strip() for token in keyword.split(" ")]

def listtxt(path):
    """Read the UTF-8 text file at *path* into a set of its stripped lines.

    Leading and trailing whitespace is removed from every line; a blank line
    contributes the empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    # The with-block closes the file even if reading or decoding fails.
    with open(path,encoding="UTF-8") as f:
        return {line.strip() for line in f}

# Input and output files for the first data set. These are raw strings, so
# every "\\" stays as two literal backslash characters in the path.
path1=r"C:\\Users\\qindongliang\\Desktop\\tnstxt\\highfrequency.txt"

pathout1=r"C:\\Users\\qindongliang\\Desktop\\tnstxt\\detail\\a_highfrequency.txt"

# Sub-directory name that showtxt appends to its result directory.
detail1path="highfrequency"

# Input and output files for the second data set.
path2=r"C:\\Users\\qindongliang\\Desktop\\tnstxt\\highfrequency_d1.txt"

pathout2=r"C:\\Users\\qindongliang\\Desktop\\tnstxt\\detail\\b_highfrequency_d1.txt"

detail2path="highfrequency_d1"

# The run over the first data set is disabled; only the second is processed.
#showtxt(path1,pathout1,detail1path)

showtxt(path2,pathout2,detail2path)

以上是 Python 处理数据的实例详解 的全部内容, 来源链接: utcz.com/z/329389.html

回到顶部