How can I fix Python downloads where the saved file is 0 bytes or only 2 KB?
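When a download lands on disk at 0 bytes or around 2 KB, the usual cause is that the server never sent the file at all: it answered with an error page or a block notice, and the script wrote that response to disk. Before digging into either script below, it is worth checking what the server actually returns. A minimal diagnostic sketch (the URL is a placeholder, not one from the scripts):

    import requests

    url = 'https://example.com/some-package.whl'  # placeholder URL
    resp = requests.get(url, allow_redirects=True)
    print(resp.status_code)                    # 200 means OK; 403/404 error pages are often ~2 KB
    print(resp.headers.get('Content-Type'))    # a real wheel should not be 'text/html'
    print(len(resp.content))                   # compare against the size the site advertises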

Code 1:

#!/usr/bin/env python
# coding=utf-8
import os
import os.path
import urllib.request
import requests

def getHtml(url):
    # Fetch a page and return its raw bytes.
    global html
    page = urllib.request.urlopen(url)
    html = page.read()
    return html

def file(url1, file_name, name):
    # Download url1 and write the response body to a file called name.
    print(url1)
    headers = {'Host': 'download.lfd.uci.edu',
               'Referer': 'https://www.lfd.uci.edu/~gohlke/pythonlibs/',
               'Connection': 'keep-alive',
               'Upgrade-Insecure-Requests': '1',
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
               'Accept-Encoding': 'gzip, deflate, br',
               'Accept-Language': 'zh-CN,zh;q=0.8'}
    request = requests.get(url=url1, headers=headers)
    global i
    i += 1
    with open(name, 'wb') as f:
        f.write(request.content)
    print(file_name)
    print("Completed : .... %d ..." % x)

save_path = os.getcwd()
url = 'https://www.lfd.uci.edu/'
html = getHtml(url)
# The fetched page is immediately replaced by a hard-coded sample of the
# <li> entries from https://www.lfd.uci.edu/~gohlke/pythonlibs/:
html = '''

<li><a href="javascript:;" onclick=" javascript:dl([101,105,52,56,106,100,50,118,54,95,110,53,119,47,116,99,51,104,113,108,45,112,115,97,46,49,109], &quot;E53AD6:=&lt;F4?C5G5GHC&gt;D?2C&gt;D?2C;098FI471G;@B&quot;); &quot;javascript: dl(&quot;" title="[614 KB] [Oct 17, 2019]">ad3‑2.2.1‑cp38‑cp38‑win_amd64.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,108,116,49,47,105,53,99,50,104,97,119,46,115,100,113,51,110,112,118,56,106,45], &quot;&lt;7D&gt;AB5139=?E7;7;2E6A?CE6A?CE:4@?7;:80&quot;); &quot;javascript: dl(&quot;" title="[544 KB] [Oct 17, 2019]">ad3‑2.2.1‑cp38‑cp38‑win32.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,52,108,119,95,116,104,109,45,100,47,50,55,51,99,105,46,49,53,97,106,115,113,112,110,54,118], &quot;D:CEFIA49B8&lt;7:?:?@7=F&lt;;7=F&lt;;672&gt;G3B68H0?251&quot;); &quot;javascript: dl(&quot;" title="[609 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp37‑cp37m‑win_amd64.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,100,50,106,53,51,116,105,115,49,108,46,97,45,104,118,47,119,113,55,112,110,109,99], &quot;712AC&gt;35?;04&lt;1:1:8&lt;FC4B&lt;FC4BE&lt;@6D41:@=9&quot;); &quot;javascript: dl(&quot;" title="[540 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp37‑cp37m‑win32.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,119,50,116,49,54,115,112,106,104,99,51,100,46,108,109,95,97,53,110,118,113,105,47,45,52], &quot;517D6CA2F@;:G1&lt;1&lt;3G96:4G96:4&gt;G0EB?@&gt;;4H&lt;08=&quot;); &quot;javascript: dl(&quot;" title="[598 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp36‑cp36m‑win_amd64.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,110,54,49,113,104,45,108,99,53,116,46,106,115,51,112,50,109,97,118,119,105,47,100], &quot;&lt;?;3&gt;B89EAF=5?:?:257&gt;=157&gt;=1@5CD0=?:C46&quot;); &quot;javascript: dl(&quot;" title="[534 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp36‑cp36m‑win32.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,53,119,105,112,46,51,104,106,108,95,49,97,47,118,54,109,45,52,99,110,116,113,115,100,50], &quot;FH7E3=0D&lt;B350&lt;;G5@H4H4:@B350@B350?@12C9;?G&gt;A4168&quot;); &quot;javascript: dl(&quot;" title="[596 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp35‑cp35m‑win_amd64.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,46,104,53,109,47,99,106,115,50,110,119,100,49,113,118,108,51,45,116,112,97,105], &quot;786=C&gt;2B45C@24D;@A8080&lt;A5C@2A5C@23A:E9@80:1?&quot;); &quot;javascript: dl(&quot;" title="[533 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp35‑cp35m‑win32.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,106,45,105,47,112,95,115,104,119,49,109,116,52,51,53,108,118,97,99,110,113,50,46,100,54], &quot;6E0D4@&gt;;3B4=&lt;3AG=1EFEF91B4=&lt;1B4=&lt;:182C5A:GH&lt;F87?&quot;); &quot;javascript: dl(&quot;" title="[599 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp34‑cp34m‑win_amd64.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,97,119,113,51,109,49,118,47,110,100,104,50,53,106,112,45,99,108,52,115,46,116,105], &quot;C;=2&gt;6&lt;E7@&gt;3B7093?;D;D5?@&gt;3B?@&gt;3B4?1F83;D1:A&quot;); &quot;javascript: dl(&quot;" title="[539 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp34‑cp34m‑win32.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,52,49,116,45,53,100,51,97,109,118,54,113,105,46,50,106,95,112,119,47,55,108,115,110,104,99], &quot;F&gt;?;A942CIA&gt;DC7563&gt;=&gt;=13IA&gt;D3IA&gt;D83B&lt;G@785:0=BHE&quot;); &quot;javascript: dl(&quot;" title="[642 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp27‑cp27m‑win_amd64.whl</a></li>

<li><a href="javascript:;" onclick=" javascript:dl([101,46,53,49,106,110,108,47,50,99,118,105,112,109,116,119,51,97,100,113,45,115,104,55], &quot;D73B;91=68;7F6@A?C70702C8;7FC8;7F&lt;C&gt;:4?70&gt;E5&quot;); &quot;javascript: dl(&quot;" title="[556 KB] [Oct 02, 2018]">ad3‑2.2.1‑cp27‑cp27m‑win32.whl</a></li>

</ul>

'''

print('html done')
name_list = str(html)
x = 1
files = os.listdir(save_path)
print(files)
name_list1 = []
k = 0
# Scan for the ']">' marker that precedes each file name in the HTML and
# grab the following 60 characters as a candidate name.
for i in range(len(name_list)):
    j = 0
    if name_list[i-2:i+1] == ']">':
        name_list1.append(name_list[i+1:i+60])
    if k < len(name_list1):
        # Cut each candidate down to its .whl/.gz/.zip extension.
        for l in range(len(name_list1[k])):
            if l - 9 >= 0:
                if name_list1[k][l-4:l] == '.whl' or name_list1[k][l-3:l] == '.gz' or name_list1[k][l-4:l] == '.zip':
                    j = 1
                    m = l
        if j == 1:
            name_list1[k] = name_list1[k][0:m]
            k += 1
i = 0
print(name_list1)

for name in name_list1:
    # Trim once more in case a candidate still carries trailing HTML.
    j = 0
    for l in range(len(name)):
        if l - 9 >= 0:
            if name[l-4:l] == '.whl' or name[l-3:l] == '.gz' or name[l-4:l] == '.zip':
                j = 1
                m = l
    if j == 1:
        name = name[0:m]
        k += 1
    if name in files:
        continue  # skip files that already exist locally
    print('no:' + str(x))
    print('\ndownload ' + name)
    string = 'https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/' + name
    print('00' + save_path)
    file(string, save_path, name)
    x = x + 1
print('finished')
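A note on the script above: on that page the real download paths are produced by the JavaScript dl([...], "...") calls visible in the <li> sample, so building the URL as 'https://download.lfd.uci.edu/pythonlibs/s2jqpv5t/' + name can easily point at a stale or wrong path. The server then answers with a short HTML error page, and writing request.content to disk produces exactly the 0-byte or ~2 KB files described in the title. A hedged sketch of a download helper that detects this instead of saving it (the helper name and checks are illustrative, not part of the original script):

    import os
    import requests

    def download(url, dest, headers=None):
        # Illustrative helper: stream the response and refuse to save
        # anything that looks like an error page rather than a file.
        resp = requests.get(url, headers=headers, stream=True)
        resp.raise_for_status()  # raise on 403/404 instead of saving the error page
        ctype = resp.headers.get('Content-Type', '')
        if 'text/html' in ctype:
            raise ValueError('got an HTML page instead of a file: ' + url)
        with open(dest, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=8192):
                f.write(chunk)
        if os.path.getsize(dest) == 0:
            raise ValueError('downloaded file is empty: ' + dest)

With a check like this, a blocked or wrong URL raises a visible exception instead of leaving a broken .whl on disk.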

Code 2:

import os
import os.path
import requests

def getText(html):
    # Fetch a search-results page and return its decoded text.
    res = requests.get(html)
    res.encoding = 'utf-8'
    words = res.text
    return words

def file(url1, file_name, name):
    # Download url1 and write the response body to a file called name.
    print(url1)
    # Note: the original sent 'Host': 'https://files.pythonhosted.org/packages/',
    # which is not a valid Host value (it must be a bare host name) and is one
    # reason a server may answer with an error page instead of the file.
    headers = {'Host': 'files.pythonhosted.org',
               'Referer': 'https://pypi.org/',
               'Connection': 'keep-alive',
               'Upgrade-Insecure-Requests': '1',
               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER',
               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
               'Accept-Encoding': 'gzip, deflate, br',
               'Accept-Language': 'zh-CN,zh;q=0.8'}
    request = requests.get(url=url1, headers=headers)
    global i
    i += 1
    with open(name, 'wb') as f:
        f.write(request.content)
    print(file_name)
    print("Completed : .... %d ..." % x)

def get(url):
    # Scrape one search-results page and collect links that follow '/project/'.
    global name_list1
    res = getText(url)
    print('html done,page:' + str(count) + '\n')
    for i in range(len(res)):
        if res[i-8:i+1] == '/project/':
            name_list1.append('https://pypi.org' + res[i-8:i+20])

def trim(list1):
    k = 0
    list2 = []
    for i in list1:
        j = 25
        while j < len(list1[k]):
            if list1[k][j] == '/':
                list2.append(list1[k][0:j])
                break
            j += 1
        k += 1
    return list2

def get1(url):
    # Open a project's '#files' page and collect every 'https://files...' URL on it.
    global namelist
    url = url + '#files'
    res = requests.get(url)
    res.encoding = 'utf-8'
    html = res.text
    for p in range(len(html)):
        stri = 'https://files'
        if html[p-len(stri):p] == stri:
            namelist.append(html[p-len(stri):p+170])

save_path = os.getcwd()
x = 1
files = os.listdir(save_path)
name_list1 = []
k = 0
# Only the first search page is fetched here; widen the range to crawl more pages.
for count in range(1):
    get('https://pypi.org/search/?c=Programming+Language+%3A%3A+Python+%3A%3A+3&page=' + str(count))

""" global m

if k<len(name_list1):

for l in range(len(name_list1[k])):

if l-9>=0:

if name_list1[k][l-4:l]=='.whl' or name_list1[k][l-3:l]=='.gz' or name_list1[k][l-4:l]=='.zip':

j=1

m=l

if j==1:

name_list1[k]=name_list1[k][0:m]

k+=1"""

'''if j==0:

name_list.remove(name)'''

#file_name = os.path.join(save_path ,name)

#i=0

#print(name)

#print(name_list1)

namelist = []
h = 0
for y in trim(name_list1):
    get1(y)
    print(namelist)
    if h == 3:
        break  # stop after the first few projects while testing
    h += 1

i = 0
for name in namelist:
    # Cut the collected URL down to its .whl/.gz/.zip extension.
    j = 0
    for l in range(len(name)):
        if l - 9 >= 0:
            if name[l-4:l] == '.whl' or name[l-3:l] == '.gz' or name[l-4:l] == '.zip':
                j = 1
                m = l
                break
    if j == 1:
        name = name[0:m]
        k += 1
    # Walk back from the extension to the last '/' to recover the bare file name.
    while m > 0:
        if m < len(name):
            if name[m] == '/':
                filename = name[m+1:len(name)]
                break
        m -= 1
    if filename in files:
        continue  # skip files that already exist locally
    print('no:' + str(x))
    print('\ndownload ' + name)
    print('00' + save_path)
    file(name, save_path, filename)
    print('\n........' + filename + '..........complete\n')
    x = x + 1
print('finished')
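One more hedged check that applies to both scripts: a .whl (like a .zip) is a ZIP archive, so a saved file can be validated after the fact, and an HTML error page will fail the test. A small sketch (the file name is illustrative):

    import zipfile

    # Post-download check: a real wheel is a ZIP archive,
    # while a ~2 KB HTML error page is not.
    fname = 'ad3-2.2.1-cp38-cp38-win_amd64.whl'  # illustrative file name
    if not zipfile.is_zipfile(fname):
        print(fname + ' is not a valid archive - probably a saved error page')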

Hoping someone more experienced can help solve this.
