Python实现批量下载文件

Python实现批量下载文件

#!/usr/bin/env python

# -*- coding:utf-8 -*-

from gevent import monkey

monkey.patch_all()

from gevent.pool import Pool

import requests

import sys

import os

def download(url):
    """Download *url* into the current working directory.

    The local file name is whatever follows the last '/' in the URL.
    The body is streamed to disk in 1 KiB chunks so large files are
    not held in memory.

    Raises:
        ValueError: if the URL ends with '/' and so yields no file name.
        requests.exceptions.HTTPError: if the server answers with a
            4xx/5xx status (nothing is written in that case).
    """
    # Parenthesised so the two literals form one string; a bare trailing
    # '+' at the end of a line is a syntax error.
    chrome = ('Mozilla/5.0 (X11; Linux i86_64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36')
    headers = {'User-Agent': chrome}

    url = url.strip()
    filename = url.split('/')[-1]
    if not filename:
        # Fail before touching the network instead of failing in open('').
        raise ValueError('cannot derive a file name from URL: %r' % url)

    r = requests.get(url, headers=headers, stream=True)
    try:
        # Without this check an error page would be saved as the file.
        r.raise_for_status()
        with open(filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # skip keep-alive chunks
                    f.write(chunk)
    finally:
        # A streamed response keeps its connection until it is closed.
        r.close()

    print("%s is ok" % filename)

def removeLine(key, filename):
    """Delete every line of *filename* that contains *key*.

    *key* is matched as a literal substring. The previous implementation
    passed it unquoted to ``sed`` through a shell, so characters such as
    '.', '&' or ';' in a file name were interpreted as regex or shell
    syntax and could delete the wrong lines or run arbitrary commands.

    An empty *key* is ignored, because it would match every line.
    The file is rewritten only when at least one line was removed.
    """
    if not key:
        return

    with open(filename, 'r') as src:
        lines = src.readlines()

    kept = [line for line in lines if key not in line]

    if len(kept) != len(lines):
        with open(filename, 'w') as dst:
            dst.writelines(kept)

if __name__ == "__main__":
    # Usage: python <script> urls.txt
    # urls.txt holds one URL per line. Up to four downloads run
    # concurrently, and a URL is removed from the list file only after
    # its download returned without raising.
    if len(sys.argv) == 2:
        filename = sys.argv[1]

        # Read the whole list first and close the file, because the list
        # file is rewritten while downloads are in progress. Blank lines
        # are skipped; readlines() keeps the '\n', so a bare `if line:`
        # never filtered them out.
        with open(filename, "r") as f:
            urls = [line.strip() for line in f if line.strip()]

        def fetch(url):
            """Download one URL, then drop its entry from the list file."""
            download(url)
            # Reached only when download() did not raise, so failed
            # URLs stay in the list and can be retried on the next run.
            removeLine(url.split('/')[-1], filename)

        p = Pool(4)
        for url in urls:
            p.spawn(fetch, url)
        p.join()
    else:
        print('Usage: python %s urls.txt' % sys.argv[0])

其他网友的方法(这是一个独立的脚本,使用 urllib2 下载单个文件,与上面的批量下载脚本互不依赖):

from os.path import basename

from urlparse import urlsplit

def url2name(url):
    """Return the final path segment of *url*.

    Only the path component is considered, so any query string or
    fragment is excluded. A path ending in '/' yields an empty string.
    """
    path = urlsplit(url).path
    return basename(path)

def _filename_from_disposition(header):
    """Return the ``filename=`` value of a Content-Disposition header.

    Returns '' when the header carries no ``filename=`` parameter.
    Surrounding quotes and any trailing ``; param=...`` text are dropped.
    Directory components are stripped because the value is chosen by the
    remote server and must not select where the file is written.
    """
    parts = header.split('filename=', 1)
    if len(parts) < 2:
        return ''
    value = parts[1].strip()
    if value[:1] in ('"', "'"):
        # Quoted value: take everything up to the matching closing quote.
        closing = value.find(value[0], 1)
        name = value[1:closing] if closing != -1 else value[1:]
    else:
        # Unquoted value ends at the next parameter separator.
        name = value.split(';', 1)[0].strip()
    return basename(name)


def download(url, localFileName = None):
    """Download *url* to a local file.

    The local name is chosen in this order:
      1. *localFileName*, when given;
      2. the ``filename=`` value of the Content-Disposition header;
      3. the last path segment of the final URL after redirects;
      4. the last path segment of *url*.
    """
    # urllib2 is used here but is not among the imports shown next to
    # this function, so import it locally to keep the function usable.
    import urllib2

    localName = url2name(url)
    req = urllib2.Request(url)
    r = urllib2.urlopen(req)
    try:
        disposition = r.info().get('Content-Disposition')
        headerName = _filename_from_disposition(disposition) if disposition else ''
        if headerName:
            # The server named the file explicitly.
            localName = headerName
        elif r.url != url:
            # We were redirected: take the name from the final URL.
            localName = url2name(r.url)

        if localFileName:
            # The caller can force the file to be saved under this name.
            localName = localFileName

        # Copy in fixed-size pieces so a large download is not held in
        # memory; `with` closes the file even if a read fails.
        with open(localName, 'wb') as f:
            for chunk in iter(lambda: r.read(64 * 1024), b''):
                f.write(chunk)
    finally:
        r.close()

# Example invocation. The string literal below is a placeholder and must be
# replaced with the real URL of the file to download before running.
download(r'你要下载的python文件的url地址')

以上便是本文给大家分享的全部内容了,小伙伴们可以测试下哪种方法效率更高呢。

以上是 Python实现批量下载文件 的全部内容, 来源链接: utcz.com/z/315240.html

回到顶部