Python抓取淘宝IP地址数据

python

def fetch(ip):
    """Look up geolocation data for *ip* via the Taobao IP service.

    Args:
        ip: The IP address, as a string, appended to the query URL.

    Returns:
        A ``(ok, fields)`` tuple. On success ``ok`` is 1 and ``fields`` is a
        list of 13 UTF-8 encoded strings in this order: ip, country,
        country_id, area, area_id, region, region_id, city, city_id, county,
        county_id, isp, isp_id. On any failure (request error, invalid JSON,
        non-zero ``code`` in the reply, missing field) ``ok`` is 0 and
        ``fields`` is an empty list.
    """
    url = 'http://ip.taobao.com/service/getIpInfo.php?ip=' + ip
    # Order matters: the caller joins these values into one output line.
    field_names = (
        u'ip',
        u'country', u'country_id',
        u'area', u'area_id',
        u'region', u'region_id',
        u'city', u'city_id',
        u'county', u'county_id',
        u'isp', u'isp_id',
    )
    try:
        connection = urllib.urlopen(url)
        try:
            response = connection.read()
        finally:
            # Release the socket even when read() fails.
            connection.close()
        jsondata = json.loads(response)
        if jsondata[u'code'] != 0:
            return 0, []
        data = jsondata[u'data']
        # Build the list in one step so a missing key never leaks a
        # half-filled result to the caller.
        result = [data[name].encode('utf-8') for name in field_names]
    except Exception:
        # Catch-all boundary for a worker thread, but unlike a bare
        # ``except:`` it lets SystemExit / KeyboardInterrupt through.
        logging.exception("Url open failed:%s", url)
        return 0, []
    return 1, result

def worker(ratelimit, jobs, results, progress):
    """Consume IPs from *jobs*, look each one up and publish the outcome.

    Runs until the module-level ``cancel`` flag becomes true.

    Args:
        ratelimit: Object whose ``ratecontrol()`` is called before every
            dequeue attempt (presumably it blocks to throttle requests --
            confirm against its implementation).
        jobs: Queue of IP strings to look up.
        results: Queue that receives one space-joined line per successful
            lookup.
        progress: Queue that receives an empty string for every failed
            lookup, so the progress counter still advances.
    """
    global cancel
    while not cancel:
        try:
            ratelimit.ratecontrol()
            ip = jobs.get(timeout=2)  # Wait 2 seconds
        except Queue.Empty:
            # Nothing to do right now; re-check the cancel flag.
            continue
        except Exception:
            logging.exception("Unknown Error!")
            continue

        # From here on an item has been taken off the queue, so task_done()
        # must be called exactly once no matter what happens; otherwise a
        # jobs.join() elsewhere would block forever.
        try:
            ok, result = fetch(ip)
            if not ok:
                logging.error("Fetch information failed, ip:{}".format(ip))
                progress.put("")  # Notify the progress even it failed
            elif result is not None:
                results.put(" ".join(result))
        except Exception:
            logging.exception("Unknown Error!")
        finally:
            jobs.task_done()  # Notify one item

def process(target, results, progress):
    """Drain *results* into *target*, one line per item.

    Loops until the module-level ``cancel`` flag becomes true. Each dequeued
    item is printed to *target*, an empty string is pushed onto *progress*,
    and the item is marked done on *results*.

    Args:
        target: File-like object the lines are printed to.
        results: Queue of finished output lines.
        progress: Queue used to signal that one more item is complete.
    """
    global cancel
    while not cancel:
        try:
            record = results.get(timeout=5)
        except Queue.Empty:
            # Timed out with nothing queued; go back and re-check cancel.
            continue
        print >>target, record
        progress.put("")
        results.task_done()

def progproc(progressbar, count, progress):
    """Advance *progressbar* once for every item taken from *progress*.

    Per the original author's note, ProgressBar is not thread-safe, so the
    other threads report completions through a queue and this thread alone
    touches the bar. The bar is said to print to stderr, which keeps it out
    of the default result output on stdout.

    The loop never exits on its own. *count* is accepted but not used here.

    Args:
        progressbar: Object whose ``update(n)`` is called with the running
            total (1, 2, 3, ...).
        count: Unused.
        progress: Queue on which each item represents one completed job.
    """
    completed = 0
    while True:
        try:
            progress.get(timeout=5)
        except Queue.Empty:
            continue
        completed += 1
        progressbar.update(completed)

 

以上是 Python抓取淘宝IP地址数据 的全部内容, 来源链接: utcz.com/z/387861.html

回到顶部