Python - 爬取美女图片

python

目录

  • 校花网
  • 爬取单页
  • 7160图片大全
  • 天极网

  • 返回Python目录

校花网官网

pip install requests

pip install beautifulsoup4

爬取单页

# Single-page scraper for the nice.ruyile.com (校花网) gallery site.
# NOTE: the original first line carried HTML-extraction residue
# (`python">import os`), which is invalid Python and is removed here.
import os

import requests
from bs4 import BeautifulSoup

# Directory containing this script; downloaded images go beneath it.
base_dir = os.path.dirname(os.path.abspath(__file__))

def spider():
    """Scrape one listing page of nice.ruyile.com and download every
    entry's photo set into ``img_list/<title>/`` under ``base_dir``.

    Performs network and filesystem I/O only; returns ``None``.
    """
    response = requests.get(url='https://nice.ruyile.com/?f=2', timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Outer div wrapping all thumbnail entries on the listing page.
    all_content = soup.find(name='div', attrs={'class': 'm3_xhtp'})
    tag_list = all_content.find_all(name='div', attrs={'class': 'tp_list'})

    for item in tag_list:
        # The second <a> carries both the detail-page link and the title,
        # e.g. https://nice.ruyile.com/r16604/ "清纯大眼睛MM".
        res = item.find_all(name='a')[1]
        a_content_file_path = res.text
        a_url = response.url.split('/?')[0] + res.get('href')

        # Fetch the detail page for this entry.
        girl_details = requests.get(url=a_url, timeout=10)
        girl_soup = BeautifulSoup(girl_details.text, 'html.parser')
        img_all_div = girl_soup.find_all(name='div', attrs={'class': 'm6_js'})[1]
        img_list = img_all_div.find_all(name='p')

        # exist_ok=True avoids FileExistsError when the spider is re-run
        # on the same entry (the original os.makedirs crashed on run 2).
        file_path = os.path.join(base_dir, 'img_list', a_content_file_path)
        os.makedirs(file_path, exist_ok=True)

        for p_tag in img_list:
            img_src = p_tag.find(name='img').get('src')
            img_content = requests.get(url=img_src, timeout=10)
            # Save under the image's original basename.
            with open(os.path.join(file_path, img_src.rsplit('/')[-1]), 'wb') as f:
                f.write(img_content.content)

if __name__ == '__main__':
    # Entry point: crawl the single listing page.
    spider()

7160图片大全

http://www.7160.com/xiaohua/list_6_1.html

# Multi-threaded scraper for www.7160.com campus-beauty listing pages.
import os
from concurrent.futures import ThreadPoolExecutor

import requests
from bs4 import BeautifulSoup

# Directory of this script; images are saved into the ``a`` subdirectory.
# The original wrapped dirname in a one-argument os.path.join — a no-op.
BASE_PATH = os.path.dirname(os.path.abspath(__file__))

def spider(line):
    """Download every thumbnail image on listing page *line* of 7160.com.

    :param line: 1-based page number substituted into the listing URL.
    Images are written to ``<BASE_PATH>/a/<basename>``; returns ``None``.
    """
    response = requests.get(
        url='http://www.7160.com/xiaohua/list_6_{}.html'.format(line),
        timeout=10,
    )
    # The site serves GBK; requests' default guess mangles the text.
    response.encoding = 'GBK'
    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find(name='div', attrs={'class': "news_bom-left"})

    # Create the target directory up front; the original crashed with
    # FileNotFoundError unless ``a/`` had been created by hand.
    target_dir = os.path.join(BASE_PATH, 'a')
    os.makedirs(target_dir, exist_ok=True)

    for li in div.find_all(name='li'):
        a_url = li.find('img').get('src')
        print(response.url, a_url)
        path = os.path.join(target_dir, a_url.rsplit('/', 1)[-1])
        res = requests.get(a_url, timeout=10)
        with open(path, mode='wb') as f:
            f.write(res.content)

def run():
    """Fan spider() out over listing pages 1-10 on a 10-thread pool.

    The original never shut the executor down and discarded its futures,
    so any exception raised inside a worker vanished silently.  The
    context manager waits for completion, and failures are reported
    (best-effort: one bad page does not abort the others).
    """
    with ThreadPoolExecutor(10) as pool:
        futures = [pool.submit(spider, page) for page in range(1, 11)]
        for future in futures:
            exc = future.exception()
            if exc is not None:
                print('page download failed:', exc)

if __name__ == '__main__':
    # Kick off the threaded crawl.
    run()

天极网

天极网明星图片:http://pic.yesky.com/c/6_243_1.shtml

# Multi-threaded scraper for celebrity photo galleries on pic.yesky.com.
import os
from concurrent.futures import ThreadPoolExecutor

import requests
from bs4 import BeautifulSoup

# Directory of this script; images are saved under the ``b`` subdirectory.
# The original wrapped dirname in a one-argument os.path.join — a no-op.
BASE_PATH = os.path.dirname(os.path.abspath(__file__))

def worker(a_url, title):
    """Download every image of one gallery page into ``b/<title>/``.

    :param a_url: absolute URL of the gallery's detail page.
    :param title: gallery title, used as the target directory name
                  (the caller is expected to have created it).
    """
    response = requests.get(a_url, timeout=10)
    # Detail pages are GBK-encoded.
    response.encoding = 'GBK'
    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find(name='div', attrs={'class': "overview"})

    for item in div.find_all(name='img'):
        src = item.get('src')
        print(a_url, src)
        path = os.path.join(BASE_PATH, 'b', title, src.rsplit('/', 1)[-1])
        # Rewriting the 113x113 size segment requests a larger (740px-wide)
        # variant of the thumbnail URL.  Fetch BEFORE opening the file so a
        # failed download does not leave an empty file behind (the original
        # opened first and truncated the file even when the GET failed).
        res = requests.get(src.replace('113x113', '740x-'), timeout=10)
        with open(path, 'wb') as f:
            f.write(res.content)

def spider(line):
    """Scrape listing page *line* of pic.yesky.com and hand each linked
    gallery to worker() for download.

    :param line: 1-based listing page number.
    """
    response = requests.get(
        url='http://pic.yesky.com/c/6_243_{}.shtml'.format(line),
        timeout=10,
    )
    soup = BeautifulSoup(response.text, 'html.parser')
    div = soup.find(name='div', attrs={'class': "lb_box"})

    for dd in div.find_all(name='dd'):
        # Query the anchor once instead of twice.
        anchor = dd.find('a')
        a_url, title = anchor.get("href"), anchor.get("title")
        # makedirs(..., exist_ok=True) creates the missing parent ``b``
        # directory (os.mkdir raised FileNotFoundError without it) and is
        # race-free across the 10 spider threads, unlike the original
        # isdir-then-mkdir check.
        path = os.path.join(BASE_PATH, 'b', title)
        os.makedirs(path, exist_ok=True)
        worker(a_url, title)

def run():
    """Fan spider() out over listing pages 1-10 on a 10-thread pool.

    As in the 7160 script, the original discarded its futures and never
    shut the executor down, silently swallowing worker exceptions; the
    context manager waits for completion and failures are reported
    without aborting the remaining pages.
    """
    with ThreadPoolExecutor(10) as pool:
        futures = [pool.submit(spider, page) for page in range(1, 11)]
        for future in futures:
            exc = future.exception()
            if exc is not None:
                print('page download failed:', exc)

if __name__ == '__main__':
    # Kick off the threaded crawl.
    run()

以上是 Python - 爬取美女图片 的全部内容, 来源链接: utcz.com/z/386910.html

回到顶部