2021年最新Python爬虫教程+实战项目案例(新增scrapy部分+分布式爬
p31 去除爬取子页面的代码直接爬取图片。 精简了代码,减少了requests的请求。
import requests
from bs4 import BeautifulSoup
import os
# Download every lazy-loaded image found on the listing page into a local
# directory. Images are fetched directly from the listing page's
# "data-original" attributes, without visiting the per-image sub-pages.

# Directory the image files are written to. The original script only created
# the parent folder, so writing into "plcture" failed with FileNotFoundError.
save_dir = 'D:\\python_homework\\src\\爬取内容\\plcture'
# makedirs creates the missing parents too, and exist_ok lets the script be
# re-run without raising FileExistsError.
os.makedirs(save_dir, exist_ok=True)

url = 'https://www.umei.cc/weimeitupian/xiaoqingxintupian/'
url_2 = 'https://www.umei.cc'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 SLBrowser/8.0.0.9071 SLBChan/105'
}

# Send the same browser headers for the listing page as for the images, and
# bound the wait so a stalled connection cannot hang the script forever.
response = requests.get(url, headers=headers, timeout=10)
response.raise_for_status()
response.encoding = 'utf-8'
mian_page = BeautifulSoup(response.text, "html.parser")

# find() returns None when the container is absent; fall back to an empty
# list instead of crashing with AttributeError.
container = mian_page.find("div", class_='item_list infinite_scroll')
imglist = container.find_all("img", class_="lazy") if container is not None else []

for img_tag in imglist:
    src = img_tag.get("data-original")
    if not src:
        # Tag carries no lazy-load URL; nothing to download.
        continue
    name = src.split("/")[-1]
    if not name:
        # URL ends with "/", so there is no usable file name.
        continue
    img_response = requests.get(src, headers=headers, timeout=10)
    if not img_response.ok:
        # Do not save an HTTP error body as if it were image data.
        print(name + "下载失败!")
        continue
    # The with-statement closes the file; no explicit close() is needed.
    with open(os.path.join(save_dir, name), 'wb') as fp:
        fp.write(img_response.content)
    print(name + "下载完成!")

