# 【爬虫完整版】学完可以接任何爬虫副业单!来者不拒!目前B站最完整的python学
# P31 最新代码来了,大家可以用我的代码试试
# 目前只抓取1-10页的,如果有需要大家自己改,但是不是恶意抓取,仅供学习!谢谢!
import json
import re
from io import StringIO
from urllib.parse import urljoin

import lxml
import requests
from bs4 import BeautifulSoup

import dic as dic
if __name__ == "__main__":
for i in range(1, 11):
url = 'https://www.umei.cc/bizhitupian/meinvbizhi/'
if i > 1:
url = 'https://www.umei.cc/bizhitupian/meinvbizhi/' + 'index_' + str(i) + '.htm'
print(url)
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.'
}
dic = {
'href': ''
}
response = requests.get(url=url, headers=headers, stream=True)
response.encoding = 'utf-8'
page_text = BeautifulSoup(response.text, 'html.parser')
d_list = page_text.find('div', {'id': 'infinite_scroll'}).find_all('div', class_='title')
for d in d_list:
href = 'https://www.umei.cc/' + d.find('a').get('href')
url1 = href
headers1 = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.'
}
response1 = requests.get(url=url1, headers=headers1, stream=True)
response1.encoding = 'utf-8'
page_text1 = BeautifulSoup(response1.text, 'html.parser')
big = page_text1.find('div', class_='big-pic')
big_src = big.find('img').get('src')
print(big_src)

