# Python爬虫实战教程:批量爬取某网站图片 (Python crawler tutorial: batch-download images from a website)
import re
import time
import os
import requests
def get_html_str(url, head, timeout=10):
    """Fetch *url* and return the response body decoded as text.

    Parameters:
        url: the page URL to request.
        head: dict of HTTP request headers (e.g. a User-Agent) passed to requests.
        timeout: seconds to wait before aborting the request (new, keyword-compatible
            default so existing two-argument callers are unaffected).

    Returns:
        The response HTML as a str.
    """
    # Without a timeout, a stalled server would hang the whole crawl forever.
    resp = requests.get(url=url, headers=head, timeout=timeout)
    return resp.text
if __name__ == '__main__':
    # Desktop-browser User-Agent so the site serves the normal HTML pages.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62 '
    }
    start = input("请输入起始页[start,end]:")
    end = input("请输入终止页:")
    for i in range(int(start), int(end) + 1):
        # i-th listing page of the gallery index.
        base_url = "https://www.vmgirls.net/page/{}?ref=www.xike.store".format(i)
        txt01 = get_html_str(base_url, headers)
        # Pull every album detail-page link out of the listing HTML.
        re01 = 'class="media-content" href="(.*?)"'
        title_url = re.findall(re01, txt01)
        for j in title_url:
            txt02 = get_html_str(j, headers)
            re02 = '<h1 class="post-title mb-3">(.*?)</h1>'
            titles = re.findall(re02, txt02)
            if not titles:
                # Robustness: skip pages whose markup doesn't match the pattern
                # instead of crashing on an IndexError.
                continue
            # BUG FIX: str.replace returns a new string; the original discarded the
            # result, so '/', '|' and ':' were never actually stripped from the title
            # and directory creation could fail or nest unexpectedly.
            file_title = titles[-1].replace('/', '').replace('|', '').replace(':', '')
            file_path = 'weimei/' + file_title
            if os.path.exists(file_path):
                print('文件夹已存在')
                continue
            # BUG FIX: os.makedirs also creates the missing 'weimei' parent directory;
            # os.mkdir would raise FileNotFoundError on a fresh run.
            os.makedirs(file_path)
            re03 = 'src="(.*?)"'
            img_url = re.findall(re03, txt02)
            del img_url[0:1]  # drop the first src match (site logo/avatar, not an album image)
            # BUG FIX: the original looped range(1, len(img_url)) and therefore never
            # downloaded img_url[0]. enumerate(..., start=1) keeps the 1-based file
            # names while covering every image.
            for k, url in enumerate(img_url, start=1):
                img_name = str(k) + ".png"
                img = requests.get(url=url, headers=headers).content
                # The with-statement closes the file; an explicit close() is redundant.
                with open(file_path + '/' + img_name, 'wb') as fp:
                    fp.write(img)
                print(f"第{k}张下载成功")
                time.sleep(0.3)  # throttle: be polite to the server between downloads
# 没难度 (easy enough)

