# La la la, here is my homework submission.
import json
import os
import re

import requests
from lxml import etree
# HTTP headers passed to every requests.get() call in this file: a desktop
# Edge/Chrome User-Agent string plus a cookie string.
# NOTE(review): the cookie looks like it was copied from a browser session and
# may expire — confirm it is still required by the site.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50',
'cookie': 'lv=1676985897; vn=1; ip_ck=4MCF4f//j7QuNzc0NjU4LjE2NzY5ODU4OTc=; Hm_lvt_ae5edc2bc4fc71370807f6187f0a2dd0=1676985897; Adshow=0; Hm_lpvt_ae5edc2bc4fc71370807f6187f0a2dd0=1676987325; questionnaire_pv=1676937645'
}
# Listing page that run() starts scraping from.
url = 'https://desk.zol.com.cn/meinv/'
def get_home_page(url, timeout=10):
    """Fetch *url* and return its body decoded as GB2312 text.

    Despite the name, this helper is used for both the listing page and the
    individual album pages.

    Args:
        url: Absolute URL of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The response body as ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on error responses instead of handing an error page to the
    # HTML/regex parsing code downstream.
    res.raise_for_status()
    # The response is decoded as GB2312 rather than whatever requests guesses.
    res.encoding = 'GB2312'
    return res.text
def get_urls(html):
    """Return the ``href`` values of the links in the listing page's picture list.

    Args:
        html: Markup of the listing page as a string.

    Returns:
        A list with one ``href`` string per ``<li><a>`` entry found under the
        ``ul`` whose class is ``pic-list2 clearfix``.
    """
    document = etree.HTML(html)
    return document.xpath(
        '/html/body/div/div/ul[@class="pic-list2 clearfix"]/li/a/@href'
    )
# Matches up to (and including) the whitespace after the ``=`` that follows
# ``var deskPicArr``; the JSON value starts right where the match ends.
_DESK_PIC_ARR_START = re.compile(r'var deskPicArr.*?=\s*', re.S)


def get_imgs(urls):
    """Return the full-size image URLs listed on one album page.

    The album page embeds a JavaScript assignment ``var deskPicArr = {...};``
    whose value is parsed as JSON. Each entry of its ``list`` carries an
    ``imgsrc`` template containing the placeholder ``##SIZE##`` and an
    ``oriSize`` value that is substituted for it.

    Args:
        urls: URL of a single album page (the plural name is historical).

    Returns:
        A list of image URL strings. Empty when the page has no
        ``deskPicArr`` assignment or its ``list`` is missing or empty.

    Raises:
        json.JSONDecodeError: If the text after ``var deskPicArr =`` is not
            valid JSON.
    """
    html = get_home_page(urls)
    match = _DESK_PIC_ARR_START.search(html)
    if match is None:
        # Page layout differs (or an unexpected page was returned): nothing
        # to download rather than an AttributeError on None.
        return []

    # raw_decode parses exactly one JSON value starting at the given index, so
    # a ';' inside a string value no longer truncates the payload the way a
    # "everything up to the first ';'" regex capture does.
    desk_pic, _ = json.JSONDecoder().raw_decode(html, match.end())

    imgsrcs = []
    for item in desk_pic.get('list') or []:
        ori_size = item.get('oriSize')
        imgsrc = item.get('imgsrc')
        if ori_size is None or imgsrc is None:
            # Incomplete entry: a URL cannot be built from it.
            continue
        imgsrcs.append(imgsrc.replace('##SIZE##', ori_size))
    return imgsrcs
def download(imgsrc, timeout=10):
    """Download the image at *imgsrc* into the local ``wallpaper`` directory.

    The file is named after the last ``/``-separated segment of the URL and
    overwrites any existing file with that name.

    Args:
        imgsrc: Absolute URL of the image.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
        OSError: If the directory or file cannot be created or written.
    """
    res = requests.get(url=imgsrc, headers=headers, timeout=timeout)
    # Do not save an HTTP error page to disk as if it were an image.
    res.raise_for_status()
    name = imgsrc.split('/')[-1]
    # Create the target directory on first use so open() does not fail with
    # FileNotFoundError.
    os.makedirs('wallpaper', exist_ok=True)
    with open(f'wallpaper/{name}', 'wb') as f:
        f.write(res.content)
def run():
    """Scrape the listing page and download every image of every album.

    Fetches the page at the module-level ``url``, extracts the album links,
    and for each link (the first two are skipped) resolves the image URLs and
    downloads them one by one.
    """
    listing_html = get_home_page(url)
    # NOTE(review): the first two links are skipped — presumably they are not
    # regular albums; confirm against the live page.
    for href in get_urls(listing_html)[2:]:
        # Hrefs are site-relative; drop the first character and prepend the
        # site root.
        album_url = 'https://desk.zol.com.cn/' + href[1:]
        for picture_url in get_imgs(album_url):
            download(picture_url)
# Script entry point: the scrape starts as soon as this module is executed
# (or imported, since there is no __main__ guard).
run()
# Tags: