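# Step 1: fetch the page source of the wallpaper index page.
# Step 2: collect the href of every <a> tag under the target <ul> list.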
import json
import os
import re

import requests
from lxml import etree
# Scrape desk.zol.com.cn: read the index page, follow every album link found
# in the "pic-list2" list, and save each album's images at their original size
# into the local "img" directory.
domain = 'https://desk.zol.com.cn'
url = 'https://desk.zol.com.cn/'
img_dir = 'img'
# Seconds to wait per request; without a timeout requests can block forever.
request_timeout = 10

# Compiled once, outside the loops: captures the value assigned to the
# JavaScript variable `deskPicArr` embedded in each album page.
obj = re.compile(r'var deskPicArr.*?=(?P<deskPicArr>.*?);', re.S)


def _fetch(target):
    """Return the response for *target*, or None when the request failed.

    Connection errors, timeouts and non-2xx statuses are reported on stdout
    so that one bad URL does not abort the whole crawl.
    """
    try:
        response = requests.get(target, timeout=request_timeout)
        response.raise_for_status()
    except requests.RequestException as err:
        print(f'skip {target}: {err}')
        return None
    return response


# The output directory must exist before any image is opened for writing.
os.makedirs(img_dir, exist_ok=True)

# Nothing can be done without the index page, so let a failure here raise.
resp = requests.get(url, timeout=request_timeout)
resp.raise_for_status()
resp.encoding = 'gbk'  # decode the index page as GBK
et = etree.HTML(resp.text)
result = et.xpath('//ul[@class="pic-list2 clearfix"]/li/a/@href')

for href in result:
    # hrefs are joined to the site root, e.g.
    # https://desk.zol.com.cn/bizhi/10055_120350_2.html
    page_url = domain + href
    print(page_url)

    page_resp = _fetch(page_url)
    if page_resp is None:
        continue

    match = obj.search(page_resp.text)
    if match is None:
        # Page has no embedded deskPicArr assignment; nothing to download.
        print(f'skip {page_url}: deskPicArr not found')
        continue

    try:
        dic = json.loads(match.group('deskPicArr'))
    except json.JSONDecodeError as err:
        print(f'skip {page_url}: cannot parse deskPicArr ({err})')
        continue

    for pic in dic.get('list', []):
        oriSize = pic.get('oriSize')
        imgsrc = pic.get('imgsrc')
        if not oriSize or not imgsrc:
            # Both fields are required to build the download URL.
            continue

        # The URL template carries a ##SIZE## placeholder for the resolution.
        imgsrc = imgsrc.replace('##SIZE##', oriSize)
        print(imgsrc)

        resp_img = _fetch(imgsrc)
        if resp_img is None:
            continue

        # Use the last path segment of the URL as the local file name.
        name = imgsrc.split('/')[-1]
        # Images are binary: write resp.content (bytes), not resp.text.
        with open(os.path.join(img_dir, name), mode='wb') as f:
            f.write(resp_img.content)
# Tags: