# Scrape all artworks of a given artist on haiyiai.com
# (original author note: 糖醋慕雨的快乐时光)
import requests
import json
import os

# Absolute directory of this script, so downloads land next to the script
# regardless of the current working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
# Folder that will hold the downloaded images.
douyin_folder = os.path.join(current_directory, "haiyi")
# Create it if missing; exist_ok avoids the check-then-create race.
os.makedirs(douyin_folder, exist_ok=True)

url = 'https://www.haiyiai.com/api/v1/artwork/home/list'
# Example artist profile URL: https://www.haiyiai.com/user/1873663116
account_no = input('请输入画师主页链接: ')
# Extract the numeric artist id; tolerate trailing slashes or extra
# path segments after the id (e.g. ".../user/123/").
account_no = int(account_no.split('user/')[1].strip().split('/')[0])
detail_params = {  # request parameters
    'account_no': f"{account_no}",  # artist id (API expects a string)
    'keyword': "",
    'order_by': "new",
    'page': 1,
    'page_size': 20,
}
headers = {
    'token': '',  # fill in your own token
    'user-agent': '',  # fill in your own user-agent
    'referer': f'https://www.haiyiai.com/user/{account_no}',
    'cookie': '',  # fill in your own cookie
    'x-device-id': '316bfe5d-150c-4498-afd1-291bb177d212',
    'x-platform': 'web',
}
def scrape_Main(url, headers, params):
    """POST the list-API request and return the response body text.

    Returns the decoded response text on HTTP 200, or None on a
    request failure or any non-200 status.
    """
    try:
        # The POST itself is the only call that can raise
        # requests.RequestException, so it must be inside the try
        # (the original performed it before the try, making the
        # handler unreachable).
        response = requests.post(url, headers=headers, json=params)
        if response.status_code == 200:
            response.encoding = response.apparent_encoding
            return response.text
    except requests.RequestException:
        print('请求失败')
    # Non-200 status or request error: no usable body.
    return None
def parsel_page(datas):
    """Extract image metadata from the parsed API payload.

    Takes the decoded JSON response and returns a list of dicts, one
    per artwork item, each holding the banner image's url, height and
    width.
    """
    items = datas['data']['items']
    return [
        {
            "url": item['banner']['url'],
            "height": item['banner']['height'],
            "width": item['banner']['width'],
        }
        for item in items
    ]
def main(url, headers, params):
    """Fetch one page of the artist's works and download every image.

    Downloads each banner image into the `douyin_folder` directory
    created at startup. Returns None; aborts quietly if the list
    request failed.
    """
    page_text = scrape_Main(url, headers, params)
    if page_text is None:
        # scrape_Main already reported the failure; json.loads(None)
        # would raise TypeError, so bail out here.
        return
    json_data = json.loads(page_text)
    url_img = parsel_page(json_data)
    for img in url_img:
        img_url = img['url']
        content = requests.get(url=img_url, headers=headers).content
        # Use the last URL segment as the file name.
        topic = img_url.split('/')[-1]
        # Write into the absolute folder created at startup; the original
        # used the CWD-relative 'haiyi//' path, which breaks whenever the
        # script is launched from a different working directory.
        with open(os.path.join(douyin_folder, topic), 'wb') as f:
            f.write(content)
        print(topic + '-------------下载成功')
if __name__ == "__main__":
    # Entry point: fetch the first page of the artist's works and download them.
    main(url, headers, detail_params)