# 抓取京东商品评论——以《你的名字》书籍为例
# 分析网页,获得评论所在地址 响应内容类似一个json文件,可以进行处理后得到json格式


# 所用的库
import json
import requests

# 抓取并打印评论内容
# HTTP headers sent with every comment-page request.
# NOTE(review): 'cookie' and 'user-agent' are blank here; presumably they are
# meant to be filled in with real browser values before running -- confirm.
# NOTE(review): 'accept-encoding' advertises 'br'; requests can only decode
# Brotli when a brotli package is installed -- verify in your environment.
headers = {
    'accept': '*/*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    'connection': 'keep-alive',
    'cookie': ' ',
    'host': 'club.jd.com',
    'referer': 'https://item.jd.com/',
    'user-agent': ''
}

# Comment-page endpoint; ``{}`` is replaced by the zero-based page number.
COMMENT_URL = 'https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98&productId=10699613518&score=0&sortType=5&page={}&pageSize=10&isShadowSku=0&fold=1'

# Text that appears as the content of a review left without any text;
# such reviews are skipped.
EMPTY_COMMENT = '此用户未填写评价内容'

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10


def strip_jsonp(text):
    """Return the JSON payload wrapped by a JSONP call such as ``cb({...});``.

    Only the outermost ``callback(`` prefix and the final ``)`` / ``);`` are
    removed, so parentheses and semicolons inside the payload (for example in
    review text) are left intact. Text that is not JSONP-wrapped is returned
    stripped of surrounding whitespace and trailing semicolons.
    """
    payload = text.strip().rstrip(';').rstrip()
    open_idx = payload.find('(')
    is_wrapped = (
        open_idx > 0
        and payload.endswith(')')
        # A bare JSON document may contain '(' inside a string; never treat
        # it as a callback wrapper.
        and not payload.startswith(('{', '[', '"'))
    )
    return payload[open_idx + 1:-1] if is_wrapped else payload


def parse_comment_page(raw):
    """Decode a raw response body and parse it into a Python object.

    The bytes are decoded as GB18030 *before* JSON parsing. Decoding as
    ISO-8859-1 first and re-decoding each field afterwards is unsafe: a GBK
    trail byte can be 0x5C (backslash), which the JSON parser would treat as
    an escape character. Undecodable bytes are replaced rather than raising.

    Raises:
        json.JSONDecodeError: if the body is not valid JSON/JSONP (for
            example an empty response).
    """
    text = raw.decode('GB18030', errors='replace')
    return json.loads(strip_jsonp(text))


def extract_reviews(page):
    """Return ``(content, creation_time, nickname)`` tuples from a parsed page.

    Reviews whose content equals ``EMPTY_COMMENT`` are skipped. A malformed
    entry (missing key or wrong type) is reported with ``print`` and skipped,
    so one bad entry does not abort the rest of the page. A page with no
    ``'comments'`` list yields an empty result.
    """
    reviews = []
    for item in page.get('comments') or []:
        try:
            content = item['content']
            if content == EMPTY_COMMENT:
                continue
            reviews.append((content, item['creationTime'], item['nickname']))
        except (KeyError, TypeError) as err:
            print(err)
    return reviews


def main(pages=6):
    """Fetch the first ``pages`` comment pages and print each review.

    Each printed review is numbered consecutively across pages and followed
    by its creation time, the author's nickname and a separator line.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            HTTP status.
        json.JSONDecodeError: if a response body cannot be parsed.
    """
    serial_num = 0  # running review number across all pages
    for page_num in range(pages):
        response = requests.get(
            COMMENT_URL.format(page_num),
            headers=headers,
            timeout=REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        page = parse_comment_page(response.content)
        for content, creation_time, nickname in extract_reviews(page):
            serial_num += 1
            print(serial_num, '. ', content)
            print(creation_time)
            print(nickname)
            print('----------------')


if __name__ == '__main__':
    main()

# 私货:没有

