python requests模块演示

2022-05-13 22:47 作者:碌碌浮尘 0人读过 | 我要投稿

import requests

from bs4 import BeautifulSoup

import csv

import re

import time

import json

import os.path

import random

# Scraped results keyed by the date string passed to func(). func() stores
# record[date]['total'] and record[date][<district name>], each as a
# two-element list [confirmed, asymptomatic].
record ={}

# Compiled once: both patterns are applied to every comma-separated fragment.
_NUMBER_RE = re.compile(r'\d+')
_SEPARATOR_RE = re.compile('[，、]')


def _parse_total(summary):
    """Return (confirmed, asymptomatic) from the citywide summary text, or None."""
    # Normalise Chinese separators to ASCII commas before splitting.
    for item in _SEPARATOR_RE.sub(',', summary).split(','):
        if '确诊' in item and '无症状' in item and '隔离' not in item:
            numbers = _NUMBER_RE.findall(item)
            # Both counts must be present in the same fragment.
            if len(numbers) >= 2:
                return int(numbers[0]), int(numbers[1])
    return None


def _parse_district(chip):
    """Return (district, confirmed, asymptomatic) for one paragraph, or None."""
    normalized = _SEPARATOR_RE.sub(',', chip)
    names = re.findall(',.+区', normalized)
    if not names:
        # Paragraph mentions the date but names no district; nothing to record.
        return None
    # Drop the leading comma, then strip any HTML tags caught by the match.
    district = re.sub(r'<.*>', '', names[0][1:])
    confirmed = 0
    asymptomatic = 0
    for item in normalized.split(','):
        numbers = _NUMBER_RE.findall(item)
        if not numbers:
            continue
        has_confirmed = '确诊' in item
        has_asymptomatic = '无症状' in item
        # A fragment naming both categories is ambiguous and is ignored.
        if has_confirmed and not has_asymptomatic:
            confirmed = int(numbers[0])
        elif has_asymptomatic and not has_confirmed:
            asymptomatic = int(numbers[0])
    return district, confirmed, asymptomatic


def func(date):
    """Scrape one day's per-district case counts and store them in ``record``.

    Searches Sogou's WeChat index for the article published by the
    '上海发布' account whose title matches ``date``, follows the redirect
    page to the article itself, and parses the citywide total plus one
    entry per district paragraph.  Results are written to
    ``record[date]`` as ``[confirmed, asymptomatic]`` lists and echoed to
    stdout.  A message is printed instead when no matching article, link
    or paragraph is found, or when the district sums disagree with the
    citywide total.

    Args:
        date: Date text as it appears in the article title, e.g. '5月12日'.

    Returns:
        None.  Network errors raised by ``requests`` propagate to the caller.
    """
    record[date] = {}
    url = (
        'https://weixin.sogou.com/weixin?type=2&s_from=input&query={}'
        '+各区确诊病例++shanghaifabu&ie=utf8&_sug_=y&_sug_type_=&w=01019900'
        '&sut=523&sst0=1652065715152&lkt=0%2C0%2C0'
    ).format(date)
    # Adjacent literals instead of backslash continuations, so no source
    # indentation leaks into the header value.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) '
            'appleWebKit/537.36 (KHTML, like Gecko) Chrome/'
            '86.0.4240.75 Safari/537.36'
        ),
    }
    sogoulink = 'https://weixin.sogou.com'

    data = requests.get(url, headers=headers, timeout=5)
    # Replay the cookies from the search response on the follow-up requests.
    headers['cookie'] = ''.join(
        '{}={}; '.format(name, value)
        for name, value in data.cookies.get_dict().items()
    )
    soup = BeautifulSoup(data.text, features='html.parser')

    link = None
    for n in range(8):
        accounts = soup.select('#sogou_vr_11002601_account_{}'.format(n))
        titles = soup.select('#sogou_vr_11002601_title_{}'.format(n))
        # select() returns an empty list for a missing slot; skip it rather
        # than fail with IndexError when the page has fewer than 8 results.
        if not accounts or not titles:
            continue
        if accounts[0].text != '上海发布':
            continue
        title = titles[0]
        titletext = title.text
        href = title.get('href')
        if (href and date in titletext and '本市各区' in titletext
                and '确诊病例' in titletext and '居住地信息' in titletext):
            link = sogoulink + href
            print(titletext)
            break

    if link is None:
        print('未找到数据！')
        return

    time.sleep(0.5)
    daily = requests.get(link, headers=headers, timeout=5)
    # The redirect page builds the article URL in script via `url += '...'`
    # statements; join the quoted pieces back together.
    fragments = re.findall(r"url \+.*'", daily.text)
    flink = ''.join(
        piece.replace('url += ', '').replace("'", '') for piece in fragments
    )
    if not flink:
        print('未找到数据！')
        return

    time.sleep(0.5)
    fdaily = requests.get(flink, headers=headers, timeout=5)
    # Each chip runs from an occurrence of the date to the next 'p>'.
    chips = re.findall(r'{}.*?p>'.format(re.escape(date)), fdaily.text)
    if not chips:
        print('未找到数据！')
        return

    # First chip is the citywide summary; the rest are district paragraphs.
    total = _parse_total(chips[0])
    if total is not None:
        record[date]['total'] = list(total)
        print('总计确诊:{} 无症状:{}'.format(*total))

    for chip in chips[1:]:
        parsed = _parse_district(chip)
        if parsed is None:
            continue
        qu, quezhen, wuzheng = parsed
        print('{} 确诊:{} 无症状:{}'.format(qu, quezhen, wuzheng))
        record[date][qu] = [quezhen, wuzheng]

    if total is None:
        # Without a citywide total there is nothing to cross-check against.
        print('未找到总计数据，无法校验！')
        return

    district_counts = [
        counts for name, counts in record[date].items() if name != 'total'
    ]
    confirmed_sum = sum(counts[0] for counts in district_counts)
    asymptomatic_sum = sum(counts[1] for counts in district_counts)
    if confirmed_sum != total[0] or asymptomatic_sum != total[1]:
        print('统计存在偏差！')

# Scrape each requested day of May in turn (currently only the 12th).
for day in range(12, 13):
    date = '5月{}日'.format(day)
    try:
        func(date)
    except Exception as exc:
        # One bad day should not stop the run, but report the cause.
        # Catching Exception rather than using a bare except lets
        # Ctrl-C / SystemExit still terminate the script.
        print(date, ' error', repr(exc))
    # Pause between days to avoid hammering the search endpoint.
    time.sleep(5)

标签：

python requests模块演示

python requests模块演示的评论 (共条)

你可能也喜欢这些文章

最新发布的文章

python requests模块演示

本文作者的其他文章

python requests模块演示的评论 (共 条)

你可能也喜欢这些文章

最新发布的文章

python requests模块演示的评论 (共条)