欢迎光临散文网 会员登录 & 注册

python requests模块演示

2022-05-13 22:47 作者:碌碌浮尘  | 我要投稿

import requests

from bs4 import BeautifulSoup

import csv

import re

import time

import json

import os.path

import random


# Scraped figures keyed by date string. Each value is a dict mapping 'total'
# and every district name to a [confirmed, asymptomatic] pair of ints.
record = {}

_SOGOU_ROOT = 'https://weixin.sogou.com'
_NUMBER = re.compile(r'\d+')
# The published source listed only ASCII ',' and '、' here, which made the
# comma replacement a no-op; the full-width ',' is included so paragraphs
# written with Chinese punctuation are actually split into clauses.
_SEPARATORS = re.compile('[,,、]')


def _normalize_commas(text):
    """Return *text* with ',', ',' and '、' all rewritten as ASCII commas."""
    return _SEPARATORS.sub(',', text)


def _find_article_link(soup, date):
    """Return the absolute URL of the matching '上海发布' article, or None.

    Looks at result slots 0-7 of the parsed search page. A slot that is
    missing from the page is skipped instead of raising IndexError.
    """
    for n in range(8):
        accounts = soup.select('#sogou_vr_11002601_account_{}'.format(n))
        titles = soup.select('#sogou_vr_11002601_title_{}'.format(n))
        if not accounts or not titles:
            continue
        if accounts[0].text != '上海发布':
            continue
        title = titles[0]
        titletext = title.text
        href = title.get('href')
        wanted = (date, '本市各区', '确诊病例', '居住地信息')
        if href and all(word in titletext for word in wanted):
            print(titletext)
            return _SOGOU_ROOT + href
    return None


def _parse_total(paragraph):
    """Return (confirmed, asymptomatic) from the summary paragraph, or None.

    The totals are taken from the first clause that mentions both '确诊' and
    '无症状', does not mention '隔离', and contains at least two numbers.
    """
    for clause in _normalize_commas(paragraph).split(','):
        if '确诊' in clause and '无症状' in clause and '隔离' not in clause:
            numbers = _NUMBER.findall(clause)
            if len(numbers) >= 2:
                return int(numbers[0]), int(numbers[1])
    return None


def _parse_district(paragraph):
    """Return (district, confirmed, asymptomatic) for one paragraph, or None.

    None means no '...区' name follows a comma in the paragraph. A count that
    is not mentioned in the paragraph is reported as 0.
    """
    text = _normalize_commas(paragraph)
    found = re.findall(',.+区', text)
    if not found:
        return None
    # Drop the leading comma, then strip any HTML tag caught by the greedy match.
    qu = re.sub(r'<.*>', '', found[0][1:])

    quezhen = 0
    wuzheng = 0
    for clause in text.split(','):
        numbers = _NUMBER.findall(clause)
        if not numbers:
            continue
        has_confirmed = '确诊' in clause
        has_asymptomatic = '无症状' in clause
        if has_confirmed and not has_asymptomatic:
            quezhen = int(numbers[0])
        elif has_asymptomatic and not has_confirmed:
            wuzheng = int(numbers[0])
    return qu, quezhen, wuzheng


def func(date):
    """Scrape the per-district case counts published for *date*.

    Searches weixin.sogou.com for the '上海发布' article about *date*, follows
    the redirect script to the article itself, and parses the paragraphs that
    start with *date*. Results are stored in the module-level ``record``:
    ``record[date]['total']`` and ``record[date][district]`` each hold
    ``[confirmed, asymptomatic]``.

    Args:
        date: Date text exactly as it appears in the article, e.g. '5月12日'.

    Returns:
        None. Progress and results are printed; '未找到数据!' is printed when
        no article, redirect target or matching paragraph is found, and
        '统计存在偏差!' when the district sums do not match the totals (or the
        totals could not be parsed at all).

    Raises:
        Whatever ``requests.get`` raises on network failure or timeout.
    """
    record[date] = {}
    url = (
        'https://weixin.sogou.com/weixin?type=2&s_from=input'
        '&query={}+各区确诊病例++shanghaifabu&ie=utf8&_sug_=y&_sug_type_='
        '&w=01019900&sut=523&sst0=1652065715152&lkt=0%2C0%2C0'
    ).format(date)
    # Adjacent literals keep the header free of the indentation whitespace
    # that backslash continuation inside the string used to embed.
    headers = {
        'user-agent': (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/86.0.4240.75 Safari/537.36'
        ),
    }

    search = requests.get(url, headers=headers, timeout=5)
    # Replay the cookies from the search response on the follow-up requests.
    headers['cookie'] = '; '.join(
        '{}={}'.format(name, value)
        for name, value in search.cookies.get_dict().items()
    )
    soup = BeautifulSoup(search.text, features='html.parser')
    link = _find_article_link(soup, date)
    time.sleep(0.5)
    if link is None:
        print('未找到数据!')
        return

    # The intermediate page assembles the real article URL in script lines
    # of the form  url += '...'  ; join the quoted fragments back together.
    daily = requests.get(link, headers=headers, timeout=5)
    fragments = re.findall(r"url \+.*'", daily.text)
    flink = ''.join(
        piece.replace('url += ', '').replace("'", '') for piece in fragments
    )
    if not flink:
        print('未找到数据!')
        return
    time.sleep(0.5)
    fdaily = requests.get(flink, headers=headers, timeout=5)

    # Every paragraph of interest starts with the date and runs to its '</p>'.
    chips = re.findall(r'{}.*?p>'.format(re.escape(date)), fdaily.text)
    if not chips:
        print('未找到数据!')
        return

    total = _parse_total(chips[0])
    if total is not None:
        record[date]['total'] = list(total)
        print('总计确诊:{} 无症状:{}'.format(*total))

    for chip in chips[1:]:
        parsed = _parse_district(chip)
        if parsed is None:
            continue
        qu, quezhen, wuzheng = parsed
        print('{} 确诊:{} 无症状:{}'.format(qu, quezhen, wuzheng))
        record[date][qu] = [quezhen, wuzheng]

    districts = [
        counts for name, counts in record[date].items() if name != 'total'
    ]
    sums = (
        sum(counts[0] for counts in districts),
        sum(counts[1] for counts in districts),
    )
    # Missing totals cannot be validated, so they are reported as a mismatch.
    if total is None or sums != total:
        print('统计存在偏差!')



        

# Scrape each requested day of May; range(12, 13) covers only the 12th.
for day in range(12, 13):
    date = '5月{}日'.format(day)
    try:
        func(date)
    except Exception as exc:
        # Only ordinary errors are caught, so Ctrl-C / SystemExit still abort
        # the run, and the cause is shown instead of being discarded.
        print(date, ' error', repr(exc))
    # Pause between days (presumably to stay under the site's rate limits).
    time.sleep(5)


python requests模块演示的评论 (共 条)

分享到微博请遵守国家法律