欢迎光临散文网 会员登陆 & 注册

备份5(python爬虫任务定时启动)

2022-01-01 23:48 作者:Power_Tea  | 我要投稿

记录自己的学习过程用。以下代码包含定时模块、爬虫模块和数据写入模块。



#! /usr/bin/python3

# coding=utf-8


import re, os

import datetime

import csv

from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

from selenium import webdriver

from selenium.webdriver import Chrome

import time


import schedule


# Make sure the output directory for the CSV files exists before any job runs.
# os.makedirs is used instead of shelling out to `mkdir -p` so the script does
# not depend on a POSIX shell being available.
os.makedirs('./数据', exist_ok=True)




def web1(name, url):
    """Scrape 24h market statistics for one trading pair and append them to its CSV.

    Loads ``url`` in Chrome, pauses a fixed 3 seconds, then extracts the
    24h low, 24h high, 24h volume and 24h turnover from the page source with
    a regular expression. The volume group only matches when the page prints
    the figure followed by "亿 ". Every match is appended as one row, followed
    by the capture timestamp, to ``./数据/{name}数据汇总.csv``.

    Args:
        name: Label of the asset; used in console output and the CSV file name.
        url: Address of the trading page to open.
    """
    now_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    path_csv = f"./数据/{name}数据汇总.csv"

    obj = re.compile(
        r'">24h最低</span><span class="value">(?P<low>.*?)</span></div>.*?">24h最高</span><span class="value">(?P<hight>.*?)</span></div>.*?</span><span class="value">(?P<number>.*?)亿 </span></div>.*?<span class="label">24h额</span><span class="value">(?P<money>.*?)</span></div>',
        re.S)

    # Build an options *instance* and hand it to the driver. Assigning
    # binary_location on the ChromeOptions class itself would modify the class
    # for every user and leave the driver without an explicit options object.
    options = webdriver.ChromeOptions()
    options.binary_location = r'/opt/apps/cn.google.chrome/files/google-chrome'  # Chrome executable path
    web = Chrome(options=options)
    try:
        web.maximize_window()
        web.get(url)  # open the page
        # Fixed pause before reading the page source (presumably to let the
        # page finish rendering its figures).
        time.sleep(3)
        print(f"====={name}=====")
        html = web.page_source
    finally:
        # Always end the driver session, even when loading the page failed,
        # so no browser is left running between scheduled runs.
        web.quit()

    rows = []
    for it in obj.finditer(html):
        values = [it.group('low'), it.group('hight'), it.group('number'), it.group('money')]
        print(*values)
        rows.append(values + [now_time])

    # Open the file once, in append mode only; nothing is ever read back.
    if rows:
        with open(path_csv, "a", newline="") as outcsv:
            csv.writer(outcsv).writerows(rows)

    print(f"所有{name}数据已经导出完成!!!")
    time.sleep(1)




def web2(name, url):
    """Scrape 24h market statistics for one trading pair and append them to its CSV.

    Loads ``url`` in Chrome, pauses a fixed 3 seconds, then extracts the
    24h low, 24h high, 24h volume and 24h turnover from the page source with
    a regular expression. The volume group only matches when the page prints
    the figure after the "24h量" label and followed by "万 ". Every match is
    appended as one row, followed by the capture timestamp, to
    ``./数据/{name}数据汇总.csv``.

    Args:
        name: Label of the asset; used in console output and the CSV file name.
        url: Address of the trading page to open.
    """
    now_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    path_csv = f"./数据/{name}数据汇总.csv"

    obj = re.compile(
        r'">24h最低</span><span class="value">(?P<low>.*?)</span></div>.*?">24h最高</span><span class="value">(?P<hight>.*?)</span></div>.*?class="label">24h量.*?</span><span class="value">(?P<number>.*?)万 </span></div>.*?">24h额</span><span class="value">(?P<money>.*?)</span></div>',
        re.S)

    # Build an options *instance* and hand it to the driver. Assigning
    # binary_location on the ChromeOptions class itself would modify the class
    # for every user and leave the driver without an explicit options object.
    options = webdriver.ChromeOptions()
    options.binary_location = r'/opt/apps/cn.google.chrome/files/google-chrome'  # Chrome executable path
    web = Chrome(options=options)
    try:
        web.maximize_window()
        web.get(url)  # open the page
        # Fixed pause before reading the page source (presumably to let the
        # page finish rendering its figures).
        time.sleep(3)
        print(f"====={name}=====")
        html = web.page_source
    finally:
        # Always end the driver session, even when loading the page failed,
        # so no browser is left running between scheduled runs.
        web.quit()

    rows = []
    for it in obj.finditer(html):
        values = [it.group('low'), it.group('hight'), it.group('number'), it.group('money')]
        print(*values)
        rows.append(values + [now_time])

    # Open the file once, in append mode only; nothing is ever read back.
    if rows:
        with open(path_csv, "a", newline="") as outcsv:
            csv.writer(outcsv).writerows(rows)

    print(f"所有{name}数据已经导出完成!!!")
    time.sleep(1)




def man1():
    """Scrape every pair in the first batch with ``web1``.

    For each pair the summary CSV ``./数据/{name}数据汇总.csv`` is created with
    a header row if it does not exist yet, and then the pair is scraped. The
    scrape runs on every call, including the one that creates the file, so
    the first scheduled run is not spent on writing the header alone.
    """
    names1 = {
        "SHIB": "https://www.okex.com/trade-spot/shib-usdt",
    }

    for name, url in names1.items():
        path_csv = f"./数据/{name}数据汇总.csv"
        if not os.path.exists(path_csv):
            # newline="" is what the csv module expects for files it writes;
            # the context manager closes the file even if the write fails.
            with open(path_csv, mode="w", newline="") as f:
                # Five columns: the data rows carry a timestamp as their
                # last field, so the header names it as well.
                csv.writer(f).writerow(
                    ["24H最低", "24H最高", f"24H量({name})", "24H额", "时间"])

        web1(name, url)


def man2():
    """Scrape every pair in the second batch with ``web2``.

    For each pair the summary CSV ``./数据/{name}数据汇总.csv`` is created with
    a header row if it does not exist yet, and then the pair is scraped. The
    scrape runs on every call, including the one that creates the file, so
    the first scheduled run is not spent on writing the header alone.
    """
    names2 = {
        "ICP": "https://www.okex.com/trade-spot/icp-usdt",
        "OMG": "https://www.okex.com/trade-spot/omg-usdt",
        "Doge": "https://www.okex.com/trade-spot/doge-usdt",
        "xch": "https://www.okex.com/trade-spot/xch-usdt",
        "BTC": "https://www.okex.com/trade-spot/btc-usdt",
        "ETH": "https://www.okex.com/trade-spot/eth-usdt",
    }

    for name, url in names2.items():
        path_csv = f"./数据/{name}数据汇总.csv"
        if not os.path.exists(path_csv):
            # newline="" is what the csv module expects for files it writes;
            # the context manager closes the file even if the write fails.
            with open(path_csv, mode="w", newline="") as f:
                # Five columns: the data rows carry a timestamp as their
                # last field, so the header names it as well.
                csv.writer(f).writerow(
                    ["24H最低", "24H最高", f"24H量({name})", "24H额", "时间"])

        web2(name, url)


def man():
    """Run both scraping batches once; this is the job handed to the scheduler.

    Each batch is run in isolation: if one raises (for example because the
    browser could not be started or a page failed to load), the traceback is
    printed and the other batch still runs. The exception is not re-raised,
    so one failed run does not propagate into the scheduling loop.
    """
    import traceback

    for batch in (man1, man2):
        try:
            batch()
        except Exception:
            # Top-level job boundary: report the failure and carry on.
            traceback.print_exc()



if __name__ == '__main__':
    # Register the scraping job to fire every 10 minutes, then poll the
    # scheduler once per second for as long as the process lives.
    every_ten_minutes = schedule.every(10).minutes
    every_ten_minutes.do(man)

    while True:
        schedule.run_pending()
        time.sleep(1)


以下是时间管理模块


备份5(python爬虫任务定时启动)的评论 (共 条)

分享到微博请遵守国家法律