备份5(Python爬虫任务定时启动)
用于记录自己的学习过程。以下代码包含定时模块、爬虫模块和数据写入模块。
#! /usr/bin/python3
# coding=utf-8
import re, os
import datetime
import csv
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
from selenium import webdriver
from selenium.webdriver import Chrome
import time
import schedule
# Create the output directory for the CSV files at import time.
# os.makedirs avoids spawning a shell, works on any platform, raises on a
# real failure, and with exist_ok=True is a no-op when the directory exists.
os.makedirs('./数据', exist_ok=True)
def web1(name, url):
    """Load *url* in Chrome and append the matched 24h statistics to a CSV.

    The page source is searched with a regular expression that captures
    four fields: the 24h low, the 24h high, a value that is followed by
    "亿 " (captured as ``number``) and the 24h turnover.  Every match is
    printed and appended, together with the timestamp taken when the call
    started, to ``./数据/{name}数据汇总.csv``.

    Args:
        name: Label used in the CSV file name and in the console output.
        url: Address of the page to load.
    """
    now_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    path_csv = f"./数据/{name}数据汇总.csv"

    # Configure an options *instance* (not the ChromeOptions class) and pass
    # it to the driver explicitly so the binary path applies to this browser.
    options = webdriver.ChromeOptions()
    options.binary_location = r'/opt/apps/cn.google.chrome/files/google-chrome'  # Chrome executable path
    web = Chrome(options=options)
    try:
        web.maximize_window()
        web.get(url)  # open the page
        # Fixed pause before reading the source (presumably to let the page
        # finish rendering).
        time.sleep(3)
        print(f"====={name}=====")
        html = web.page_source
    finally:
        # Always release the browser and its driver, even if loading failed.
        web.quit()

    obj = re.compile(
        r'">24h最低</span><span class="value">(?P<low>.*?)</span></div>.*?">24h最高</span><span class="value">(?P<hight>.*?)</span></div>.*?</span><span class="value">(?P<number>.*?)亿 </span></div>.*?<span class="label">24h额</span><span class="value">(?P<money>.*?)</span></div>',
        re.S)

    rows = []
    for match in obj.finditer(html):
        low, high, number, money = match.group('low', 'hight', 'number', 'money')
        print(low, high, number, money)
        rows.append([low, high, number, money, now_time])

    # Open the file once, append-only, and only when there is data to write.
    if rows:
        with open(path_csv, "a", newline="") as outcsv:
            csv.writer(outcsv).writerows(rows)

    print(f"所有{name}数据已经导出完成!!!")
    time.sleep(1)
def web2(name, url):
    """Load *url* in Chrome and append the matched 24h statistics to a CSV.

    The page source is searched with a regular expression that captures
    four fields: the 24h low, the 24h high, the value under the "24h量"
    label that is followed by "万 " (captured as ``number``) and the 24h
    turnover.  Every match is printed and appended, together with the
    timestamp taken when the call started, to ``./数据/{name}数据汇总.csv``.

    Args:
        name: Label used in the CSV file name and in the console output.
        url: Address of the page to load.
    """
    now_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    path_csv = f"./数据/{name}数据汇总.csv"

    # Configure an options *instance* (not the ChromeOptions class) and pass
    # it to the driver explicitly so the binary path applies to this browser.
    options = webdriver.ChromeOptions()
    options.binary_location = r'/opt/apps/cn.google.chrome/files/google-chrome'  # Chrome executable path
    web = Chrome(options=options)
    try:
        web.maximize_window()
        web.get(url)  # open the page
        # Fixed pause before reading the source (presumably to let the page
        # finish rendering).
        time.sleep(3)
        print(f"====={name}=====")
        html = web.page_source
    finally:
        # Always release the browser and its driver, even if loading failed.
        web.quit()

    obj = re.compile(
        r'">24h最低</span><span class="value">(?P<low>.*?)</span></div>.*?">24h最高</span><span class="value">(?P<hight>.*?)</span></div>.*?class="label">24h量.*?</span><span class="value">(?P<number>.*?)万 </span></div>.*?">24h额</span><span class="value">(?P<money>.*?)</span></div>',
        re.S)

    rows = []
    for match in obj.finditer(html):
        low, high, number, money = match.group('low', 'hight', 'number', 'money')
        print(low, high, number, money)
        rows.append([low, high, number, money, now_time])

    # Open the file once, append-only, and only when there is data to write.
    if rows:
        with open(path_csv, "a", newline="") as outcsv:
            csv.writer(outcsv).writerows(rows)

    print(f"所有{name}数据已经导出完成!!!")
    time.sleep(1)
def man1():
    """Scrape every pair in the first group with ``web1``.

    For each pair the CSV file ``./数据/{name}数据汇总.csv`` is created
    with a header row if it does not exist yet, and then ``web1`` is called
    to append the current values.  Scraping happens on every call, including
    the one that creates the file.
    """
    names1 = {
        "SHIB": "https://www.okex.com/trade-spot/shib-usdt",
    }
    for name, url in names1.items():
        path_csv = f"./数据/{name}数据汇总.csv"
        if not os.path.exists(path_csv):
            # newline="" is what the csv module expects for files it writes;
            # the last column labels the timestamp appended to every row.
            with open(path_csv, mode="w", newline="") as f:
                csv.writer(f).writerow(
                    ["24H最低", "24H最高", f"24H量({name})", "24H额", "时间"]
                )
        web1(name, url)
def man2():
    """Scrape every pair in the second group with ``web2``.

    For each pair the CSV file ``./数据/{name}数据汇总.csv`` is created
    with a header row if it does not exist yet, and then ``web2`` is called
    to append the current values.  Scraping happens on every call, including
    the one that creates the file.
    """
    names2 = {
        "ICP": "https://www.okex.com/trade-spot/icp-usdt",
        "OMG": "https://www.okex.com/trade-spot/omg-usdt",
        "Doge": "https://www.okex.com/trade-spot/doge-usdt",
        "xch": "https://www.okex.com/trade-spot/xch-usdt",
        "BTC": "https://www.okex.com/trade-spot/btc-usdt",
        "ETH": "https://www.okex.com/trade-spot/eth-usdt",
    }
    for name, url in names2.items():
        path_csv = f"./数据/{name}数据汇总.csv"
        if not os.path.exists(path_csv):
            # newline="" is what the csv module expects for files it writes;
            # the last column labels the timestamp appended to every row.
            with open(path_csv, mode="w", newline="") as f:
                csv.writer(f).writerow(
                    ["24H最低", "24H最高", f"24H量({name})", "24H额", "时间"]
                )
        web2(name, url)
def man():
    """Run one full scrape cycle: ``man1`` first, then ``man2``."""
    for job in (man1, man2):
        job()
if __name__ == '__main__':
    # Register man() to run every 10 minutes.
    every_ten_minutes = schedule.every(10).minutes
    every_ten_minutes.do(man)

    # Poll the scheduler once per second, forever.
    while True:
        schedule.run_pending()
        time.sleep(1)
以下是时间管理模块