from bs4 import BeautifulSoup
import requests
from fake_useragent import UserAgent
import csv
# Scrape the fruit-price quotation table from lvguo.net and dump it to a CSV.
# Columns written per row: quote time, location, product name, variety, price.

# Spoof a browser User-Agent so the site does not reject the scripted request.
ua = UserAgent()
headers = {
    "User-Agent": ua.firefox
}

# Fetch the quotation page.
url = 'https://www.lvguo.net/baojia'
resp = requests.get(url, headers=headers)
# Fail loudly on HTTP errors instead of silently parsing an error page.
resp.raise_for_status()

# Parse the response body as HTML.
page = BeautifulSoup(resp.text, features="html.parser")
# Locate the quotation table: <table class="bjtbl">.
table = page.find('table', attrs={"class": "bjtbl"})
# All table rows; [1:] skips the header row.
rows = table.find_all('tr')[1:]

# newline='' prevents blank lines on Windows; explicit utf-8 keeps the
# Chinese text portable instead of depending on the platform default.
# The context manager guarantees the file is closed even if a row is malformed.
with open('价格表.csv', mode='w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    for row in rows:
        cells = row.find_all('td')
        quote_time = cells[0].text   # renamed: avoid shadowing builtin-like 'time'
        address = cells[1].text
        name = cells[2].text
        # cells[3] looks like "<variety>:<price>\n<unit...>" — split it once
        # instead of re-splitting the same text three times.
        lines = cells[3].text.split("\n")
        variety_and_price = lines[0].split(':')
        variety = variety_and_price[0]   # renamed: avoid shadowing builtin 'type'
        price = variety_and_price[1] + lines[1]
        writer.writerow([quote_time, address, name, variety, price])
# NOTE(review): stray "标签:" ("Tags:") label copied from the source webpage —
# commented out because it was a Python syntax error at module level.