Python 爬虫应用——爬取中关村在线多个品牌的多款交换机设备信息
结果如下


代码
import requests
from bs4 import BeautifulSoup as bs4
def main(name, limit=3, timeout=10):
    """Collect detail-page links for the first few switches of one brand.

    Downloads the ZOL switch listing page for ``name`` and, for each of the
    first ``limit`` elements matching ``.pro-intro``, maps the text of every
    ``h3`` heading to the absolute URL of the link found inside it.

    Args:
        name: Brand slug inserted into the listing URL.
        limit: Maximum number of ``.pro-intro`` entries to read.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict mapping product title to detail-page URL. It is empty when
        the page contains no matching entries.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    from urllib.parse import urljoin

    base_url = 'https://detail.zol.com.cn'
    listing_url = '{}/switches/{}/'.format(base_url, name)

    # Without a timeout a stalled server would block the crawl forever.
    response = requests.get(url=listing_url, timeout=timeout)
    soup = bs4(response.text, 'lxml')

    links = {}
    for intro in soup.select('.pro-intro')[:limit]:
        for heading in intro.select('h3'):
            # Drops the first and last character of the heading text
            # (presumably the surrounding newlines -- verify against the
            # page markup).
            title = heading.text[1:-1]
            for anchor in heading.select('a'):
                href = anchor.get('href')
                if not href:
                    # An anchor without a target cannot be followed.
                    continue
                # urljoin gives the same result as concatenation for
                # root-relative hrefs and also copes with absolute ones.
                links[title] = urljoin(base_url, href)
    return links
def get_info(items, timeout=10):
    """Download one product page and collect the values of its spec table.

    Args:
        items: A ``(title, url)`` pair. Index 0 becomes the key of the
            returned dict; index 1 is the page that is downloaded.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A single-entry dict ``{title: values}``. ``values`` holds the text
        of every child node of the second ``td`` cell in each row matched
        by ``.enterprise-level_table tr``, in document order.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    # Without a timeout a stalled server would block the crawl forever.
    page = requests.get(url=items[1], timeout=timeout).text
    soup = bs4(page, 'lxml')

    values = []
    for row in soup.select('.enterprise-level_table tr'):
        cells = row.select('td')
        if len(cells) < 2:
            # Rows without a second cell have no value to read; indexing
            # them would raise IndexError and abort the whole crawl.
            continue
        # Iterating a tag yields its direct children, matching the
        # per-child granularity callers already slice on.
        values.extend(child.text for child in cells[1])
    return {items[0]: values}
import csv
# Crawl each brand and append one CSV row per product to ./info.csv.
# The output file is opened once for the whole run rather than once per row.
with open("./info.csv", 'a', encoding='utf-8', newline='') as f:
    f_csv = csv.writer(f)
    for brand in ['huawei','h3c','ruijie','cisco']:
        for product in main(brand).items():
            for title, specs in get_info(product).items():
                # Only the first eight collected values are written out.
                f_csv.writerow([title] + specs[:8])
                print(title, ' '.join(specs[:8]))