尚硅谷Python爬虫教程小白零基础速通(含python基础+爬虫案例)

P76 爬取星巴克图片源码:
# Download every product image listed on the Starbucks China menu page.
#
# The menu page is fetched once, each product's image reference is pulled out
# of the inline CSS ``style`` attribute of its preview <div>, and the images
# are then downloaded concurrently, one thread per image.
import os
import re
import threading
import urllib.request as ur
from urllib.parse import urljoin

MENU_URL = 'https://www.starbucks.com.cn/menu/'
OUTPUT_DIR = '../starbucks_images'

# Matches the target of a CSS ``url(...)`` token, with or without quotes.
_CSS_URL_RE = re.compile(r"""url\(\s*['"]?(.*?)['"]?\s*\)""")


def extract_image_path(style):
    """Return the image reference embedded in an inline CSS ``style`` value.

    Args:
        style: The raw ``style`` attribute text, e.g.
            ``background-image: url("/images/products/x.jpg")``.

    Returns:
        The text inside ``url(...)`` with surrounding quotes removed. When the
        value contains no ``url(...)`` token, the fixed-offset slice
        ``style[23:-2]`` is returned instead.
    """
    match = _CSS_URL_RE.search(style)
    if match:
        return match.group(1)
    # Fallback: the original script stripped a fixed 23-character prefix and
    # 2-character suffix; keep that so unrecognised values behave as before.
    return style[23:-2]


def build_image_url(style, base_url=MENU_URL):
    """Turn a preview ``style`` value into an absolute image URL.

    Args:
        style: The raw ``style`` attribute text of a preview element.
        base_url: URL the image reference is resolved against.

    Returns:
        The absolute image URL. Root-relative references resolve against the
        host of ``base_url``; references that are already absolute are
        returned unchanged.
    """
    return urljoin(base_url, extract_image_path(style))


def build_filename(name, directory=OUTPUT_DIR):
    """Return the path an image for product ``name`` is saved to.

    Args:
        name: Product name used as the file stem.
        directory: Directory the file is placed in.

    Returns:
        ``<directory>/<name>.jpg`` with every ``/`` in ``name`` replaced by
        ``" or "`` so the name cannot introduce extra path components.
    """
    return f'{directory}/{name.replace("/", " or ")}.jpg'


def fetch_menu_items(page_url=MENU_URL):
    """Fetch the menu page and extract preview styles and product names.

    Args:
        page_url: URL of the menu page to download.

    Returns:
        A ``(src_list, name_list)`` tuple: the ``style`` attribute of every
        ``<div class="preview circle">`` and the text of every ``<strong>``
        element, each in document order.
    """
    # Imported lazily so the pure helpers above stay importable on systems
    # where the third-party lxml package is not installed.
    from lxml import etree

    # The context manager closes the HTTP response once the body is read.
    with ur.urlopen(page_url) as response:
        content = response.read().decode('utf-8')

    tree = etree.HTML(content)
    src_list = tree.xpath("//div[@class='preview circle']/@style")
    name_list = tree.xpath("//strong/text()")
    return src_list, name_list


def download_image(image_url, filename, index):
    """Download one image and report the outcome on stdout.

    Runs as a thread target, so failures are reported rather than raised;
    an exception escaping here would only kill the worker thread.

    Args:
        image_url: Absolute URL of the image.
        filename: Path the image is written to.
        index: 1-based position of the image, used only in the messages.
    """
    try:
        ur.urlretrieve(image_url, filename)
    except Exception as e:
        print(f"下载图片{index}: {filename} 时出错: {e}")
    else:
        print(f"下载图片{index}: {filename} 完成")


def main():
    """Fetch the menu page and download all product images concurrently."""
    src_list, name_list = fetch_menu_items()
    images_url_list = [build_image_url(src) for src in src_list]

    # Create the directory the images are saved into.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # NOTE(review): zip() stops at the shorter list, so the pairing of image
    # and name relies on both XPath queries returning matching items in the
    # same order - confirm against the live page.
    threads = []
    for i, (image_url, name) in enumerate(zip(images_url_list, name_list), start=1):
        thread = threading.Thread(
            target=download_image,
            args=(image_url, build_filename(name), i),
        )
        threads.append(thread)
        thread.start()

    # Wait for every download thread to finish.
    for thread in threads:
        thread.join()

    print("图片下载完成")


if __name__ == "__main__":
    main()