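# 可见即可爬-爬取喜马拉雅学习资料音频简单应用
# ("If you can see it, you can scrape it": a simple example of downloading study audio from Ximalaya.)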
# 应用浏览器媒体检查 就可以解析喜马拉雅音频存放地址
# 接下来代码就很简单
import requests
# Request headers sent with every HTTP call made by this script.
# HTTP header field names are hyphenated ("accept-encoding"); the underscore
# spellings are different, non-standard field names, so a server would not
# treat them as the Accept-Encoding / Accept-Language headers.
headers = {
    # NOTE(review): presumably copied from the browser's request listing;
    # confirm whether the server needs it at all.
    'authority': 'www.ximalaya.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    # "br" is intentionally not advertised: requests only decodes Brotli
    # responses when an optional Brotli library is installed, otherwise the
    # body would be handed back still compressed.
    'accept-encoding': 'gzip, deflate',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
    # Left blank in the source; presumably meant to be filled in with the
    # values of a real browser session before running.
    'cookie': '',
    'user-agent': '',
}
def get_parse():
    """Request the album page, print its HTTP status, then download one track.

    The track URL is not a parameter: it is read from the module-level name
    ``audio_url``, which must be bound (by the driver loop) before this
    function is called. The album page itself is only requested so that its
    status code can be printed; its body is not used.

    Raises:
        requests.RequestException: If the album page request fails or times
            out, or if ``download`` propagates a request error.
    """
    response = requests.get(
        'https://www.ximalaya.com/album/20073114',
        headers=headers,
        # Without a timeout requests waits indefinitely on a stalled server.
        timeout=30,
    )
    print(response.status_code)
    # No charset detection here: the page text is never read, so guessing its
    # encoding would be wasted work.
    download(audio_url)
# Download a single audio resource (called once per track by the batch loop).
def download(audio_url, index=None):
    """Fetch one audio file and save it in the current working directory.

    The file is written as ``日语精度课文<N>.mp3`` where ``N`` is ``index + 1``.

    Args:
        audio_url: Direct URL of the audio file to fetch.
        index: Zero-based position of the track. When omitted, the
            module-level loop counter ``i`` is used, which is how the
            existing call site supplies it.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    if index is None:
        # Backward-compatible fallback: the driver loop binds ``i`` globally.
        index = i
    response = requests.get(audio_url, headers=headers, timeout=30)
    if not response.ok:
        # Do not save an HTTP error page to disk as if it were audio data.
        print('download failed ({}): {}'.format(response.status_code, audio_url))
        return
    # NOTE(review): the URLs listed in this file end in ".m4a" while the file
    # is saved with an ".mp3" extension; name kept unchanged, confirm intent.
    with open('日语精度课文{}.mp3'.format(index + 1), 'wb') as f:
        f.write(response.content)
# Direct audio URLs of the twelve tracks, one module-level name per track.
data_1 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1B/wKgLe1wX2ISy6MEgABdtBu708K4275-aaCV2-48K.m4a'
data_2 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1B/wKgLe1wX2ImSRadVABxDXHmULK8458-aaCV2-48K.m4a'
data_3 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1C/wKgLe1wX2IzB0gjYABzjVa0viBU053-aaCV2-48K.m4a'
data_4 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1C/wKgLe1wX2I6ww6OFABa0kcX62rQ734-aaCV2-48K.m4a'
data_5 = 'https://aod.cos.tx.xmcdn.com/group52/M04/33/E4/wKgLcFwX2JLjhx4CABXuHHahWMw543.m4a'
data_6 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1D/wKgLe1wX2Jmim_6qACYp_dMyN78480.m4a'
data_7 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1D/wKgLe1wX2JuyV-ptABV16Qz1Oag179.m4a'
data_8 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1D/wKgLe1wX2KLDjyEtAB4gt1PTOVk369.m4a'
data_9 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1E/wKgLe1wX2KjhGwCvACSRkS-ZHso757.m4a'
data_10 = 'https://aod.cos.tx.xmcdn.com/group52/M04/34/1E/wKgLe1wX2KqAmo38ABdqb3Q-t6E290.m4a'
data_11 = 'https://aod.cos.tx.xmcdn.com/group54/M00/33/98/wKgLfVwX2L6iNPekACgruzJ8pmg311.m4a'
data_12 = 'https://aod.cos.tx.xmcdn.com/group54/M00/33/80/wKgLclwX2LvB6tOnACCrCOnel-A401.m4a'

# The same addresses can also be obtained through the browser's media view.
# Tracks are listed in playback order; the position decides the file number.
audio_url_list = [
    data_1,
    data_2,
    data_3,
    data_4,
    data_5,
    data_6,
    data_7,
    data_8,
    data_9,
    data_10,
    data_11,
    data_12,
]
# Download every listed track in order. ``i`` and ``audio_url`` are bound at
# module level on purpose: get_parse() and download() read them as globals,
# so these loop variable names must not be changed.
for i, audio_url in enumerate(audio_url_list):
    try:
        get_parse()
    except requests.RequestException as exc:
        # A network failure on one track should not abort the remaining ones.
        print('track {} failed: {}'.format(i + 1, exc))

