返回

新手如何完成python视频爬虫

发布时间:2023-01-14 22:54:44 159
# python# 爬虫# json

对于刚开始学习爬虫的新手来说,最重要的是积累实战经验。可以用来编写爬虫的语言有很多种,今天我们就用 Python 来爬取快手短视频,下面的代码可供大家参考借鉴。

# coding=utf-8

import json
import os.path
import pprint

import requests


# Seconds to wait on any single HTTP request, so a stalled connection cannot
# hang the crawl forever.
_REQUEST_TIMEOUT = 30

# Characters that are not allowed in file names on Windows or POSIX, plus the
# ASCII control characters (captions may contain newlines); each maps to "_".
_UNSAFE_FILENAME_CHARS = {ord(ch): '_' for ch in '\\/:*?"<>|'}
_UNSAFE_FILENAME_CHARS.update((code, '_') for code in range(32))


def _safe_filename(title, fallback):
    """Return *title* reduced to a usable file name, or *fallback* if nothing is left."""
    cleaned = (title or '').translate(_UNSAFE_FILENAME_CHARS).strip(' ._')
    # 60 characters of up-to-4-byte UTF-8 plus ".mp4" stays under the common
    # 255-byte file-name limit.
    return cleaned[:60] or fallback


def get_page(pcursor):
    """Download the videos of the hard-coded Kuaishou profile into ``video/``.

    Posts the ``visionProfilePhotoList`` GraphQL query starting at *pcursor*,
    saves each returned feed's ``photoUrl`` as ``video/<caption>.mp4``, and
    keeps requesting the next page with the cursor returned by the API until
    it answers ``"no_more"``.

    Args:
        pcursor: Page cursor to start from; the script passes ``""`` to begin
            at the first page.

    Raises:
        requests.HTTPError: If the listing request or a video download
            returns an HTTP error status.
        KeyError, TypeError: If the response JSON does not contain the
            ``data -> visionProfilePhotoList -> feeds / pcursor`` structure.
    """
    path = 'video/'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(path, exist_ok=True)

    # Target profile: https://www.kuaishou.com/profile/3xhv7zhkfr3rqag
    # Sample video pages of that profile:
    #   https://www.kuaishou.com/short-video/3xw5fmcf9jdap29?authorId=3xhv7zhkfr3rqag&streamSource=profile&area=profilexxnull
    #   https://www.kuaishou.com/short-video/3xf98wc5q2cuxtq?authorId=3xhv7zhkfr3rqag&streamSource=profile&area=profilexxnull
    url = 'https://www.kuaishou.com/graphql'
    headers = {
        'content-type': 'application/json',
        'Cookie': 'kpf=PC_WEB; kpn=KUAISHOU_VISION; clientid=3; did=web_72314bf978cb158dd7034b2370d2ae70',
        'Host': 'www.kuaishou.com',
        'Origin': 'https://www.kuaishou.com',
        'Referer': 'https://www.kuaishou.com/short-video/3x6v3xmcjsd5cki?authorId=3xhv7zhkfr3rqag&streamSource=profile&area=profilexxnull',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
    }
    query = "query visionProfilePhotoList($pcursor: String, $userId: String, $page: String, $webPageArea: String) {\n visionProfilePhotoList(pcursor: $pcursor, userId: $userId, page: $page, webPageArea: $webPageArea) {\n result\n llsid\n webPageArea\n feeds {\n type\n author {\n id\n name\n following\n headerUrl\n headerUrls {\n cdn\n url\n __typename\n }\n __typename\n }\n tags {\n type\n name\n __typename\n }\n photo {\n id\n duration\n caption\n likeCount\n realLikeCount\n coverUrl\n coverUrls {\n cdn\n url\n __typename\n }\n photoUrls {\n cdn\n url\n __typename\n }\n photoUrl\n liked\n timestamp\n expTag\n animatedCoverUrl\n stereoType\n videoRatio\n profileUserTopPhoto\n __typename\n }\n canAddComment\n currentPcursor\n llsid\n status\n __typename\n }\n hostName\n pcursor\n __typename\n }\n}\n"

    # Iterate over pages instead of recursing, so a profile with many pages
    # cannot exhaust the interpreter's recursion limit.
    while True:
        data = {
            "operationName": "visionProfilePhotoList",
            "query": query,
            "variables": {"userId": "3xhv7zhkfr3rqag", "pcursor": pcursor, "page": "detail", "webPageArea": "profilexxnull"},
        }
        rsp = requests.post(url=url, json=data, headers=headers, timeout=_REQUEST_TIMEOUT)
        rsp.raise_for_status()
        photo_list = rsp.json()['data']['visionProfilePhotoList']

        # A null "feeds" value is treated as an empty page.
        for feed in photo_list['feeds'] or []:
            photo = feed['photo']
            # Video title
            title = photo['caption']
            # Video URL
            new_url = photo['photoUrl']
            if not new_url:
                # Nothing to download for this entry.
                continue

            # Announce before fetching, since the message says "downloading".
            print(f'=======================正在下载标题为 {title} 的快手短视频==========================')
            video = requests.get(url=new_url, timeout=_REQUEST_TIMEOUT)
            # Do not save an error page under an .mp4 name.
            video.raise_for_status()

            # Captions are free text; strip characters that would make open()
            # fail or write outside the target directory.
            file_name = _safe_filename(title, fallback=str(photo.get('id') or 'untitled'))
            with open(os.path.join(path, f'{file_name}.mp4'), mode='wb') as f:
                f.write(video.content)

        next_cursor = photo_list['pcursor']
        # Stop on the end marker, and also on an empty or unchanged cursor,
        # which would otherwise request the same page forever.
        if not next_cursor or next_cursor == "no_more" or next_cursor == pcursor:
            break
        pcursor = next_cursor


# Script entry: start the crawl with an empty page cursor.
get_page(pcursor="")
特别声明:以上内容(图片及文字)均为互联网收集或者用户上传发布,本站仅提供信息存储服务!如有侵权或有涉及法律问题请联系我们。
举报
评论区(0)
按点赞数排序
用户头像
精选文章
thumb 中国研究员首次曝光美国国安局顶级后门—“方程式组织”
thumb 俄乌线上战争,网络攻击弥漫着数字硝烟
thumb 从网络安全角度了解俄罗斯入侵乌克兰的相关事件时间线
下一篇
python爬虫之抓取彼岸壁纸 2023-01-14 22:16:37