安装 playwright:
pip install playwright
完整代码:
import re
import os
import asyncio
import requests
from playwright.async_api import async_playwright
def save_douyin_videos(keyword, file_name, video_url):
folder_name = keyword # 使用关键词命名文件夹
if not os.path.exists(folder_name):
os.makedirs(folder_name)
file_path = os.path.join(folder_name, filter_filename(file_name).replace("...展开","") + ".mp4")
with open(file_path, 'wb') as f:
response = requests.get(video_url, stream=True)
for chunk in response.iter_content(chunk_size=1024):
if chunk:
f.write(chunk)
def filter_filename(filename):
filtered_filename = re.sub(r'[\\/:*?"<>| ]+', '', filename)
return filtered_filename
async def search_douyin_videos(keyword, scroll_count):
async with async_playwright() as p:
browser = await p.chromium.launch(headless=False)
page = await browser.new_page()
await page.set_viewport_size({"width": 1280, "height": 800})
await page.goto('https://www.douyin.com/search/搜索')
await asyncio.sleep(3)
await asyncio.sleep(3)
# 在输入框中输入“python”
await page.fill('input', keyword+"视频")
# 点击搜索按钮
await page.click('.rB8dMXOc')
await asyncio.sleep(3)
links = await page.query_selector_all('.BL9IYM4m')
arr = []
for _ in range(scroll_count):
xgplayers = await page.query_selector_all('.xgplayer')
for player in xgplayers:
video_src = await player.inner_html()
arr.append(video_src)
await page.evaluate('window.scrollBy(0, window.innerHeight)')
await asyncio.sleep(2)
titles = await page.query_selector_all('.KxCuain0')
print(len(arr))
print(len(titles))
for i in range(len(arr)):
get_title = await titles[i].evaluate('(element) => element.textContent')
video_src_list = re.findall(r'<video.*?src=["\'](.*?)["\']', arr[i])
for video_src in video_src_list:
print(f"标题: {get_title}")
print(f"视频源: https:{video_src}")
save_douyin_videos(keyword, get_title, "https:" + video_src)
print("-----------")
await browser.close()
# 调用函数进行搜索
asyncio.run(search_douyin_videos('浅跳一下', 20))#关键词和视频下载数量
输出: