Python obtains Douyin search results and batch downloads watermark-free videos.

Installing playwright:

pip install playwright

Complete code:


import re
import os
import asyncio
import requests
from playwright.async_api import async_playwright

def save_douyin_videos(keyword, file_name, video_url):
    folder_name = keyword  # Use the keyword to name the folder

    if not os.path.exists(folder_name):
        os.makedirs(folder_name)

    file_path = os.path.join(folder_name, filter_filename(file_name).replace("...展开","") + ".mp4")
    with open(file_path, 'wb') as f:
        response = requests.get(video_url, stream=True)
        for chunk in response.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
def filter_filename(filename):
    filtered_filename = re.sub(r'[\\/:*?"<>| ]+', '', filename)
    return filtered_filename

async def search_douyin_videos(keyword, scroll_count):
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        page = await browser.new_page()
        await page.set_viewport_size({"width": 1280, "height": 800})
        await page.goto('https://www.douyin.com/search/搜索')
        await asyncio.sleep(3)
        await asyncio.sleep(3)
        # Enter "python" in the input box
        await page.fill('input', keyword+"视频")
        # Click the search button
        await page.click('.rB8dMXOc')
        await asyncio.sleep(3)
        
        links = await page.query_selector_all('.BL9IYM4m')
        arr = []
        for _ in range(scroll_count):
            xgplayers = await page.query_selector_all('.xgplayer')
            for player in xgplayers:
                video_src = await player.inner_html()
                arr.append(video_src)


            await page.evaluate('window.scrollBy(0, window.innerHeight)')
            await asyncio.sleep(2)
        
        titles = await page.query_selector_all('.KxCuain0')
        print(len(arr))
        print(len(titles))
        for i in range(len(arr)):
            get_title = await titles[i].evaluate('(element) => element.textContent')
            video_src_list = re.findall(r'<video.*?src=["\'](.*?)["\']', arr[i])
            for video_src in video_src_list:
                print(f"Title: {get_title}")
                print(f"Video Source: https:{video_src}")
                save_douyin_videos(keyword, get_title, "https:" + video_src)
                print("-----------")

        await browser.close()

# Call the function to perform the search
asyncio.run(search_douyin_videos('浅跳一下', 20))#Keyword and number of videos to download

Output: