change download filter count
This commit is contained in:
@@ -19,7 +19,16 @@
|
|||||||
## 安装依赖
|
## 安装依赖
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
# 安装 bilibili-api
|
||||||
pip install bilibili-api-python
|
pip install bilibili-api-python
|
||||||
|
|
||||||
|
# 安装 you-get(用于下载视频)
|
||||||
|
pip install you-get
|
||||||
|
|
||||||
|
# 安装其他依赖库
|
||||||
|
pip3 install aiohttp
|
||||||
|
pip3 install httpx
|
||||||
|
pip3 install "curl_cffi"
|
||||||
```
|
```
|
||||||
|
|
||||||
## 安装 ffmpeg
|
## 安装 ffmpeg
|
||||||
|
|||||||
74
spider.py
74
spider.py
@@ -13,6 +13,7 @@ from bilibili_api import HEADERS, get_client, video # pip install bilibili-api-
|
|||||||
BASE_DIR = Path(__file__).resolve().parent
|
BASE_DIR = Path(__file__).resolve().parent
|
||||||
OUTPUT_DIR = BASE_DIR / "downloads"
|
OUTPUT_DIR = BASE_DIR / "downloads"
|
||||||
DOWNLOADED_BVID_FILE = BASE_DIR / "downloaded_bvids.txt" # 已下载 bvid 记录文件
|
DOWNLOADED_BVID_FILE = BASE_DIR / "downloaded_bvids.txt" # 已下载 bvid 记录文件
|
||||||
|
SKIPPED_BVID_FILE = BASE_DIR / "skipped_bvids.txt" # 跳过的 bvid 记录文件
|
||||||
FFMPEG_PATH = None # Will be set by find_ffmpeg()
|
FFMPEG_PATH = None # Will be set by find_ffmpeg()
|
||||||
POLL_SECONDS = 2
|
POLL_SECONDS = 2
|
||||||
FETCH_RELATED_LIMIT = 20
|
FETCH_RELATED_LIMIT = 20
|
||||||
@@ -73,6 +74,15 @@ def save_downloaded_bvid(bvid: str):
|
|||||||
print(f"failed to save bvid {bvid}: {e}")
|
print(f"failed to save bvid {bvid}: {e}")
|
||||||
|
|
||||||
|
|
||||||
|
def save_skipped_bvid(bvid: str, view_count: int, title: str):
|
||||||
|
"""将跳过的 bvid 及其信息追加保存到文件"""
|
||||||
|
try:
|
||||||
|
with open(SKIPPED_BVID_FILE, "a", encoding="utf-8") as f:
|
||||||
|
f.write(f"{bvid}\t{view_count}\t{title}\n")
|
||||||
|
except Exception as e:
|
||||||
|
print(f"failed to save skipped bvid {bvid}: {e}")
|
||||||
|
|
||||||
|
|
||||||
def get_bvid_from_url(url: str) -> str:
|
def get_bvid_from_url(url: str) -> str:
|
||||||
match = re.search(r"BV[0-9A-Za-z]{10}", url)
|
match = re.search(r"BV[0-9A-Za-z]{10}", url)
|
||||||
if not match:
|
if not match:
|
||||||
@@ -81,13 +91,14 @@ def get_bvid_from_url(url: str) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def sanitize_title(title: str, max_length: int = 80) -> str:
|
def sanitize_title(title: str, max_length: int = 80) -> str:
|
||||||
cleaned = re.sub(r'[\\/:*?"<>|]+', "_", title).strip()
|
# 只保留字母、数字和中文,其他全部移除
|
||||||
cleaned = re.sub(r"\s+", "_", cleaned)
|
cleaned = re.sub(r'[^\w\u4e00-\u9fff]', '', title)
|
||||||
cleaned = re.sub(r"_+", "_", cleaned)
|
# 移除连续下划线
|
||||||
cleaned = cleaned.strip("._")
|
cleaned = re.sub(r'_+', '_', cleaned)
|
||||||
|
cleaned = cleaned.strip('_')
|
||||||
if not cleaned:
|
if not cleaned:
|
||||||
cleaned = "video"
|
cleaned = "video"
|
||||||
return cleaned[:max_length].rstrip("._")
|
return cleaned[:max_length].rstrip('_')
|
||||||
|
|
||||||
|
|
||||||
def build_output_file_name(bvid: str, title: str) -> str:
|
def build_output_file_name(bvid: str, title: str) -> str:
|
||||||
@@ -217,15 +228,54 @@ async def download_video_file(bvid: str, output_file_name: str):
|
|||||||
return final_path
|
return final_path
|
||||||
|
|
||||||
|
|
||||||
|
def download_with_youget(bvid: str, title: str, output_dir: Path) -> bool:
|
||||||
|
"""使用 you-get 下载视频"""
|
||||||
|
url = f"https://www.bilibili.com/video/{bvid}"
|
||||||
|
# 生成自定义文件名:bvid_清理后的标题
|
||||||
|
custom_filename = f"{bvid}_{sanitize_title(title)}"
|
||||||
|
try:
|
||||||
|
# 使用 you-get 下载视频,指定文件名,禁用字幕
|
||||||
|
result = subprocess.run(
|
||||||
|
["you-get", "-o", str(output_dir), "-O", custom_filename, "--no-caption", url],
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
check=True
|
||||||
|
)
|
||||||
|
print(f"you-get output: {result.stdout}")
|
||||||
|
return True
|
||||||
|
except subprocess.CalledProcessError as e:
|
||||||
|
print(f"you-get download failed: {e.stderr}")
|
||||||
|
return False
|
||||||
|
except FileNotFoundError:
|
||||||
|
print("you-get not found, please install it: pip install you-get")
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
def process_download(bvid: str):
|
def process_download(bvid: str):
|
||||||
"""处理单个视频的下载"""
|
"""处理单个视频的下载"""
|
||||||
try:
|
try:
|
||||||
# 获取视频信息
|
# 获取视频信息
|
||||||
info, related_items = asyncio.run(fetch_video_info_and_related(bvid))
|
info, related_items = asyncio.run(fetch_video_info_and_related(bvid))
|
||||||
title = info.get("title", bvid)
|
title = info.get("title", bvid)
|
||||||
file_name = build_output_file_name(bvid, title)
|
|
||||||
|
|
||||||
print(f"fetching info done: {bvid}, title: {title}")
|
# 获取观看量
|
||||||
|
view_count = info.get("stat", {}).get("view", 0)
|
||||||
|
|
||||||
|
print(f"fetching info done: {bvid}, title: {title}, views: {view_count}")
|
||||||
|
|
||||||
|
# 判断观看量是否超过 50w (500000)
|
||||||
|
if view_count < 500000:
|
||||||
|
print(f"skipped: {bvid}, view count {view_count} < 500000")
|
||||||
|
# 记录跳过的视频信息
|
||||||
|
save_skipped_bvid(bvid, view_count, title)
|
||||||
|
# 仍然添加相关视频到队列
|
||||||
|
inserted, skipped = save_related_bvids(bvid, related_items)
|
||||||
|
print(f"related videos: inserted={inserted}, skipped={skipped}")
|
||||||
|
return
|
||||||
|
|
||||||
|
print(f"view count {view_count} >= 500000, downloading...")
|
||||||
|
|
||||||
|
file_name = build_output_file_name(bvid, title)
|
||||||
|
|
||||||
# 检查文件是否已存在
|
# 检查文件是否已存在
|
||||||
final_path = get_output_path(file_name)
|
final_path = get_output_path(file_name)
|
||||||
@@ -235,14 +285,18 @@ def process_download(bvid: str):
|
|||||||
downloaded_set.add(bvid)
|
downloaded_set.add(bvid)
|
||||||
return
|
return
|
||||||
|
|
||||||
# 下载视频
|
# 使用 you-get 下载视频
|
||||||
final_path = asyncio.run(download_video_file(bvid, file_name))
|
success = download_with_youget(bvid, title, OUTPUT_DIR)
|
||||||
print(f"download done: {bvid} -> {final_path}")
|
|
||||||
|
if success:
|
||||||
|
print(f"download done: {bvid}")
|
||||||
|
|
||||||
# 标记为已下载并保存到文件
|
# 标记为已下载并保存到文件
|
||||||
with queue_lock:
|
with queue_lock:
|
||||||
downloaded_set.add(bvid)
|
downloaded_set.add(bvid)
|
||||||
save_downloaded_bvid(bvid)
|
save_downloaded_bvid(bvid)
|
||||||
|
else:
|
||||||
|
print(f"download failed: {bvid}")
|
||||||
|
|
||||||
# 添加相关视频到队列
|
# 添加相关视频到队列
|
||||||
inserted, skipped = save_related_bvids(bvid, related_items)
|
inserted, skipped = save_related_bvids(bvid, related_items)
|
||||||
|
|||||||
Reference in New Issue
Block a user