# 按字幕切割音频视频

# -*- coding: utf-8 -*-
"""
cut_by_srt_auto.py
- 自动 pip 安装依赖 (srt, chardet, tqdm)
- 自动匹配置顶文件夹里的媒体文件 (mp4/mp3/wav/mkv等)
- 找到对应同名 .srt 文件
- 按字幕时间切片,并输出到 【原文件名_clips】文件夹
"""

import os
import sys
import subprocess
import importlib.util
from pathlib import Path
import re
from datetime import timedelta

# ==== 自动安装依赖 ====
def pip_install(package, module=None):
    """Install *package* with pip unless its import module is already available.

    Args:
        package: Distribution name handed to ``pip install``.
        module: Import name to probe for. Defaults to ``package``; pass it
            explicitly when the distribution and import names differ.

    Raises:
        subprocess.CalledProcessError: If the pip command exits non-zero.
    """
    target = module or package
    try:
        # find_spec only locates the module; unlike __import__ it does not
        # execute the module's code just to check that it exists.
        found = importlib.util.find_spec(target) is not None
    except ImportError:
        # Dotted name whose parent package is missing.
        found = False
    except ValueError:
        # Raised when an already-imported module has no __spec__.
        found = target in sys.modules
    if found:
        return

    print(f"[INFO] 安装依赖: {package}")
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    # Files created after interpreter start may be missed by cached finders;
    # invalidate so a following ``import`` in this process can see them.
    importlib.invalidate_caches()

# Make sure each third-party dependency is importable before the imports below.
for pkg in ("srt", "chardet", "tqdm"):
    pip_install(pkg)

import srt
import chardet
from tqdm import tqdm

# ==== 工具函数 ====
def detect_encoding(p):
    """Guess the text encoding of the file at *p*.

    The whole file is read as bytes and handed to ``chardet.detect``; the
    reported encoding is kept only if the bytes really decode with it.

    Args:
        p: Path (or path string) of the file to inspect.

    Returns:
        The detected encoding name, or ``"utf-8"`` when detection reports
        nothing or the reported codec cannot decode the file.
    """
    raw = Path(p).read_bytes()
    enc = chardet.detect(raw).get("encoding") or "utf-8"
    try:
        raw.decode(enc)
    except (UnicodeError, LookupError):
        # UnicodeError: the guess does not decode these bytes.
        # LookupError: the guessed codec name is unknown to this Python.
        enc = "utf-8"
    return enc

def td2ss(t: timedelta):
    """Return the duration *t* as a float number of seconds."""
    one_second = timedelta(seconds=1)
    return t / one_second

def ss2tc(seconds: float):
    """Format a second count as an ``HH:MM:SS.mmm`` timecode string.

    The value is rounded to the nearest millisecond. Hours are not capped,
    so values of 100 hours or more simply use more digits.

    Args:
        seconds: Time offset in seconds; may be negative.

    Returns:
        The timecode, prefixed with ``-`` when the rounded value is negative.
    """
    total_ms = int(round(seconds * 1000))
    # Format the magnitude and add the sign separately: floor division on a
    # negative total would otherwise yield fields like "-1:59:59.000".
    sign = "-" if total_ms < 0 else ""
    total_s, ms = divmod(abs(total_ms), 1000)
    total_m, s = divmod(total_s, 60)
    h, m = divmod(total_m, 60)
    return f"{sign}{h:02d}:{m:02d}:{s:02d}.{ms:03d}"

def safe_name(text, keep=30):
    """Turn free text into a short fragment usable inside a file name.

    Surrounding whitespace is stripped, each of the characters
    ``\\ / : * ? " < > |`` becomes ``_``, and every whitespace run collapses
    to one space. The result is cut to *keep* characters; text that ends up
    empty after cleaning yields ``"clip"``.
    """
    stripped = text.strip()
    without_reserved = re.sub(r"[\\/:*?\"<>|]", "_", stripped)
    collapsed = re.sub(r"\s+", " ", without_reserved)
    if not collapsed:
        return "clip"
    return collapsed[:keep]

# ==== 主逻辑 ====
def main():
    """Cut a media file beside this script into one clip per subtitle entry.

    Searches the script's directory for ``*.mp4``, ``*.mkv``, ``*.mp3`` and
    ``*.wav`` files, pairs one of them with its same-named ``.srt`` file and
    runs ffmpeg once per subtitle, writing the clips to ``<stem>_clips``.
    Problems are reported through printed messages; nothing is returned.
    """
    import shutil  # local import: only needed for the ffmpeg availability check

    base = Path(__file__).parent  # directory that contains this script

    # Collect candidates, keeping the mp4 -> mkv -> mp3 -> wav priority.
    patterns = ("*.mp4", "*.mkv", "*.mp3", "*.wav")
    media_files = [path for pattern in patterns for path in base.glob(pattern)]
    if not media_files:
        print("[ERROR] 没找到媒体文件(支持 mp4/mkv/mp3/wav)")
        return
    # Prefer a file that has a subtitle next to it; fall back to the first
    # candidate so the missing-subtitle message below still names a file.
    media = next(
        (path for path in media_files if path.with_suffix(".srt").exists()),
        media_files[0],
    )
    print(f"[INFO] 使用媒体文件: {media}")

    # The subtitle file must share the media file's stem.
    srt_file = media.with_suffix(".srt")
    if not srt_file.exists():
        print(f"[ERROR] 没找到对应字幕文件: {srt_file}")
        return
    print(f"[INFO] 使用字幕文件: {srt_file}")

    # Without this check a missing binary surfaces as an uncaught
    # FileNotFoundError on the first segment.
    if shutil.which("ffmpeg") is None:
        print("[ERROR] 没找到 ffmpeg, 请先安装并加入 PATH")
        return

    # Output directory
    outdir = base / f"{media.stem}_clips"
    outdir.mkdir(exist_ok=True)

    # Load the subtitles
    enc = detect_encoding(srt_file)
    text = srt_file.read_text(encoding=enc, errors="ignore")
    subs = list(srt.parse(text))

    # These depend only on the input file, so decide them once.
    has_video = media.suffix.lower() in (".mp4", ".mkv")
    ext = ".mp4" if has_video else ".m4a"
    if has_video:
        video_args = ["-c:v", "libx264", "-preset", "veryfast", "-crf", "23"]
    else:
        # Audio-only output: drop any video stream (e.g. embedded cover art)
        # instead of encoding it into the .m4a.
        video_args = ["-vn"]

    total = 0
    for i, sub in enumerate(tqdm(subs, desc="Cutting", unit="seg"), start=1):
        ss = ss2tc(td2ss(sub.start))
        to = ss2tc(td2ss(sub.end))

        snippet = sub.content.replace("\n", " ").strip()
        name_text = safe_name(snippet, keep=20)
        out_path = outdir / f"{i:03d}_{name_text}{ext}"

        cmd = [
            "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
            "-ss", ss, "-to", to, "-i", str(media),
            *video_args,
            "-c:a", "aac", "-b:a", "128k",
            str(out_path),
        ]
        try:
            subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError:
            # One bad segment should not stop the rest of the batch.
            print(f"[WARN] 第 {i} 段导出失败")
        else:
            total += 1

    print(f"[完成] 共导出 {total} 段 -> {outdir}")

# Run the cutter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
# ❤️ 转载文章请注明出处,谢谢!❤️