Python_txt

按行切分文本

import os
import chardet

# Root folder to process; .txt files in it and in all of its subfolders are split.
folder_path = r'G:\需要切分'


def is_txt_file(file_name):
    """Return True when *file_name* ends with ``.txt``, in any letter case."""
    # Case-insensitive on purpose: names such as ``NOTES.TXT`` must not be
    # skipped silently.
    return file_name.lower().endswith('.txt')


def iter_nonblank_lines(lines):
    """Yield ``(line_number, text)`` for every line that is not blank.

    ``line_number`` is the 1-based position of the line in *lines*, so blank
    lines still consume a number (the numbering can have gaps).  ``text`` is
    the line with leading and trailing whitespace, including the newline,
    removed.
    """
    for line_number, line in enumerate(lines, start=1):
        text = line.strip()
        if text:
            yield line_number, text


def split_file_by_lines(file_path):
    """Write each non-blank line of *file_path* to its own UTF-8 file.

    Output files are created next to the source file and are named
    ``<source name without extension>_<line number>.txt``.  An existing file
    with the same name is overwritten.
    """
    # Read the raw bytes first so chardet can guess the encoding.
    with open(file_path, 'rb') as f:
        detected = chardet.detect(f.read())
    # NOTE(review): chardet is documented to report None when it cannot
    # determine an encoding; open() then uses the platform default encoding.
    encoding = detected['encoding']

    # splitext only affects the last path component, so this is the source
    # path without its extension.
    base_path = os.path.splitext(file_path)[0]

    # errors='ignore' drops undecodable bytes instead of raising.
    with open(file_path, 'r', encoding=encoding, errors='ignore') as f:
        for line_number, text in iter_nonblank_lines(f):
            output_file = f'{base_path}_{line_number}.txt'
            with open(output_file, 'w', encoding='utf-8') as output_f:
                output_f.write(text)


def split_folder_by_lines(folder_path):
    """Split every .txt file found under *folder_path* into one file per line.

    Subfolders are visited as well.  A failure while handling one file is
    printed and does not stop the processing of the remaining files.

    NOTE: the generated files are themselves .txt files stored in the same
    folders, so running the script a second time splits them again.
    """
    for root, _dirs, files in os.walk(folder_path):
        for file in files:
            # Only text files are processed.
            if not is_txt_file(file):
                continue

            file_path = os.path.join(root, file)
            try:
                split_file_by_lines(file_path)
            except Exception as e:
                # Per-file boundary: report the problem and move on.
                print(f"处理文件 {file_path} 时出错，错误: {e}")
            else:
                print(f"已分割文件: {file_path}")


# Script behaviour: process the configured folder as soon as the file is run.
split_folder_by_lines(folder_path)
Python_txt_cut

按行切分文本