# 按行切分本文 (split each text file into one file per non-empty line)
import os
import chardet
# Folder whose .txt files (subfolders included) are split when run as a script.
folder_path = r'G:\需要切分'


def write_line_files(lines, output_dir, stem):
    """Write every non-blank line of *lines* to its own UTF-8 text file.

    Output files are named ``<stem>_<n>.txt`` where ``n`` is the 1-based
    position of the line in *lines*. Blank lines produce no file but still
    advance the counter, so the number in a file name matches the line
    number in the source.

    Args:
        lines: Iterable of text lines; trailing newlines are allowed.
        output_dir: Directory that receives the output files.
        stem: File-name prefix, normally the source name without extension.

    Returns:
        The number of files written.
    """
    written = 0
    for number, line in enumerate(lines, start=1):
        # strip() removes the line terminator and any surrounding whitespace.
        text = line.strip()
        if not text:
            continue
        output_file = os.path.join(output_dir, f'{stem}_{number}.txt')
        # Mode 'w' replaces an existing file of the same name.
        with open(output_file, 'w', encoding='utf-8') as output_f:
            output_f.write(text)
        written += 1
    return written


def split_text_file(file_path):
    """Split one text file into per-line files placed next to it.

    The encoding is guessed with chardet from the raw bytes, then the file
    is re-read as text with undecodable bytes ignored.

    Args:
        file_path: Path of the text file to split.

    Returns:
        The number of per-line files written.
    """
    # Read the raw bytes first; chardet needs them to guess the encoding.
    with open(file_path, 'rb') as f:
        raw_data = f.read()
    # chardet may report None (e.g. for empty input); open() then falls
    # back to the platform default encoding.
    encoding = chardet.detect(raw_data)['encoding']

    output_dir, file_name = os.path.split(file_path)
    stem = os.path.splitext(file_name)[0]
    with open(file_path, 'r', encoding=encoding, errors='ignore') as f:
        # Iterate the file directly instead of building a list of all lines.
        return write_line_files(f, output_dir, stem)


def split_folder(folder):
    """Split every ``.txt`` file found under *folder*, recursively.

    A failure on one file is reported on stdout and does not stop the run.

    NOTE(review): outputs are written into the folder being walked and also
    end in ``.txt``. They can overwrite an existing ``<stem>_<n>.txt`` and
    would be split again by a later run -- confirm this is intended.

    Args:
        folder: Root directory to walk.

    Returns:
        The number of source files that were split without error.
    """
    processed = 0
    for root, _dirs, files in os.walk(folder):
        for file in files:
            # Only plain-text files are handled.
            if not file.endswith('.txt'):
                continue
            file_path = os.path.join(root, file)
            try:
                split_text_file(file_path)
            except Exception as e:
                # Best-effort batch job: report the problem and keep going.
                print(f"处理文件 {file_path} 时出错,错误: {e}")
            else:
                print(f"已分割文件: {file_path}")
                processed += 1
    return processed


if __name__ == "__main__":
    split_folder(folder_path)