脚本来自@不识君ᯤ⁶ᴳ投稿
脚本文件
点击下载脚本文件:py_auto_hard_link.py
注意:python脚本,运行需要python环境
修改参数
其中需要修改的参数有:`dir1`、`dir2`、`dir3_to_delete`、`i_filesize_min`
# 如有重复,移除dir1里的文件(注意不删文件夹,因为link也要用到)
dir1 = "/volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS"
# 是否与dir2重复
dir2 = "/volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS"
# 配置移动相同文件到dir3;如果为空字符串则直接删除文件;如非空字符串但该目录已有文件,建议清空,否则报错
dir3_to_delete = '/volume3/p_video3/video3/to_del'
# 忽略100MB以内的文件(包括nfo/jpg等)
i_filesize_min = 100 * 1024 * 1024
运行示例
以Ubuntu24.04为例,假设/home/test目录下有以下文件结构
其中DJI_0153.MP4和DJI_0154.MP4是重复的
此时脚本内的参数已经设置为
# 文件夹1的路径
dir1 = "/home/test/test1"
# 文件夹2的路径
dir2 = "/home/test/test2"
# 文件夹3的路径,仅作测试,实际使用建议留空
dir3_to_delete = '/home/test/test3'
# 忽略1MB以内的文件,你也可以设置为0
i_filesize_min = 1 * 1024 * 1024
运行python脚本
sudo python3 py_auto_hard_link.py
运行后的文件结构,其中test1文件夹中的DJI_0153.MP4和DJI_0154.MP4已被移走(本例设置了dir3_to_delete,因此被移动到test3;若留空则直接删除),并替换为指向test2中同名文件的硬链接
附录
完整代码内容
#!/usr/bin/env python
# coding: utf-8
# by Jiny on 2025-2-8
'''
Purpose: remove the files in directory 1 that are the same as files in
directory 2, and create hard links in directory 1 in their place.
Intended for de-duplicating large movie/TV packs, e.g. de-duplicating the
files of a DoubanTop250 collection against a Douban_IMDB mixed pack.
Notes:
All 3 directories must be on the same volume.
Files are matched by name and size only; file contents are not compared.
Environments differ, so no responsibility is taken for the results of
running this script; do not use it if that is a concern.
Recommended invocation, which keeps a log:
python3 /volume3/your_path/py_auto_hard_link.py > /volume3/your_path/py_auto_hard_link.log
'''
import os
import shutil
# Directory to de-duplicate: files here that also exist in dir2 are removed.
# Folders are never removed, because the hard links are created inside them.
dir1 = "/volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS"
# Reference directory that dir1 is checked against for duplicates.
dir2 = "/volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS"
# Duplicates are moved into this directory; set it to an empty string to
# delete them directly instead. If it is non-empty and already contains
# files, empty it first, otherwise the move reports an error.
dir3_to_delete = "/volume3/p_video3/video3/to_del"
# Files of 100 MB or less are ignored (this covers nfo/jpg and the like).
i_filesize_min = 100 * 1024 ** 2
def move_file(source_file, destination_directory):
    """Move ``source_file`` into ``destination_directory``.

    The destination directory is created first when it does not exist.
    A failed move is reported on stdout rather than raised, so a batch run
    keeps going; the return value tells the caller which case occurred.

    Args:
        source_file: Path of the file to move.
        destination_directory: Directory the file is moved into.

    Returns:
        The path of the moved file on success, ``None`` when the move failed.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(destination_directory, exist_ok=True)
    try:
        destination_path = shutil.move(source_file, destination_directory)
    except OSError as e:
        # shutil.Error (e.g. the destination already holds a file with the
        # same name) is a subclass of OSError and is handled here as well.
        print(f"文件移动失败: {e}")
        return None
    print(f"文件移动成功,目标路径: {destination_path}")
    return destination_path
def get_file_info(dir1, min_size=None):
    """Map every sufficiently large file under ``dir1`` to its size.

    Args:
        dir1: Root directory, scanned recursively.
        min_size: Only files strictly larger than this many bytes are
            recorded. Defaults to the module-level ``i_filesize_min``.

    Returns:
        A dict whose keys are file paths relative to ``dir1`` and whose
        values are the file sizes in bytes.
    """
    if min_size is None:
        min_size = i_filesize_min
    files1 = {}
    for root, _, filenames in os.walk(dir1):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            try:
                file_size = os.path.getsize(file_path)
            except OSError as e:
                # A dangling symlink or a file removed during the scan must
                # not abort the whole run.
                print(f"Skipped unreadable file: {file_path} ({e})")
                continue
            if file_size > min_size:
                # relpath stays correct when dir1 ends with a path separator,
                # where slicing by len(dir1) + 1 would drop a character.
                files1[os.path.relpath(file_path, dir1)] = file_size
    return files1
def _replace_with_hard_link(path_a, path_b):
    """Replace the file ``path_a`` with a hard link to ``path_b``.

    The link is first created under a temporary name next to ``path_a``, so
    when linking is impossible (for example the two paths are on different
    file systems) ``path_a`` is left untouched. Only then is the original
    deleted (``dir3_to_delete`` empty) or moved into ``dir3_to_delete``.

    Args:
        path_a: File in dir1 that is to become a hard link.
        path_b: File in dir2 that the link points to.

    Returns:
        ``'deleted'`` or ``'moved'`` depending on what happened to the
        original ``path_a``, or ``None`` when nothing was linked.
    """
    # Short temporary name, so long movie file names do not hit the
    # file-name length limit.
    tmp_link = os.path.join(os.path.dirname(path_a),
                            '.hardlink_tmp_%d' % os.getpid())
    try:
        if os.path.samefile(path_a, path_b):
            # Already the same inode, e.g. left over from a previous run.
            print('Already hard-linked, skipped: %s' % path_a)
            return None
        os.link(path_b, tmp_link)
    except OSError as e:
        print('Hard link failed, file left untouched: %s (%s)' % (path_a, e))
        return None

    outcome = None
    try:
        if dir3_to_delete == '':
            # Atomically swaps the original for the link.
            os.replace(tmp_link, path_a)
            outcome = 'deleted'
        else:
            move_file(path_a, dir3_to_delete)
            # move_file reports failures by printing, so check the result
            # on disk instead of relying on its return value.
            if os.path.lexists(path_a):
                print('Move failed, file left untouched: %s' % path_a)
            else:
                os.rename(tmp_link, path_a)
                outcome = 'moved'
    except OSError as e:
        print('Replacing with hard link failed: %s (%s)' % (path_a, e))
    finally:
        if outcome is None and os.path.lexists(tmp_link):
            os.remove(tmp_link)

    if outcome is not None:
        print("----Hard link created for:")
        print(path_a)
        print(path_b)
    return outcome


# Scan both trees: relative path -> size, for files above the size threshold.
files1 = get_file_info(dir1)
files2 = get_file_info(dir2)
# Run statistics.
i_count_all = 0
i_count_del = 0
i_count_move = 0
i_count_link = 0
i_file_size = 0

# Pass 1: same sub-folder, same file name, same file size.
print('比较————相同文件夹、文件名、文件大小:')
# Hard link example:
# /volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS/少年派的奇幻漂流.Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
# /volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS/少年派的奇幻漂流.Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
list_same_fullname = []
for filename in files1:
    i_count_all += 1
    if filename not in files2:
        continue
    if files1[filename] != files2[filename]:
        print('Not same filesize: %s' % filename)
        continue
    # Matched by full relative path: exclude from pass 2 whatever happens.
    list_same_fullname.append(filename)
    outcome = _replace_with_hard_link(os.path.join(dir1, filename),
                                      os.path.join(dir2, filename))
    if outcome is None:
        continue
    i_count_link += 1
    i_file_size += files1[filename]
    if outcome == 'deleted':
        i_count_del += 1
    else:
        i_count_move += 1

# Pass 2: different sub-folder, same file name, same file size.
print('比较————不同文件夹、相同文件名、文件大小:')
# Hard link examples:
# /volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS/Back.to.Back.Face.to.Face.1994.Bluray.1080p.x265.10bit.FLAC.MNHD-FRDS.mkv
# /volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS/背靠背,脸对脸.Back.to.Back.Face.to.Face.1994.Bluray.1080p.x265.10bit.FLAC.MNHD-FRDS/Back.to.Back.Face.to.Face.1994.Bluray.1080p.x265.10bit.FLAC.MNHD-FRDS.mkv
# /volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS/黑客帝国.The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
# /volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS/黑客帝国合集.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/黑客帝国.The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
for filename in list_same_fullname:
    del files1[filename]
    del files2[filename]
print(' 剩余 %d 个文件' % len(files1))
print(' 剩余 %d 个文件' % len(files2))

# Index the remaining dir2 files by bare file name. Keeping every relative
# path per name (instead of one dict entry per name plus a parallel list)
# keeps names and folders aligned when a file name occurs more than once.
files2_by_name = {}
for rel_path2 in files2:
    files2_by_name.setdefault(os.path.basename(rel_path2), []).append(rel_path2)

for rel_path1, size1 in files1.items():
    filename = os.path.basename(rel_path1)
    candidates = files2_by_name.get(filename)
    if not candidates:
        continue
    rel_path2 = next((c for c in candidates if files2[c] == size1), None)
    if rel_path2 is None:
        print('Not same filesize: %s' % filename)
        continue
    outcome = _replace_with_hard_link(os.path.join(dir1, rel_path1),
                                      os.path.join(dir2, rel_path2))
    if outcome is None:
        continue
    i_count_link += 1
    i_file_size += size1
    if outcome == 'deleted':
        i_count_del += 1
    else:
        i_count_move += 1

# Summary.
print('运行情况: ')
print(' 为 %d个文件做了 %d个硬链接' % (i_count_all, i_count_link))
formatted_gb = "%.2f" % (i_file_size/1024/1024/1024)
print(' 文件大小共 %s GB(可节省的磁盘空间)' % formatted_gb)
# Report according to the configured mode, not according to whether any
# file happened to be deleted.
if dir3_to_delete == '':
    print(' 删除了 %d个文件' % (i_count_del))
else:
    print(' 移动了 %d个文件到%s,可确认后手动删除' % (i_count_move, dir3_to_delete))
print('程序运行完毕')
安装python(群晖NAS)
群晖系统默认安装有python
SSH连接到群晖NAS后,输入`python`会显示已安装的版本信息(输入`quit()`退出)
如果你的系统没有python或者需要其他版本的python
套件中心->开源->Python3.9
套件来源设置矿神源->社群->Python3.8/3.10/3.11/3.12/3.13
矿神源:https://spk7.imnks.com/