[投稿]月月大包去重脚本

投稿
投稿
发布于 2025-02-08 / 99 阅读
0
0

[投稿]月月大包去重脚本

#PT

脚本来自@不识君ᯤ⁶ᴳ投稿

脚本文件

点击下载脚本文件:py_auto_hard_link.py

注意:python脚本,运行需要python环境

修改参数

需要修改的参数有:dir1、dir2、dir3_to_delete、i_filesize_min

# If duplicates are found, the files in dir1 are removed (folders are kept,
# because the hard links are created inside them).
dir1 = "/volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS"
# Directory that dir1 is checked against for duplicates.
dir2 = "/volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS"
# Identical files are moved to dir3. If this is an empty string the files are
# deleted directly. If it is non-empty and the directory already contains
# files, empty it first, otherwise an error is reported.
dir3_to_delete = '/volume3/p_video3/video3/to_del'

# Ignore files of 100 MB or less (this covers nfo/jpg and similar files).
i_filesize_min = 100 * 1024 * 1024

运行示例

以Ubuntu24.04为例,假设/home/test目录下有以下文件结构

其中DJI_0153.MP4和DJI_0154.MP4同时存在于test1和test2两个文件夹中(文件名和大小相同),属于重复文件

iShot_2025-02-09_12.31.49.jpg

此时脚本内的参数已经设置为

# Path of folder 1.
dir1 = "/home/test/test1"
# Path of folder 2.
dir2 = "/home/test/test2"
# Path of folder 3. Used here for testing only; leaving it empty is
# recommended in real use.
dir3_to_delete = '/home/test/test3'

# Ignore files of 1 MB or less; you can also set this to 0.
i_filesize_min = 1 * 1024 * 1024

运行python脚本

sudo python3 py_auto_hard_link.py

iShot_2025-02-09_12.34.32.jpg

运行后的文件结构:由于设置了dir3_to_delete,test1文件夹中的DJI_0153.MP4和DJI_0154.MP4已被移动到test3,原位置替换为指向test2中同名文件的硬链接(若dir3_to_delete留空,则原文件会被直接删除)

附录

完整代码内容

#!/usr/bin/env python
# coding: utf-8
# by Jiny on 2025-2-8
'''
用途:删除目录1中与目录2相同的文件并在目录1中建立硬链接
      适用于影视文件大包去重,如依据Douban_IMDB混合大包去重DoubanTop250合集中的文件
      
注意事项:
    3个目录要求在同一volume上
    只判断文件名称和大小相同,不判断文件内容;
    因环境各异,不为运行结果负任何责任;介意勿用。
    
建议按此运行以留存日志:
python3 /volume3/your_path/py_auto_hard_link.py > /volume3/your_path/py_auto_hard_link.log
'''

import os
import shutil


# Directory to deduplicate: files here that also exist in dir2 are removed.
# Folders are never removed, because the hard links are created inside them.
dir1 = "/volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS"
# Directory that dir1 is compared against for duplicates.
dir2 = "/volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS"
# Identical files are moved to this directory. If it is an empty string the
# files are deleted directly. If it is non-empty and the directory already
# contains files, empty it first, otherwise an error is reported.
dir3_to_delete = '/volume3/p_video3/video3/to_del'

# Size threshold in bytes: only files strictly larger than this are compared,
# so files of 100 MB or less (nfo/jpg and similar) are ignored.
i_filesize_min = 100 * 1024 * 1024

def move_file(source_file, destination_directory):
    """Move *source_file* into *destination_directory*.

    The destination directory is created when it does not exist yet. A failed
    move is reported on stdout instead of being raised, so callers must check
    the return value to find out whether the file was really moved.

    Args:
        source_file: Path of the file to move.
        destination_directory: Directory that receives the file.

    Returns:
        The final path of the moved file, or ``None`` if the move failed.
    """
    # exist_ok=True replaces the separate existence check and avoids the
    # check-then-create race between the two calls.
    os.makedirs(destination_directory, exist_ok=True)

    try:
        destination_path = shutil.move(source_file, destination_directory)
    except OSError as e:
        # shutil.Error (e.g. a file of the same name already exists in the
        # destination) is a subclass of OSError, so it is handled here too.
        print(f"文件移动失败: {e}")
        return None

    print(f"文件移动成功,目标路径: {destination_path}")
    return destination_path

def get_file_info(dir1, min_size=None):
    """Collect the sizes of all large files below *dir1*.

    Args:
        dir1: Root directory to scan recursively.
        min_size: Only files strictly larger than this many bytes are
            returned. Defaults to the module-level ``i_filesize_min``.

    Returns:
        A dict mapping each file's path relative to *dir1* to its size in
        bytes.
    """
    if min_size is None:
        # Fall back to the script-wide threshold.
        min_size = i_filesize_min

    files1 = {}
    for root, _, filenames in os.walk(dir1):
        for filename in filenames:
            file_path = os.path.join(root, filename)
            try:
                file_size = os.path.getsize(file_path)
            except OSError as e:
                # e.g. a broken symlink or a file removed during the scan;
                # one unreadable entry should not abort the whole scan.
                print(f"无法读取文件大小,已跳过: {file_path} ({e})")
                continue
            if file_size > min_size:
                # relpath stays correct when dir1 ends with a path separator,
                # unlike slicing off len(dir1) + 1 characters.
                files1[os.path.relpath(file_path, dir1)] = file_size
    return files1

# Scan both trees once. Each dict maps a path relative to the scanned root to
# the file size in bytes, and only holds files above the size threshold.
files1 = get_file_info(dir1)
files2 = get_file_info(dir2)

# Run statistics reported in the summary at the end.
i_count_all = 0    # large files found in dir1
i_count_del = 0    # dir1 files deleted (dir3_to_delete is empty)
i_count_move = 0   # dir1 files moved to dir3_to_delete
i_count_link = 0   # hard links created
i_file_size = 0    # total bytes of the dir1 files that were replaced


def _link_duplicate(path_a, path_b, file_size):
    """Replace the dir1 file *path_a* with a hard link to *path_b*.

    Depending on ``dir3_to_delete`` the original *path_a* is either dropped
    or moved away first. The run counters are updated only when a link was
    actually created.

    Args:
        path_a: Full path of the duplicate inside dir1.
        path_b: Full path of the file inside dir2 that is kept.
        file_size: Size of *path_a* in bytes, added to the saved-space total.

    Returns:
        True if a hard link was created, False if the pair was skipped.
    """
    global i_count_del, i_count_move, i_count_link, i_file_size

    if os.path.samefile(path_a, path_b):
        # Both names already refer to the same file (e.g. linked by an
        # earlier run), so there is nothing to replace and no space to save.
        print('Already hard linked, skipped: %s' % path_a)
        return False

    if dir3_to_delete == '':
        # Create the link under a temporary name and rename it over path_a,
        # so the original is never gone before its replacement exists.
        path_tmp = path_a + '.hardlink_tmp'
        os.link(path_b, path_tmp)
        try:
            os.replace(path_tmp, path_a)
        except OSError:
            os.remove(path_tmp)
            raise
        i_count_del += 1
    else:
        move_file(path_a, dir3_to_delete)
        if os.path.lexists(path_a):
            # move_file prints its own error and does not raise. If the file
            # is still in place the move failed, so leave it untouched rather
            # than crashing on os.link with FileExistsError.
            print('Skipped (move failed): %s' % path_a)
            return False
        # If linking fails from here on, the original is still in dir3.
        os.link(path_b, path_a)
        i_count_move += 1

    print("----Hard link created for:")
    print(path_a)
    print(path_b)
    i_count_link += 1
    i_file_size += file_size
    return True


print('比较————相同文件夹、文件名、文件大小:')
# Pass 1: same relative path (folder + file name) and same size.
# Example pair:
# /volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS/少年派的奇幻漂流.Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
# /volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS/少年派的奇幻漂流.Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/Life.of.Pi.2012.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
list_same_fullname = []
for filename, size1 in files1.items():
    i_count_all += 1
    if filename not in files2:
        continue
    if size1 != files2[filename]:
        print('Not same filesize: %s' % filename)
        continue
    _link_duplicate(os.path.join(dir1, filename),
                    os.path.join(dir2, filename),
                    size1)
    # Handled (or deliberately skipped) here, so exclude it from pass 2.
    list_same_fullname.append(filename)

print('比较————不同文件夹、相同文件名、文件大小:')
# Pass 2: same file name and size, but located in a different folder.
# Example pairs:
# /volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS/Back.to.Back.Face.to.Face.1994.Bluray.1080p.x265.10bit.FLAC.MNHD-FRDS.mkv
# /volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS/背靠背,脸对脸.Back.to.Back.Face.to.Face.1994.Bluray.1080p.x265.10bit.FLAC.MNHD-FRDS/Back.to.Back.Face.to.Face.1994.Bluray.1080p.x265.10bit.FLAC.MNHD-FRDS.mkv
#
# /volume3/video3/movies/DouBan.2022.11.11.Top.250.BluRay.1080p.x265.10bit.MNHD-FRDS/黑客帝国.The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
# /volume3/video3/movies/DouBan_IMDB.TOP250.Movies.Mixed.Collection.20240501.FRDS/黑客帝国合集.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/黑客帝国.The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS/The.Matrix.1999.BluRay.1080p.x265.10bit.2Audio.MNHD-FRDS.mkv
for filename in list_same_fullname:
    del files1[filename]
    del files2[filename]
print('    剩余 %d 个文件' % len(files1))
print('    剩余 %d 个文件' % len(files2))

# Index the remaining dir2 files by base name. Every (relative path, size)
# pair is kept, so several files sharing a base name cannot overwrite each
# other or shift the pairing between names and folders.
files2_by_name = {}
for relative_path2, size2 in files2.items():
    files2_by_name.setdefault(os.path.basename(relative_path2), []).append(
        (relative_path2, size2))

for relative_path1, size1 in files1.items():
    filename = os.path.basename(relative_path1)
    candidates = files2_by_name.get(filename)
    if not candidates:
        continue
    # Use the first dir2 file with this name whose size also matches.
    relative_path2 = next(
        (path for path, size in candidates if size == size1), None)
    if relative_path2 is None:
        print('Not same filesize: %s' % filename)
        continue
    _link_duplicate(os.path.join(dir1, relative_path1),
                    os.path.join(dir2, relative_path2),
                    size1)


print('运行情况: ')
print('    为 %d个文件做了 %d个硬链接' % (i_count_all, i_count_link))
formatted_gb = "%.2f" % (i_file_size/1024/1024/1024)
print('    文件大小共 %s GB(可节省的磁盘空间)' % formatted_gb)
# Report according to the configured mode, not according to whether the
# delete counter happens to be non-zero.
if dir3_to_delete == '':
    print('    删除了 %d个文件' % (i_count_del))
else:
    print('    移动了 %d个文件到%s,可确认后手动删除' % (i_count_move, dir3_to_delete))
print('程序运行完毕')

安装python(群晖NAS)

群晖系统默认安装有python

SSH连接到群晖NAS后,输入python会显示已安装的版本信息(输入quit() 退出)

如果你的系统没有python或者需要其他版本的python

  • 套件中心->开源->Python3.9

  • 套件来源设置矿神源->社群->Python3.8/3.10/3.11/3.12/3.13
    矿神源:https://spk7.imnks.com/


评论