python版本
```python
import os
import hashlib
from collections import defaultdict
def file_hash(path, chunk_size=8192):
    """Return the hex MD5 digest of the file at *path*.

    The file is read in chunks of *chunk_size* bytes so arbitrarily large
    files can be hashed with constant memory.
    """
    h = hashlib.md5()
    # "rb" must use straight quotes; the pasted curly quotes were a syntax error.
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()
def find_duplicates(root_dir):
    """Recursively scan *root_dir*, group files by MD5, and print duplicates.

    Unreadable files (permissions, vanished between walk and open, ...) are
    reported and skipped rather than aborting the whole scan.
    """
    hash_map = defaultdict(list)
    for dirpath, _, filenames in os.walk(root_dir):
        for name in filenames:
            path = os.path.join(dirpath, name)
            try:
                file_md5 = file_hash(path)
                hash_map[file_md5].append(path)
            # OSError covers all file-I/O failures; narrower than the original
            # blanket Exception, same best-effort skip-and-report behavior.
            except OSError as e:
                print(f"Error reading {path}: {e}")
    # Print each group of files that share a hash (i.e. identical content).
    for files in hash_map.values():
        if len(files) > 1:
            print("\n[重复文件]")
            for f in files:
                print(f)
if __name__ == "__main__":
    # Raw string: "C:\your\folder\path" would interpret \f as a form feed.
    find_duplicates(r"C:\your\folder\path")
```
或者powershell
powershell -command "Get-ChildItem -File -Recurse | Group-Object -Property { (Get-FileHash -Path $_.FullName -Algorithm MD5).Hash } | Where-Object { $_.Count -gt 1 } | ForEach-Object { $_.Group | Select-Object FullName }"
或者cmd
```bat
@echo off
setlocal ENABLEDELAYEDEXPANSION
:: Temp file holding one "<hash> "<path>"" line per file
set hashlist=hashes.txt
del %hashlist% >nul 2>&1
echo 正在计算文件哈希值,请稍候…
:: (*) iterates files; the original (.) only matched directories.
:: findstr keeps only certutil's hex-digest line, dropping its header/footer.
for /R %%F in (*) do (
    for /f "tokens=1" %%H in ('certutil -hashfile "%%F" MD5 ^| findstr /R "^[0-9a-fA-F]"') do (
        echo %%H "%%F" >> %hashlist%
    )
)
echo 查找重复文件:
:: Sorting groups identical hashes together; the original piped through
:: "findstr /D" (a directory-search option), which broke the pipeline.
sort %hashlist% > sorted.txt
:: Emit each duplicate group exactly once by tracking the previous hash;
:: the original re-counted and re-printed the group for every member line.
set prev=
for /f "tokens=1,* delims= " %%A in (sorted.txt) do (
    if not "%%A"=="!prev!" (
        findstr /B /C:"%%A " sorted.txt | find /C /V "" > count.txt
        set /p count= < count.txt
        if !count! GTR 1 (
            echo.
            echo [重复文件:MD5=%%A]
            findstr /B /C:"%%A " sorted.txt
        )
    )
    set prev=%%A
)
del count.txt sorted.txt >nul 2>&1
```