#!/bin/bash
#
# Find all duplicate files within the current directory and subdirectories.
# Prints one line per identical pair, in the form:  <path1> == <path2>
#
# Requires bash, GNU find (for -printf), sort, cmp, and gawk (any POSIX awk
# is used as a fallback when gawk is not installed).
#
# Limitation: file names containing a newline are not supported, because the
# find | sort | awk pipeline is line-oriented.
#

#######################################
# List every pair of regular files under the current directory whose
# contents are byte-for-byte identical.
#
# Files are first grouped by size (only files of equal size can be equal),
# then every pair inside a group is compared with cmp. All empty files are
# reported as identical to each other without running cmp.
#
# Globals:   none
# Arguments: none
# Outputs:   "<path1> == <path2>" lines on stdout; find diagnostics
#            (e.g. unreadable directories) on stderr
# Returns:   exit status of the awk stage; 1 if no awk interpreter is found
#######################################
find_duplicates() {
  local awk_bin
  awk_bin=$(command -v gawk || command -v awk) || {
    printf '%s\n' 'find_duplicates: gawk (or awk) is required' >&2
    return 1
  }

  # -printf emits the exact size and the raw path separated by a TAB.
  # (-ls pads columns with a variable number of blanks and, with GNU find,
  # backslash-escapes unusual characters, so paths cannot be recovered
  # reliably from its output.)
  # LC_ALL=C keeps ordering and string handling byte-wise and reproducible.
  find . -type f -printf '%s\t%p\n' \
    | LC_ALL=C sort -t $'\t' -k1,1n \
    | LC_ALL=C "$awk_bin" '
    BEGIN {
      SQ = "\047"      # a single-quote character
      count = 0        # number of paths collected for the current size
      cur_size = -1    # size shared by names[1..count]; -1 = no group yet
    }

    # Return s wrapped in single quotes so that /bin/sh treats it as one
    # literal word. Each embedded single quote is rewritten as the sequence
    # close-quote, backslash-quote, reopen-quote.
    function shell_quote(s) {
      gsub(SQ, SQ "\\" SQ SQ, s)
      return SQ s SQ
    }

    # Return 1 when both files have identical contents, 0 otherwise.
    # The caller guarantees both files have size fsize; two empty files are
    # trivially identical, so no external process is started for them.
    # Any cmp failure (files differ, unreadable, vanished) counts as
    # "not identical".
    function is_identical(fname1, fname2, fsize) {
      if (fsize == 0)
        return 1
      return system("cmp -s -- " shell_quote(fname1) " " \
                    shell_quote(fname2) " >/dev/null 2>&1") == 0
    }

    # Compare every pair in the current same-size group and print the
    # identical ones. a and b are locals (awk extra-parameter idiom).
    function report_dups(    a, b) {
      for (a = 1; a < count; ++a)
        for (b = a + 1; b <= count; ++b)
          if (is_identical(names[a], names[b], cur_size))
            printf "%s == %s\n", names[a], names[b]
    }

    {
      # Split at the FIRST tab only: the path itself may contain tabs.
      tab = index($0, "\t")
      if (tab == 0)
        next           # fragment of a name containing a newline; skip it
      size = substr($0, 1, tab - 1) + 0
      path = substr($0, tab + 1)

      # Input is sorted by size, so a size change closes the current group.
      if (size != cur_size) {
        report_dups()
        count = 0
        cur_size = size
      }
      names[++count] = path
    }

    END {
      report_dups()    # flush the final group
    }
  '
}

find_duplicates