Originally Posted By: mlord
Perhaps write a small bash/awk script to do the job?


Like this, perhaps?
Code:
#!/bin/bash
#
# Find all duplicate files within a directory tree (default: the current
# directory).  Candidate duplicates are first grouped by size -- "find -ls"
# output sorted numerically on the size column -- and each same-size pair
# is then byte-compared with cmp(1).
#
# Requires /bin/bash, find, sort, cmp, and a POSIX awk.
#
# Limitation: filenames containing newlines are not handled ("find -ls"
# output is line-based).
#
# Usage: finddups [dir]

find_dups() {
    local dir=${1:-.}

    # -ls prints 10 whitespace-separated fields before the name; field 7
    # is the file size, so a numeric sort on it groups same-size files.
    find "$dir" -type f -ls | sort -n -k 7 | awk '

    # Wrap a filename in single quotes so it is safe to embed in a shell
    # command line; embedded apostrophes become the '\''...'\'' idiom.
    # \047 is an apostrophe -- spelled as an escape because this whole awk
    # program sits inside shell single quotes.
    function shquote(name) {
        gsub("\047", "\047\\\\\047\047", name)
        return "\047" name "\047"
    }

    # True when the two files have identical contents.  Zero-length files
    # are trivially identical, so skip the exec of cmp for those.
    function is_identical(fname1,fname2,fsize) {
        if (fsize == 0)
            return 1
        return ! system("cmp -s " shquote(fname1) " " shquote(fname2))
    }

    # Accumulate names while the size column stays the same; when the size
    # changes (or at EOF, sentinel size -1) pairwise-compare the finished
    # group and report matches.  Intentional globals carried across calls:
    # fsize, fcount, fnames[].  Everything after the second parameter is an
    # awk-style local variable.
    function finddups(line,this_fsize,    i,f1,f2,fname1,fname2,this_fname) {
        # find -ls left-pads the inode column: drop leading blanks first so
        # they are not miscounted as a field, then strip the 10 fields that
        # precede the name.  Spaces *inside* the name are preserved.
        sub(/^ +/, "", line)
        for (i = 0; i < 10; ++i)
            sub(/^[^ ]+ +/, "", line)
        this_fname = line

        if (fsize != this_fsize) {
            if (fcount > 1) {
                for (f1 = 1; f1 < fcount; ++f1) {
                    for (f2 = f1 + 1; f2 <= fcount; ++f2) {
                        fname1 = fnames[f1]
                        fname2 = fnames[f2]
                        if (is_identical(fname1,fname2,fsize))
                            printf "%s == %s\n", fname1, fname2
                    }
                }
            }
            fcount = 0
        }
        fsize = this_fsize
        fnames[++fcount] = this_fname
    }

    { finddups($0,$7) }

    # Flush the final size group with an impossible sentinel size.
    END { finddups("",-1) }
    '
}

find_dups "$@"