Hi Howard
Forgive me, but thanks to Taaz pointing out the issue with SL.Find, I worked this out for myself late last night, using Pos and RPosEx with ':' as the delimiter between filenames and hash values in the stringlists. I have pasted my revised procedure below for the benefit of others.
That said, having examined your demo project, there is much I'd like to take from that so I will probably merge the two solutions.
I'm very thankful to you for taking the time to assist, and if you wish to comment on the procedure below (ways to make it better, faster, etc.), please feel free.
{ Compares the content of the two directories named in lblDirAName and
  lblDirBName.

  Every file below each directory (sub-directories included) is hashed with
  CalcTheHashFile and listed in sgDirA / sgDirB. The two directories are
  reported as matching when they produce exactly the same sorted list of
  hashes; otherwise every file whose hash has no counterpart in the other
  directory is shown in a message box. Only file content is compared, not
  file names. }
procedure TMainForm.btnCompareClick(Sender: TObject);
var
  DirA, DirB: string;
  FilesA, FilesB,      // full path of every file found below DirA / DirB
  HashesA, HashesB,    // sorted file hashes; Objects[i] carries the index of the hashed file in FilesA / FilesB
  MismatchList: TStringList;

  { Sorts Files, hashes every entry, shows number / path / hash in Grid
    (row 0 is left for the header) and collects the hashes in Hashes. }
  procedure HashAndDisplay(Files, Hashes: TStringList; Grid: TStringGrid);
  var
    i: integer;
    FileHash: string;
  begin
    Files.Sort;
    // Two files with identical content must both be kept; the default
    // (dupIgnore) would silently drop the second hash from a sorted list.
    Hashes.Duplicates := dupAccept;
    Hashes.Sorted := true;
    Grid.RowCount := Files.Count + 1;
    for i := 0 to Files.Count - 1 do
    begin
      FileHash := CalcTheHashFile(Files[i]);
      // Remember which file produced this hash: the list is kept sorted by
      // hash, so its order no longer matches the order of Files.
      Hashes.AddObject(FileHash, TObject(PtrInt(i)));
      Grid.Cells[0, i + 1] := IntToStr(i + 1);
      Grid.Cells[1, i + 1] := Files[i];
      Grid.Cells[2, i + 1] := FileHash;
      // Select the row that was just filled (data rows start at 1).
      Grid.Row := i + 1;
      Grid.Col := 1;
    end;
  end;

  { Adds one line to MismatchList for every file of Files whose hash (taken
    from Hashes) does not occur in OtherHashes. }
  procedure ReportMissing(Hashes, OtherHashes, Files: TStringList);
  var
    i, FoundAt: integer;
  begin
    for i := 0 to Hashes.Count - 1 do
      // Find is a binary search; OtherHashes is a sorted list.
      if not OtherHashes.Find(Hashes[i], FoundAt) then
        MismatchList.Add(Files[PtrInt(Hashes.Objects[i])] + ' ' + Hashes[i] +
          ' is NOT in both directories');
  end;

begin
  DirA := lblDirAName.Caption;
  DirB := lblDirBName.Caption;

  // Local object references are not initialised automatically. Start from nil
  // so the finally block can safely Free whatever was created before an error.
  FilesA := nil;
  FilesB := nil;
  HashesA := nil;
  HashesB := nil;
  MismatchList := nil;
  try
    // First, list and hash the files in DirA.
    // FindAllFiles creates the list it returns; the caller only has to free it.
    FilesA := FindAllFiles(DirA, '*', True);
    HashesA := TStringList.Create;
    HashAndDisplay(FilesA, HashesA, sgDirA);
    lblTotalFileCountNumberA.Caption := IntToStr(FilesA.Count);

    // Then, list and hash the files in DirB.
    FilesB := FindAllFiles(DirB, '*', True);
    HashesB := TStringList.Create;
    HashAndDisplay(FilesB, HashesB, sgDirB);
    lblTotalFileCountNumberB.Caption := IntToStr(FilesB.Count);

    lblFileCountDiffB.Caption := IntToStr(Abs(FilesA.Count - FilesB.Count));

    // Both hash lists are sorted and hold one entry per file, so the
    // directories have the same content exactly when the lists are identical.
    // (Identical text also implies an identical number of files.)
    if HashesA.Text = HashesB.Text then
      lblHashMatchB.Caption := 'MATCH!'
    else
    begin
      // Reached for differing file counts AND for equal counts with
      // differing content.
      lblHashMatchB.Caption := 'Mis-MATCH!';
      MismatchList := TStringList.Create;
      ReportMissing(HashesB, HashesA, FilesB);   // in DirB but not in DirA
      ReportMissing(HashesA, HashesB, FilesA);   // in DirA but not in DirB
      if MismatchList.Count > 0 then
        ShowMessage(MismatchList.Text)
      else
        // Every hash occurs on both sides, so the lists can only differ in
        // how many files share the same content.
        ShowMessage('Every file content exists in both directories, ' +
          'but not the same number of times (duplicate files?)');
    end;
  finally
    MismatchList.Free;
    HashesB.Free;
    FilesB.Free;
    HashesA.Free;
    FilesA.Free;
  end;
end;