program OccurrenceCounter;
{$mode delphi}
{$ImplicitExceptions Off}
{$MODESWITCH NESTEDPROCVARS}
uses
Classes, SysUtils, DateUtils,
Generics.Defaults, Generics.Collections,
LGUtils, LGHashMultiSet, LGArrayHelpers,
gutil, garrayutils, gvector, gmap,
WinSortCount3;
type
TIntPair = TPair<LongInt, LongInt>;
TProcedureArray = array of procedure;
function ComparePairs(constref L, R: TIntPair): LongInt;
begin
if L.Key < R.Key then
Result := -1
else if L.Key = R.Key then
Result := 0
else
Result := 1;
end;
var
Total, Unique, repeatCount, randomrange: Integer;
Start: TDateTime;
inFilename: String = 'data.txt';
outFilename: String = 'sorted.txt';
routineName: String;
procedures: TProcedureArray;
proc: procedure;
procedure GenerateData(randomRange: Integer=8; repeatMillionsCount: Integer=2);
var
InFile: Text;
I: LongInt;
begin
Assign(InFile, inFilename);
Rewrite(InFile);
for I := 1 to repeatMillionsCount * 1000000 do
WriteLn(InFile, 1500000000 + Random(randomRange * 100000));
Close(InFile);
end;
procedure SortCountAkira;
var
I: LongInt;
InOut: Text;
Map: TDictionary<LongInt, LongInt>;
Pair: TIntPair;
Pairs: TArray<TIntPair>;
begin
routineName := {$I %currentroutine%};
Map := TDictionary<LongInt, LongInt>.Create();
//Map.Capacity := 10000000;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do begin
ReadLn(InOut, I);
Inc(Total);
if not Map.ContainsKey(I) then
begin
Map.Add(I, 1);
Inc(Unique);
end
else
Map[I] := Map[I] + 1;
end;
Close(InOut);
Pairs := Map.ToArray();
Map.Free();
TArrayHelper<TIntPair>.Sort(
Pairs,
TComparer<TIntPair>.Construct(ComparePairs)
);
Assign(InOut, outFilename);
Rewrite(InOut);
for Pair in Pairs do with Pair do
WriteLn(InOut, Key, ' - ', Value);
Close(InOut);
end;
procedure SortCountHoward;
var
arr: array of Integer;
textf: TextFile;
min: Integer = High(Integer);
max: Integer = -1;
i: Integer;
begin
routineName := {$I %currentroutine%};
AssignFile(textf, inFilename);
Reset(textf);
while not EOF(textf) do
begin
ReadLn(textf, i);
Inc(Total);
if i < min then
min := i;
if i > max then
max := i;
end;
SetLength(arr, max-min+1);
Reset(textf);
while not EOF(textf) do
begin
ReadLn(textf, i);
Dec(i, min);
Inc(arr[i]);
end;
CloseFile(textf);
AssignFile(textf, outFilename);
Rewrite(textf);
for i := Low(arr) to High(arr) do
case (arr[i] > 0) of
True:
begin
WriteLn(textf, i+min, ' - ', arr[i]);
Inc(Unique);
end;
end;
CloseFile(textf);
SetLength(arr, 0);
end;
procedure SortCountAvk1;
type
TCounter = TGHashMultiSetLP<Integer>;
TCountRef = TGAutoRef<TCounter>;
TEntry = TCounter.TEntry;
function EntryCmp(constref L, R: TEntry): SizeInt;
begin
if L.Key > R.Key then
Result := 1
else
if L.Key < R.Key then
Result := -1
else
Result := 0;
end;
var
CountRef: TCountRef;
InOut: Text;
Counter: TCounter;
e: TEntry;
I: Integer;
begin
routineName := {$I %currentroutine%};
Counter := CountRef;
//Counter.LoadFactor := 0.7;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do
begin
ReadLn(InOut, I);
Counter.Add(I);
end;
Close(InOut);
Total := Counter.Count;
Unique := Counter.EntryCount;
if Counter.NonEmpty then
begin
Assign(InOut, outFilename);
Rewrite(InOut);
for e in Counter.Entries.Sorted(EntryCmp) do
with e do
WriteLn(InOut, Key, ' - ', Count);
Close(InOut);
end;
end;
procedure SortCountAvk2;
type
TIntArray = array of Integer;
function LoadData: TIntArray;
var
PCurr, PLast: PByte;
DataSize, CurrValue, I: Integer;
DoReading: Boolean = False;
begin
Result := nil;
I := 0;
with TMemoryStream.Create do
try
LoadFromFile(inFileName);
DataSize := Size;
if DataSize <= 0 then
exit;
SetLength(Result, DataSize div 10);
PCurr := Memory;
PLast := PCurr + Size;
CurrValue := 0;
repeat
if PCurr^ > $0D then
begin
DoReading := True;
CurrValue := CurrValue * 10 + PCurr^ - Ord('0');
end
else
if DoReading then
begin
if Length(Result) = I then
SetLength(Result, I * 2);
Result[I] := CurrValue;
Inc(I);
CurrValue := 0;
DoReading := False;
end;
Inc(PCurr);
until PCurr > PLast;
finally
Free;
end;
SetLength(Result, I);
end;
var
List: TIntArray;
InOut: Text;
I, J, Count, DupCount: Integer;
begin
routineName := {$I %currentroutine%};
List := LoadData;
if List = nil then
exit;
TGOrdinalArrayHelper<Integer>.Sort(List);
Total := Length(List);
Count := Total;
DupCount := 0;
I := 0;
Assign(InOut, outFilename+'1');
Rewrite(InOut);
repeat
J := List[I];
while (I < Count) and (List[I] = J) do
begin
Inc(DupCount);
Inc(I);
end;
WriteLn(InOut, J, ' - ', DupCount);
Inc(Unique);
DupCount := 0;
until I = Count;
Close(InOut);
end;
procedure SortCountJulkas1;
type
TIntLess = TLess<LongInt>;
TIntVect = TVector<LongInt>;
TOrd = TOrderingArrayUtils<TIntVect, LongInt, TIntLess>;
const
bsz = 1 shl 17;
var
sc: array[0..2147483647 shr 17] of TIntVect;
i: LongInt;
pkey, key, cnt: LongInt;
offset: LongInt;
InOut: Text;
begin
routineName := {$I %currentroutine%};
for i := Low(sc) to High(sc) do sc[i] := TIntVect.Create;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do
begin
ReadLn(InOut, key);
Inc(Total);
sc[key shr 17].PushBack(key and $1FFFF);
end;
Close(InOut);
Assign(InOut, outFilename);
Rewrite(InOut);
offset := -bsz;
for i := Low(sc) to High(sc) do
begin
Inc(offset, bsz);
pkey := -1;
cnt := 0;
if sc[i].Size > 1 then TOrd.Sort(sc[i], sc[i].Size);
for key in sc[i] do
begin
if pkey <> key then
begin
if cnt <> 0 then
begin
WriteLn(InOut, offset + pkey, ' - ', cnt);
Inc(Unique);
end;
pkey := key;
cnt := 0;
end;
Inc(cnt);
end;
if cnt <> 0 then
begin
WriteLn(InOut, offset + pkey, ' - ', cnt);
Inc(Unique);
end;
end;
Close(InOut);
for i := Low(sc) to High(sc) do sc[i].Destroy;
end;
procedure SortCountJulkas2;
type
TIntLess = TLess<LongInt>;
TIntVect = TVector<LongInt>;
TOrd = TOrderingArrayUtils<TIntVect, LongInt, TIntLess>;
var
sc: array[0..21474] of TIntVect;
i: LongInt;
pkey, key, cnt: LongInt;
offset: LongInt;
InOut: Text;
begin
routineName := {$I %currentroutine%};
for i := Low(sc) to High(sc) do sc[i] := TIntVect.Create;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do
begin
ReadLn(InOut, key);
Inc(Total);
sc[key div 100000].PushBack(key mod 100000);
end;
Close(InOut);
Assign(InOut, outFilename);
Rewrite(InOut);
for i := Low(sc) to High(sc) do if sc[i].Size > 1 then TOrd.Sort(sc[i], sc[i].Size);
offset := -100000;
for i := Low(sc) to High(sc) do
begin
Inc(offset, 100000);
pkey := -1;
cnt := 0;
for key in sc[i] do
begin
if pkey <> key then
begin
if cnt <> 0 then
begin
WriteLn(InOut, offset + pkey, ' - ', cnt);
Inc(Unique);
end;
pkey := key;
cnt := 0;
end;
Inc(cnt);
end;
if cnt <> 0 then
begin
WriteLn(InOut, offset + pkey, ' - ', cnt);
Inc(Unique);
end;
end;
Close(InOut);
for i := Low(sc) to High(sc) do
sc[i].Destroy;
end;
procedure SortCount440bx;
begin
routineName := {$I %currentroutine%};
WinSortCount3.DataCount := 0;
WinSortCount3.Unique := 0;
WinSortCount3.InFileName := inFilename;
WinSortCount3.OutFileName := outFilename;
WinSortCount3.SortCount;
Total := WinSortCount3.DataCount;
Unique := WinSortCount3.Unique;
end;
procedure SortCountBrunoK; { Note : requires Classes }
const
cCR = $0D;
c0 = Ord('0');
function LoadStreamToList(aMemStream: TMemoryStream; aList: TFPList): integer;
var
{ Parse lines }
lPByte, lPEndByte: PByte;
{ Values extraction }
lPByteTextStart: PByte = nil;
lValueStarted: boolean = False;
lDWORD: DWORD;
lCntRec: integer;
begin
{ Prepare aList }
lCntRec := aMemStream.Size;
if lCntRec <= 0 then // Stream empty ?
exit(0);
aList.Count := lCntRec div 10; // Setup approximative size
aList.Count := 0;
lPByte := PByte(aMemStream.memory);
lPEndByte := lPByte + aMemStream.Size;
while lPByte <= lPEndByte do begin
if (lPByte = lPEndByte) or (lPByte^ <= cCR) then begin
if lValueStarted then begin
lDWORD := 0;
while lPByteTextStart < lPByte do begin
lDWORD := lDWORD * 10 + lPByteTextStart^ - c0;
Inc(lPByteTextStart);
end;
aList.Add(Pointer(lDWORD));
lValueStarted := False;
end;
end
else if not lValueStarted then begin
lPByteTextStart := lPByte;
lValueStarted := True;
end;
Inc(lPByte);
end;
Result := aList.Count;
end;
function BkCompare(Item1, Item2: Pointer): integer;
begin
Result := 1;
if Item1 < Item2 then
Result := -1
else if Item1 = Item2 then
Result := 0;
end;
var
lFile: TextFile;
lMemStream: TMemoryStream;
lNbRecs: integer = 0;
lFPList: TFPList;
lIx: integer;
lLastValue: pointer;
lListCount: integer;
lLastValueCount: integer;
begin
routineName := 'SortCountBrunoK'; //{$I %currentroutine%};
lMemStream := TMemoryStream.Create;
lMemStream.LoadFromFile(inFileName);
lFPList := TFPList.Create;
lNbRecs := LoadStreamToList(lMemStream, lFPList);
lMemStream.Free; // Not needed anymore
if lNbRecs > 0 then begin
AssignFile(lFile, outFilename);
Rewrite(lFile);
lFPList.Sort(@BkCompare);
lIx := 0;
lLastValue := lFPList[lIx];
lLastValueCount := 1;
lListCount := lFPList.Count;
repeat
Inc(lIx);
if (lIx >= lListCount) or (lFPList[lIx] <> lLastValue) then begin
Inc(unique);
WriteLn(lFile, UINTPTR(lLastValue), ' - ', lLastValueCount);
if (lIx >= lListCount) then
Break;
lLastValue := lFPList[lIx];
lLastValueCount := 1;
end
else
Inc(lLastValueCount);
until False;
CloseFile(lFile);
Total := lIx;
end;
lFPList.Free;
end;
procedure Run(aProc: TProcedure);
begin
Total := 0;
Unique := 0;
Start := Now;
try
aProc();
WriteLn(Copy(routineName, 10, 20):7,'''s time: ', MilliSecondsBetween(Now(), Start) / 1000.0 : 0 : 4,#9'#unique: ',Unique,' #total: ',Total);
except
on e: Exception do
WriteLn('crashes with message "', e.Message, '"');
end;
end;
begin
Randomize;
procedures := TProcedureArray.Create(
SortCountJulkas1, SortCountJulkas2, SortCountAkira, SortCountHoward, SortCountAvk1,
SortCountAvk2, SortCount440bx, SortCountBrunoK);
for randomrange := 1 to 10 do
begin
GenerateData(randomrange, 10);
WriteLn(#10'RandomRange = ',randomrange);
for proc in procedures do
Run(proc);
end;
for repeatCount := 1 to 10 do
begin
GenerateData(8, 2*repeatCount);
WriteLn(#10'repeatMillionsCount = ', 2*repeatCount);
for proc in procedures do
Run(proc);
end;
end.