program OccurrenceCounter;
{$mode delphi}
{$ImplicitExceptions Off}
{$MODESWITCH NESTEDPROCVARS}
uses
SysUtils, DateUtils,
Generics.Defaults, Generics.Collections,
LGUtils, LGHashMultiSet, LGArrayHelpers,
gutil, garrayutils, gvector, gmap,
WinSortCount3;
type
TIntPair = TPair<LongInt, LongInt>;
TProcedureArray = array of procedure;
function ComparePairs(constref L, R: TIntPair): LongInt;
begin
if L.Key < R.Key then
Result := -1
else if L.Key = R.Key then
Result := 0
else
Result := 1;
end;
var
Total, Unique, repeatCount, randomrange: Integer;
Start: TDateTime;
inFilename: String = 'data.txt';
outFilename: String = 'sorted.txt';
routineName: String;
procedures: TProcedureArray;
proc: procedure;
procedure GenerateData(randomRange: Integer=8; repeatMillionsCount: Integer=2);
var
InFile: Text;
I: LongInt;
begin
Assign(InFile, inFilename);
Rewrite(InFile);
for I := 1 to repeatMillionsCount * 1000000 do
WriteLn(InFile, 1500000000 + Random(randomRange * 100000));
Close(InFile);
end;
procedure SortCountAkira;
var
I: LongInt;
InOut: Text;
Map: TDictionary<LongInt, LongInt>;
Pair: TIntPair;
Pairs: TArray<TIntPair>;
begin
routineName := {$I %currentroutine%};
Map := TDictionary<LongInt, LongInt>.Create();
//Map.Capacity := 10000000;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do begin
{$I-}ReadLn(InOut, I);{$I+}
Inc(Total);
if not Map.ContainsKey(I) then
begin
Map.Add(I, 1);
Inc(Unique);
end
else
Map[I] := Map[I] + 1;
end;
Close(InOut);
Pairs := Map.ToArray();
Map.Free();
TArrayHelper<TIntPair>.Sort(
Pairs,
TComparer<TIntPair>.Construct(ComparePairs)
);
Assign(InOut, outFilename);
Rewrite(InOut);
for Pair in Pairs do with Pair do
{$I-}WriteLn(InOut, Key, ' - ', Value);{$I+}
Close(InOut);
end;
procedure SortCountHoward;
var
arr: array of Integer;
textf: TextFile;
min: Integer = High(Integer);
max: Integer = -1;
i: Integer;
begin
routineName := {$I %currentroutine%};
AssignFile(textf, inFilename);
Reset(textf);
while not EOF(textf) do
begin
{$I-}ReadLn(textf, i);{$I+}
Inc(Total);
if i < min then
min := i;
if i > max then
max := i;
end;
SetLength(arr, max-min+1);
Reset(textf);
while not EOF(textf) do
begin
ReadLn(textf, i);
Dec(i, min);
Inc(arr[i]);
end;
CloseFile(textf);
AssignFile(textf, outFilename);
Rewrite(textf);
for i := Low(arr) to High(arr) do
case (arr[i] > 0) of
True:
begin
{$I-}WriteLn(textf, i+min, ' - ', arr[i]); {$I+}
Inc(Unique);
end;
end;
CloseFile(textf);
SetLength(arr, 0);
end;
procedure SortCountAvk1;
type
TCounter = TGHashMultiSetLP<Integer>;
TCountRef = TGAutoRef<TCounter>;
TEntry = TCounter.TEntry;
function EntryCmp(constref L, R: TEntry): SizeInt;
begin
if L.Key > R.Key then
Result := 1
else
if L.Key < R.Key then
Result := -1
else
Result := 0;
end;
var
CountRef: TCountRef;
InOut: Text;
Counter: TCounter;
e: TEntry;
I: Integer;
begin
routineName := {$I %currentroutine%};
Counter := CountRef;
Counter.LoadFactor := 0.7;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do
begin
{$I-}ReadLn(InOut, I);{$I+}
Counter.Add(I);
end;
Close(InOut);
Total := Counter.Count;
Unique := Counter.EntryCount;
if Counter.NonEmpty then
begin
Assign(InOut, outFilename);
Rewrite(InOut);
for e in Counter.Entries.Sorted(EntryCmp) do
with e do
{$I-}WriteLn(InOut, Key, ' - ', Count);{$I+}
Close(InOut);
end;
end;
procedure SortCountAvk2;
var
List: array of Integer;
InOut: Text;
I, J, Count, DupCount: Integer;
begin
routineName := {$I %currentroutine%};
Assign(InOut, inFilename);
Reset(InOut);
SetLength(List, 4096);
I := 0;
while not EOF(InOut) do
begin
{$I-}ReadLn(InOut, J);{$I+}
Inc(Total);
if Length(List) = I then
SetLength(List, I * 2);
List[I] := J;
Inc(I);
end;
Close(InOut);
SetLength(List, I);
if List = nil then
exit;
TGOrdinalArrayHelper<Integer>.Sort(List);
Count := I;
DupCount := 0;
I := 0;
Assign(InOut, outFilename);
Rewrite(InOut);
repeat
J := List[I];
while (I < Count) and (List[I] = J) do
begin
Inc(DupCount);
Inc(I);
end;
{$I-}WriteLn(InOut, J, ' - ', DupCount);{$I+}
Inc(Unique);
DupCount := 0;
until I = Count;
Close(InOut);
end;
procedure SortCountJulkas1;
type
TIntLess = TLess<LongInt>;
TIntVect = TVector<LongInt>;
TOrd = TOrderingArrayUtils<TIntVect, LongInt, TIntLess>;
const
bsz = 1 shl 17;
var
sc: array[0..2147483647 shr 17] of TIntVect;
i: LongInt;
pkey, key, cnt: LongInt;
offset: LongInt;
InOut: Text;
begin
routineName := {$I %currentroutine%};
for i := Low(sc) to High(sc) do sc[i] := TIntVect.Create;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do
begin
{$I-}ReadLn(InOut, key);{$I+}
Inc(Total);
sc[key shr 17].PushBack(key and $1FFFF);
end;
Close(InOut);
Assign(InOut, outFilename);
Rewrite(InOut);
offset := -bsz;
for i := Low(sc) to High(sc) do
begin
Inc(offset, bsz);
pkey := -1;
cnt := 0;
if sc[i].Size > 1 then TOrd.Sort(sc[i], sc[i].Size);
for key in sc[i] do
begin
if pkey <> key then
begin
if cnt <> 0 then
begin
{$I-}WriteLn(InOut, offset + pkey, ' - ', cnt);{$I+}
Inc(Unique);
end;
pkey := key;
cnt := 0;
end;
Inc(cnt);
end;
if cnt <> 0 then
begin
{$I-}WriteLn(InOut, offset + pkey, ' - ', cnt);{$I+}
Inc(Unique);
end;
end;
Close(InOut);
for i := Low(sc) to High(sc) do sc[i].Destroy;
end;
procedure SortCountJulkas2;
type
TIntLess = TLess<LongInt>;
TIntVect = TVector<LongInt>;
TOrd = TOrderingArrayUtils<TIntVect, LongInt, TIntLess>;
var
sc: array[0..21474] of TIntVect;
i: LongInt;
pkey, key, cnt: LongInt;
offset: LongInt;
InOut: Text;
begin
routineName := {$I %currentroutine%};
for i := Low(sc) to High(sc) do sc[i] := TIntVect.Create;
Assign(InOut, inFilename);
Reset(InOut);
while not EOF(InOut) do
begin
{$I-}ReadLn(InOut, key);{$I+}
Inc(Total);
sc[key div 100000].PushBack(key mod 100000);
end;
Close(InOut);
Assign(InOut, outFilename);
Rewrite(InOut);
for i := Low(sc) to High(sc) do if sc[i].Size > 1 then TOrd.Sort(sc[i], sc[i].Size);
offset := -100000;
for i := Low(sc) to High(sc) do
begin
Inc(offset, 100000);
pkey := -1;
cnt := 0;
for key in sc[i] do
begin
if pkey <> key then
begin
if cnt <> 0 then
begin
{$I-}WriteLn(InOut, offset + pkey, ' - ', cnt);{$I+}
Inc(Unique);
end;
pkey := key;
cnt := 0;
end;
Inc(cnt);
end;
if cnt <> 0 then
begin
{$I-}WriteLn(InOut, offset + pkey, ' - ', cnt);{$I+}
Inc(Unique);
end;
end;
Close(InOut);
for i := Low(sc) to High(sc) do
sc[i].Destroy;
end;
procedure SortCount440bx;
begin
routineName := {$I %currentroutine%};
WinSortCount3.DataCount := 0;
WinSortCount3.Unique := 0;
WinSortCount3.InFileName := inFilename;
WinSortCount3.OutFileName := outFilename;
WinSortCount3.SortCount;
Total := WinSortCount3.DataCount;
Unique := WinSortCount3.Unique;
end;
procedure Run(aProc: TProcedure);
begin
Total := 0;
Unique := 0;
Start := Now;
try
aProc();
WriteLn(Copy(routineName, 10, 20):7,'''s time: ', MilliSecondsBetween(Now(), Start) / 1000.0 : 0 : 4,' #unique: ',Unique,' #total: ',Total);
except
on e: Exception do
WriteLn('crashes with message "', e.Message, '"');
end;
end;
begin
Randomize;
procedures := TProcedureArray.Create(
SortCountJulkas1, SortCountJulkas2, SortCountAkira, SortCountHoward, SortCountAvk1,
SortCountAvk2, SortCount440bx);
for randomrange := 1 to 10 do
begin
GenerateData(randomrange);
WriteLn(#10'RandomRange = ',randomrange);
for proc in procedures do
Run(proc);
end;
for repeatCount := 1 to 10 do
begin
GenerateData(8, 2*repeatCount);
WriteLn(#10'repeatMillionsCount = ', 2*repeatCount);
for proc in procedures do
Run(proc);
end;
end.