// LONGSTRINGS ON needed for ParamStr to return a null terminated string
{$LONGSTRINGS ON}
unit WinSortCount2;
interface
uses
windows,
sysutils,
dateutils
;
const
ntdll = 'ntdll.dll';
kernel32 = 'kernel32.dll';
var
// constants to replace ParamStr(1) and ParamStr(2) respectively
InFileName: string = '';
OutFileName: string = '';
DataCount : ptruint;
Unique : ptruint;
procedure SortCount;
implementation
// -----------------------------------------------------------------------------
// input file related types
type
// the input file is made up of unixtimes occupying 10 bytes followed by
// a CR/LF (on Windows) To avoid comparing strings (which is slow) we define
// the unixtime characters as being composed on a qword and a word followed
// the the CR/LF ending.
TINPUT_FILE_ELEMENT = packed record
case boolean of
0 : (
ife_hi : qword; // first 8 bytes of unixtime
ife_lo : word; // trailing 2 bytes of unixtime
ife_LineEnding : word; // CRLF
);
1 : (
ife_unixtime : packed array[0..9] of char;
ife_crlf : word;
);
end;
PINPUT_FILE_ELEMENT = ^TINPUT_FILE_ELEMENT;
// -----------------------------------------------------------------------------
// kernel32 related types and functions
type
PSECURITY_ATTRIBUTES = ^TSECURITY_ATTRIBUTES;
TSECURITY_ATTRIBUTES = record
Length : DWORD;
PointerToSecurityDescriptor : pointer;
InheritHandle : boolean32;
end;
function GetFileSizeEx( FileHandle : THANDLE;
var PointerToFileSize : qword)
: boolean32; stdcall; external kernel32;
function CreateFileA(PointerToFileName : pchar;
DesiredAccess : DWORD;
ShareMode : DWORD;
PointerToSecurityAttributes : PSECURITY_ATTRIBUTES;
CreationDisposition : DWORD;
FlagsAndAttributes : DWORD;
TemplateFileHandle : THANDLE)
: THANDLE; stdcall; external kernel32;
// -----------------------------------------------------------------------------
// ntdll related types and functions
type
TCompareFunction = function (key : pointer; data : pointer) : ptrint; cdecl;
const
COMPARE_EQUAL = 0;
COMPARE_GREATER = 1;
COMPARE_LESS = -1;
procedure qsort(Base : pointer;
ElementCount : ptruint;
ElementSize : ptruint;
CompareFunction : TCompareFunction);
cdecl; external ntdll;
procedure RtlMoveMemory(Destination : pointer;
Source : pointer;
BytesToCopy : ptruint);
stdcall; external ntdll;
const
CRLF = #13#10;
FORMAT : pchar = '%10.10s - %d' + CRLF;
// if there are no duplicates at all, then the output file will be the size
// of the input file plus 6 additional characters (see FORMAT above)
ADDITIONAL_OUTPUT = 6;
// NOTE : don't user user32.dll wsprintf, it's a paraplegic dog.
function sprintf(OutputDestination : pchar;
Format : pchar;
UnixTime : pchar;
Count : integer) : integer; cdecl; { CDECL !!}
external ntdll name 'sprintf';
// -----------------------------------------------------------------------------
function CompareUnixTimes(EntryA, EntryB : PINPUT_FILE_ELEMENT)
: ptrint; cdecl;
begin
if EntryA^.ife_hi > EntryB^.ife_hi then exit(COMPARE_GREATER);
if EntryA^.ife_hi < EntryB^.ife_hi then exit(COMPARE_LESS);
// the first qword of both entries is the same, use the last 2 bytes of the
// unixtime
if EntryA^.ife_lo > EntryB^.ife_lo then exit(COMPARE_GREATER);
if EntryA^.ife_lo < EntryB^.ife_lo then exit(COMPARE_LESS);
// they are the same
result := COMPARE_EQUAL;
end;
// -----------------------------------------------------------------------------
procedure Error(Id : ptruint);
begin
write('FATAL : ');
case Id of
1 : writeln('LoadInputFileIntoMemory failed');
2 : writeln('WriteOutputFile - failed to create output file');
3 : writeln('WriteOutputFile - failed to write to output file');
end;
halt(Id);
end;
// -----------------------------------------------------------------------------
function LoadInputFileIntoMemory (Filename : pchar;
var Filesize : qword;
var OutputBuffer : pchar)
: PINPUT_FILE_ELEMENT;
// maps the input file in memory, determines its size and copies the input
// file into a separate memory block (because the input file is not supposed
// to be sorted.) Also allocates a buffer for the output file.
var
FileHandle : THANDLE = 0;
FileMapping : THANDLE = 0;
FileMapAddress : pointer = nil;
// variables that will be returned upon success
FileData : pointer = nil;
Size : qword = 0;
FileOutBuffer : pointer = nil;
// to create a local scope (strictly local)
SCOPE : integer = 0;
UnixTimesCount : qword = 0;
const
// constants used by CreateFile
NO_TEMPLATE_FILE = 0;
// constants used by CreateFileMapping
NO_MAXIMUM_SIZE_HIGH = 0; // 0 indicates to use the size of the
NO_MAXIMUM_SIZE_LOW = 0; // file
// constants used by MapViewOfFileEx
FILE_OFFSET_HIGH = 0; // file offset to map from
FILE_OFFSET_LOW = 0;
BEGINNING_TO_END = 0;
begin
// initialize return values
result := nil;
Filesize := 0;
OutputBuffer := nil;
// map the input file and allocate necessary resources.
for SCOPE := 1 to 1 do // trick to create a scope one can break out of
begin
FileHandle := CreateFileA(Filename,
GENERIC_READ,
FILE_SHARE_READ,
nil,
OPEN_EXISTING,
FILE_ATTRIBUTE_NORMAL,
NO_TEMPLATE_FILE);
if FileHandle = INVALID_HANDLE_VALUE then break;
if not GetFileSizeEx(FileHandle, Size) then break;
if Size = 0 then break; // empty file
// with the file handle, create a mapping for it
FileMapping := CreateFileMappingA(FileHandle,
nil,
PAGE_READONLY,
NO_MAXIMUM_SIZE_HIGH, // use file size
NO_MAXIMUM_SIZE_LOW,
nil);
if (FileMapping = 0) then break;
FileMapAddress := MapViewOfFileEx(FileMapping,
FILE_MAP_READ,
FILE_OFFSET_HIGH, // from beginning
FILE_OFFSET_LOW,
BEGINNING_TO_END, // to end
nil); // map anywhere
if FileMapAddress = nil then break;
// allocate a memory block to hold the file data since the file itself
// won't be sorted
FileData := HeapAlloc(GetProcessHeap(),
0,
Size);
if FileData = nil then break;
// copy the data in the file to the block we just allocated
RtlMoveMemory(FileData, FileMapAddress, Size);
// allocate a buffer for the output file
UnixTimesCount := Size div sizeof(TINPUT_FILE_ELEMENT);
FileOutBuffer := HeapAlloc(GetProcessHeap(),
0,
Size + (UnixTimesCount * ADDITIONAL_OUTPUT));
if FileOutBuffer = nil then break; // just in case additional
// instructions are added at a
// later time.
end;
if (FileHandle <> INVALID_HANDLE_VALUE) then CloseHandle(FileHandle);
if (FileMapping <> 0) then CloseHandle(FileMapping);
if (FileMapAddress <> nil) then UnmapViewOfFile(FileMapAddress);
if (FileData <> nil) then
begin
Filesize := Size;
OutputBuffer := FileOutBuffer;
result := FileData;
end;
end;
// -----------------------------------------------------------------------------
procedure WriteOutputFile(OutputFilename : pchar;
DataOutputBuffer, DataOutPtr : pchar);
const
FILE_NO_SHARE = 0;
NO_TEMPLATE_FILE = 0;
var
FileHandle : THANDLE;
ByteCount : ptruint;
Ok : BOOL = FALSE;
BytesWritten : DWORD = 0;
begin
FileHandle := CreateFileA(OutputFilename,
GENERIC_READ or GENERIC_WRITE,
FILE_NO_SHARE,
nil,
CREATE_ALWAYS,
FILE_ATTRIBUTE_NORMAL,
NO_TEMPLATE_FILE);
if FileHandle = INVALID_HANDLE_VALUE then Error(2);
ByteCount := DataOutPtr - DataOutputBuffer;
Ok := WriteFile(FileHandle,
DataOutputBuffer^,
ByteCount,
BytesWritten,
nil);
if not Ok then Error(3);
CloseHandle(FileHandle);
end;
// -----------------------------------------------------------------------------
procedure SortCount;
var
Data : PINPUT_FILE_ELEMENT = nil;
DataEnd : PINPUT_FILE_ELEMENT = nil;
DataOutputBuffer : pchar = nil;
DataOutPtr : pchar = nil;
DataOutLength : integer = 0;
Filesize : qword = 0; // compiler whines otherwise
InstanceCount : integer = 0;
i, j : PINPUT_FILE_ELEMENT;
begin
Data := LoadInputFileIntoMemory(PChar(InFileName), Filesize, DataOutputBuffer);
if Data = nil then Error(1); // an empty file is treated as an error
// sort the data
DataCount := Filesize div sizeof(TINPUT_FILE_ELEMENT);
qsort(Data,
DataCount,
sizeof(Data^),
TCompareFunction(@CompareUnixTimes)
);
// use the same algorithm used by Avk to produce the output file.
Unique := 0;
InstanceCount := 0;
i := Data;
j := i;
DataEnd := Data + DataCount;
// determine the duplicate counts
DataOutPtr := DataOutputBuffer;
repeat
while (j < DataEnd)
and
((j^.ife_hi = i^.ife_hi) and (j^.ife_lo = i^.ife_lo))
do
begin
inc(InstanceCount);
inc(j);
end;
DataOutLength := sprintf(DataOutPtr,
FORMAT,
i^.ife_unixtime, InstanceCount);
inc(DataOutPtr, DataOutLength);
InstanceCount := 0;
inc(Unique);
i := j;
until j >= DataEnd;
WriteOutputFile(PChar(OutFileName), DataOutputBuffer, DataOutPtr);
//writeln('#unique : ', Unique, ' #Total : ', DataCount);
HeapFree(GetProcessHeap(), 0, Data);
HeapFree(GetProcessHeap(), 0, DataOutputBuffer);
end;
// -----------------------------------------------------------------------------
//
//var
// Start : TDATETIME;
//
//begin
// Start := Now;
//
// SortCount;
//
// writeln;
// writeln('elapsed time: ', MilliSecondsBetween(Now(), Start) / 1000.0 : 0 : 4);
end.