{ Sergey Bodrov (serbod@gmail.com) 2016-06-01 }
program knucleotide;
uses SysUtils, Classes, IniFiles, syncobjs;
const THREADS_NUM = 4; // set max number of parallel threads
type
TNuclCountRec = record
Name: string;
Count: Integer;
end;
TNuclCountArr = array of TNuclCountRec;
{ TWorker }
TWorker = class(TThread)
protected
procedure Execute(); override;
public
sFrame: ShortString;
Len: Integer;
Done: Boolean;
end;
var
shNucl: TStringHash;
NCArr: TNuclCountArr;
NCArrLen: Integer;
Total1, Total2: Integer;
WorkersCount: Integer;
lock: TCriticalSection;
WorkersList: TList;
procedure CountSeq(const sSeq: String);
var
n: integer;
begin
n := shNucl.ValueOf(sSeq);
if n = -1 then
begin
lock.Acquire();
n := NCArrLen;
Inc(NCArrLen);
SetLength(NCArr, NCArrLen);
NCArr[n].Name := sSeq;
NCArr[n].Count := 1;
shNucl.Add(sSeq, n);
lock.Release();
end
else
begin
InterLockedIncrement(NCArr[n].Count);
end;
end;
procedure CountFrame(Len: Integer; const sFrame: ShortString);
var
i, n: integer;
s: string;
begin
n := Length(sFrame);
for i := 1 to Len do
begin
s := Copy(sFrame, i, 1);
CountSeq(s);
InterLockedIncrement(Total1);
if i < n then // last pair
begin
s := Copy(sFrame, i, 2);
CountSeq(s);
InterLockedIncrement(Total2);
end;
s := Copy(sFrame, i, 18);
if Pos('ggt', s) = 1 then
begin
CountSeq('ggt');
if Pos('ggta', s) = 1 then
begin
CountSeq('ggta');
if Pos('ggtatt', s) = 1 then
begin
CountSeq('ggtatt');
if Pos('ggtattttaatt', s) = 1 then
begin
CountSeq('ggtattttaatt');
if Pos('ggtattttaatttatagt', s) = 1 then CountSeq('ggtattttaatttatagt');
end;
end;
end;
end;
end;
end;
function SortItems(Item1, Item2: Pointer): Integer;
begin
Result := TNuclCountRec(Item2^).Count - TNuclCountRec(Item1^).Count;
end;
procedure ExportFreq(n, TotalCount: Integer);
var
i: Integer;
d: Double;
lst: TList;
begin
lst := TList.Create();
try
for i := 0 to NCArrLen-1 do
begin
if Length(NCArr[i].Name) <> n then Continue;
lst.Add(@NCArr[i]);
end;
lst.Sort(@SortItems);
for i := 0 to lst.Count-1 do
begin
d := TNuclCountRec(lst[i]^).Count / (TotalCount / 100);
WriteLn(UpperCase(TNuclCountRec(lst[i]^).Name)+' '+FormatFloat('0.000', d));
end;
finally
lst.Free();
end;
end;
procedure ExportCount(sSeq: string);
var
i, n: Integer;
begin
n := 0;
for i := 0 to NCArrLen-1 do
begin
if NCArr[i].Name = sSeq then
begin
n := NCArr[i].Count;
Break;
end;
end;
WriteLn(IntToStr(n) + #$09 + UpperCase(sSeq));
end;
procedure SpawnWorker(Len: Integer; const sFrame: ShortString);
var
Worker: TWorker;
i: Integer;
begin
if WorkersList.Count < THREADS_NUM then
begin
Worker := TWorker.Create(True);
Worker.FreeOnTerminate := False;
WorkersList.Add(Worker);
end
else
begin
Worker := nil;
while WorkersCount >= THREADS_NUM do Sleep(0);
while not Assigned(Worker) do
begin
for i := 0 to WorkersList.Count-1 do
begin
if TWorker(WorkersList[i]).Done then
begin
Worker := TWorker(WorkersList[i]);
end;
end;
end;
end;
Worker.Len := Len;
Worker.sFrame := sFrame;
Worker.Done := False;
Worker.Suspended := False;
end;
procedure Main();
var
s, sPrevFrame: ShortString;
bSkip: Boolean;
iFrameLen: Integer;
ii: Integer;
begin
shNucl := TStringHash.Create(16);
lock := TCriticalSection.Create();
WorkersList := TList.Create();
NCArrLen := 0;
SetLength(NCArr, NCArrLen);
bSkip := True;
Total1 := 0;
Total2 := 0;
sPrevFrame := '';
iFrameLen := 0;
FormatSettings.DecimalSeparator := '.';
WorkersCount := 0;
ii := 0;
while True do
begin
ReadLn(s);
if Length(s) = 0 then Break;
if bSkip then
begin
if Pos('>THREE', s) > 0 then
begin
bSkip := False;
end;
Continue;
end;
Inc(ii);
// prev frame + 18
if iFrameLen > 0 then
begin
if Length(sPrevFrame) <> (iFrameLen + 18) then
SetLength(sPrevFrame, iFrameLen + 18);
Move(s[1], sPrevFrame[iFrameLen+1], 18);
if THREADS_NUM > 1 then
SpawnWorker(iFrameLen, sPrevFrame)
else
CountFrame(iFrameLen, sPrevFrame);
end
else
begin
SetLength(sPrevFrame, Length(s) + 18);
end;
iFrameLen := Length(s);
Move(s[1], sPrevFrame[1], iFrameLen);
end;
SetLength(sPrevFrame, iFrameLen);
CountFrame(iFrameLen, sPrevFrame);
while WorkersCount > 0 do Sleep(1);
WorkersList.Free();
lock.Free();
shNucl.Free();
// export result
ExportFreq(1, Total1);
WriteLn();
ExportFreq(2, Total2);
WriteLn();
ExportCount('ggt');
ExportCount('ggta');
ExportCount('ggtatt');
ExportCount('ggtattttaatt');
ExportCount('ggtattttaatttatagt');
end;
{ TWorker }
procedure TWorker.Execute;
begin
while not Terminated do
begin
if not Done then
begin
InterlockedIncrement(WorkersCount);
CountFrame(Len, sFrame);
InterlockedDecrement(WorkersCount);
Done := True;
end;
Sleep(0);
end;
end;
begin
Main();
end.