program Project1;
uses sysutils, strutils;
type
{ Working state of one ChaCha20 keystream generator. }
chacha20state = record
// 16-word input block. chacha20_init fills words 0..3 with the 16-byte
// constant, words 4..11 with the key, and words 13..15 with the nonce;
// chacha20_set_counter writes the block counter into word 12.
state: array[0..15] of dword;
// Most recently generated 64-byte keystream block (written by chacha20_next_block).
keystream: array[0..15] of dword;
// Byte offset of the next unused keystream byte. A value of 64 or more
// means the block is used up and a new one must be generated before use.
position: dword;
end;
{ Sets the block counter and discards any buffered keystream.

  state   - generator to reposition.
  counter - value stored in input word 12; the next generated block uses it. }
procedure chacha20_set_counter(var state: chacha20state; counter: dword);
const
  counter_word = 12;
  keystream_exhausted = 64;
begin
  // Marking the buffer as fully consumed forces the next XOR call to
  // generate a fresh block from the new counter value.
  state.position := keystream_exhausted;
  state.state[counter_word] := counter;
end;
{ Resets state and loads the constant, key, counter and nonce into the
  16-word input block.

  state   - record to initialise; it is zero-filled first.
  key     - up to 32 bytes are copied into words 4..11; extra bytes are
            ignored and a shorter key leaves the remaining bytes zero.
  nonce   - up to 12 bytes are copied into words 13..15, with the same
            truncate / zero-pad behaviour as the key.
  counter - initial block counter (word 12), 0 by default. }
procedure chacha20_init(var state: chacha20state; key, nonce: string; counter: dword=0);
const
  sigma = 'expand 32-byte k';
  max_key_bytes = 32;
  max_nonce_bytes = 12;
var
  key_bytes, nonce_bytes: integer;
begin
  fillchar(state, sizeof(state), 0);
  move(sigma[1], state.state[0], 16);

  // Clamp instead of truncating the string: only the first 32 bytes matter.
  key_bytes := length(key);
  if key_bytes > max_key_bytes then
    key_bytes := max_key_bytes;
  if key_bytes > 0 then
    move(key[1], state.state[4], key_bytes);

  chacha20_set_counter(state, counter);

  nonce_bytes := length(nonce);
  if nonce_bytes > max_nonce_bytes then
    nonce_bytes := max_nonce_bytes;
  if nonce_bytes > 0 then
    move(nonce[1], state.state[13], nonce_bytes);
end;
{ Rotates the 32-bit value x left by n bits and returns the result.

  The distance is reduced modulo 32, so n = 0 (and n = 32) return x
  unchanged. The previous shift-based form evaluated "x shr 32" for n = 0,
  which depends on how the target masks shift counts. }
function rotl32(x, n: dword): dword; inline;
begin
  // RolDWord is the System unit's rotate intrinsic.
  result := RolDWord(x, byte(n and 31));
end;
{ Applies one quarter round (add / xor / rotate by 16, 12, 8, 7) to the four
  words p[a], p[b], p[c], p[d] and writes the results back in place.

  p          - pointer to the first word of the block being mixed.
  a, b, c, d - indices of the four words to combine. }
procedure chacha20_quarterround(p: pdword; a, b, c, d: integer); inline;
var
  wa, wb, wc, wd: dword;
begin
  // Work on local copies; the block is only written once all four are done.
  wa := p[a];
  wb := p[b];
  wc := p[c];
  wd := p[d];

  wa := wa + wb;
  wd := rotl32(wa xor wd, 16);

  wc := wc + wd;
  wb := rotl32(wc xor wb, 12);

  wa := wa + wb;
  wd := rotl32(wa xor wd, 8);

  wc := wc + wd;
  wb := rotl32(wc xor wb, 7);

  p[a] := wa;
  p[b] := wb;
  p[c] := wc;
  p[d] := wd;
end;
{ Generates the next 64-byte keystream block into state.keystream.

  The input block is copied, mixed with 10 iterations of four column and
  four diagonal quarter rounds, and then the original input words are added
  back. Afterwards the block counter (word 12) is incremented and
  state.position is reset to 0 so the whole block is available. }
procedure chacha20_next_block(var state: chacha20state);
var
  ks: pdword;
  double_round, w: integer;
begin
  move(state.state, state.keystream, sizeof(state.keystream));
  ks := @state.keystream[0];

  for double_round := 1 to 10 do begin
    // Columns
    chacha20_quarterround(ks, 0, 4, 8, 12);
    chacha20_quarterround(ks, 1, 5, 9, 13);
    chacha20_quarterround(ks, 2, 6, 10, 14);
    chacha20_quarterround(ks, 3, 7, 11, 15);
    // Diagonals
    chacha20_quarterround(ks, 0, 5, 10, 15);
    chacha20_quarterround(ks, 1, 6, 11, 12);
    chacha20_quarterround(ks, 2, 7, 8, 13);
    chacha20_quarterround(ks, 3, 4, 9, 14);
  end;

  // Feed-forward: add the unmixed input block to the mixed words.
  for w := low(state.keystream) to high(state.keystream) do
    state.keystream[w] := state.keystream[w] + state.state[w];

  state.state[12] := state.state[12] + 1;
  state.position := 0;
end;
{ XORs len bytes at data, in place, with the keystream produced from state.

  state - generator; state.position tracks how many bytes of the current
          keystream block are already used, so consecutive calls continue
          the same stream at any byte offset.
  data  - buffer to transform in place.
  len   - number of bytes to process; 0 does nothing and generates no block.

  Fixes compared with the earlier version:
  - keystream bytes are addressed through a byte pointer; the old code
    indexed the dword array with a byte offset and read the wrong (and
    out-of-range) memory for partial blocks;
  - a partial block is never processed beyond len bytes, so short calls no
    longer overrun the buffer or underflow len;
  - partial blocks work at any byte offset, not only multiples of 4;
  - no "0 to len - 1" loop is executed with an unsigned len of 0. }
procedure chacha20_xor(var state: chacha20state; data: pointer; len: dword);
var
  p, pKeyBytes: PByte;
  pData, pKey: PDWord;
  chunk, i: dword;
begin
  p := data;
  pKey := @state.keystream[0];
  pKeyBytes := PByte(pKey);

  while len > 0 do begin
    if state.position >= 64 then
      chacha20_next_block(state);

    if (state.position = 0) and (len >= 64) then begin
      // Fast path: a whole, untouched keystream block, XORed as 16 dwords.
      pData := PDWord(p);
      pData[0] := pData[0] xor pKey[0];
      pData[1] := pData[1] xor pKey[1];
      pData[2] := pData[2] xor pKey[2];
      pData[3] := pData[3] xor pKey[3];
      pData[4] := pData[4] xor pKey[4];
      pData[5] := pData[5] xor pKey[5];
      pData[6] := pData[6] xor pKey[6];
      pData[7] := pData[7] xor pKey[7];
      pData[8] := pData[8] xor pKey[8];
      pData[9] := pData[9] xor pKey[9];
      pData[10] := pData[10] xor pKey[10];
      pData[11] := pData[11] xor pKey[11];
      pData[12] := pData[12] xor pKey[12];
      pData[13] := pData[13] xor pKey[13];
      pData[14] := pData[14] xor pKey[14];
      pData[15] := pData[15] xor pKey[15];
      chunk := 64;
    end
    else begin
      // Partial block: use what is left of the current block, but never
      // more than the caller asked for. chunk is at least 1 here because
      // position < 64 and len > 0, so "chunk - 1" cannot wrap.
      chunk := 64 - state.position;
      if chunk > len then
        chunk := len;
      for i := 0 to chunk - 1 do
        p[i] := p[i] xor pKeyBytes[state.position + i];
    end;

    inc(p, chunk);
    dec(len, chunk);
    inc(state.position, chunk);
  end;
end;
{ Benchmark driver: encrypts a 1 GiB buffer once and reports the elapsed
  time, the first keystream word of the last block (as a quick cross-check
  between builds) and the throughput. The buffer contents are not
  initialised; only the timing and the keystream word are reported. }
const
  size = 1024*1024*1024;
var
  cc: chacha20state;
  data: pointer;
  start, end_, elapsed: qword;
begin
  chacha20_init(cc, DupeString('a', 32), DupeString('b', 12));
  data := getmem(size);
  start := GetTickCount64;
  chacha20_xor(cc, data, size);
  end_ := GetTickCount64;
  // Release the buffer as soon as the measurement is done.
  freemem(data);
  elapsed := end_ - start;
  writeln('FPC 3.2.2 Unrolled ', elapsed, ' ms');
  writeln('verify = ', pint32(@cc.keystream)^);
  // GetTickCount64 has millisecond resolution; avoid dividing by zero if the
  // run finished within a single tick.
  if elapsed > 0 then
    writeln(Format('%.2f MB/s', [(size/(1024*1024))/(elapsed/1000)]))
  else
    writeln('elapsed time below timer resolution; throughput not computed');
  readln;
end.