program MatrixMultiplication;
{$mode objfpc}
{$optimization on}
{$MODESWITCH CLASSICPROCVARS+}
{$LONGSTRINGS ON}
uses
SysUtils,
Classes,
DateUtils,
Math;
const
N = 1000;
unroll = 4;
ThreadCount = 6;
type
TMatrix = array of double;
TVector = array of double;
var
A, B, R1, R2, R3: TMatrix;
StartTime: TDateTime;
procedure InitializeMatrix(var M: TMatrix);
var
i, j: integer;
begin
for i := 0 to N - 1 do
for j := 0 to N - 1 do
M[i * N + j] := Random;
end;
function Multiply1(const mat1, mat2: TMatrix): TMatrix;
var
i, j, k: nativeuint;
sum: double;
v: TVector;
begin
SetLength(v, N);
SetLength(Result, N * N);
for j := 0 to N - 1 do
begin
for k := 0 to N - 1 do
v[k] := mat2[k * N + j];
for i := 0 to N - 1 do
begin
sum := 0;
for k := 0 to N - 1 do
sum := sum + mat1[i * N + k] * v[k];
Result[i * N + j] := sum;
end;
end;
end;
procedure Multiply2Part(const mat1, mat2: TMatrix; var Result: TMatrix;
start_i, end_i: nativeuint);
var
i, j, k: nativeuint;
sum: double;
sum1, sum2, sum3, sum4: double;
v: TVector;
pm, pv: PDouble;
begin
SetLength(v, N);
for j := 0 to N - 1 do
begin
for k := 0 to N - 1 do
v[k] := mat2[k * N + j];
for i := start_i to end_i do
begin
sum := 0;
sum1 := 0;
sum2 := 0;
sum3 := 0;
sum4 := 0;
k := 0;
pm := @mat1[i * N];
pv := @v[0];
while k < (N - unroll + 1) do
begin
sum1 := sum1 + pm[k] * pv[k];
sum2 := sum2 + pm[k + 1] * pv[k + 1];
sum3 := sum3 + pm[k + 2] * pv[k + 2];
sum4 := sum4 + pm[k + 3] * pv[k + 3];
Inc(k, unroll);
end;
sum := sum1 + sum2 + sum3 + sum4;
while k < N do
begin
sum := sum + pm[k] * pv[k];
Inc(k);
end;
Result[i * N + j] := sum;
end;
end;
end;
type
TMultThread = class(TThread)
private
FStartI, FEndI: nativeuint;
FMat1, FMat2: TMatrix;
FResult: TMatrix;
public
constructor Create(StartI, EndI: nativeuint; const Mat1, Mat2: TMatrix;
var ResultMat: TMatrix);
procedure Execute; override;
end;
constructor TMultThread.Create(StartI, EndI: nativeuint;
const Mat1, Mat2: TMatrix; var ResultMat: TMatrix);
begin
inherited Create(True);
FStartI := StartI;
FEndI := EndI;
FMat1 := Mat1;
FMat2 := Mat2;
FResult := ResultMat;
FreeOnTerminate := False;
end;
procedure TMultThread.Execute;
begin
Multiply2Part(FMat1, FMat2, FResult, FStartI, FEndI);
end;
function MultiplyThreaded(const mat1, mat2: TMatrix): TMatrix;
var
i: integer;
start_i, end_i: nativeuint;
rowsPerThread, remainder: nativeuint;
Threads: array of TMultThread;
begin
SetLength(Result, N * N);
rowsPerThread := N div ThreadCount;
remainder := N mod ThreadCount;
SetLength(Threads, ThreadCount);
for i := 0 to ThreadCount - 1 do
begin
start_i := i * rowsPerThread;
end_i := start_i + rowsPerThread - 1;
if i = ThreadCount - 1 then
end_i := end_i + remainder;
Threads[i] := TMultThread.Create(start_i, end_i, mat1, mat2, Result);
end;
for i := 0 to ThreadCount - 1 do
Threads[i].Start;
for i := 0 to ThreadCount - 1 do
Threads[i].WaitFor;
for i := 0 to ThreadCount - 1 do
Threads[i].Free;
end;
function IsSame(const mat1, mat2: TMatrix): boolean;
var
i, k: nativeuint;
begin
Result := True;
for i := 0 to N - 1 do
for k := 0 to N - 1 do
if not SameValue(mat1[i * N + k], mat2[i * N + k]) then
Exit(False);
end;
begin
SetLength(A, N * N);
SetLength(B, N * N);
Randomize;
InitializeMatrix(A);
InitializeMatrix(B);
StartTime := Now;
R1 := Multiply1(A, B);
Writeln('Execution Time 1: ', MilliSecondsBetween(Now, StartTime), ' ms');
StartTime := Now;
R2 := MultiplyThreaded(A, B);
Writeln('Execution Time 2 (Threaded): ', MilliSecondsBetween(Now, StartTime), ' ms');
WriteLn('IsSame: ', IsSame(R1, R2));
ReadLn;
end.