The issue you're experiencing is related to how Free Pascal handles console I/O on Windows when dealing with UTF-8 encoded input. Even though you've set the console code page to UTF-8 (65001) and enabled Unicode strings in FPC, the standard `ReadLn` function doesn't properly handle UTF-8 input from the Windows console.
Here's the solution:
program test;
{$CODEPAGE UTF8}
{$MODE OBJFPC}
{$MODESWITCH UNICODESTRINGS}
uses
Windows;
{ Reads one line from the Windows console as UTF-16 (ReadConsoleW), converts
  it to UTF-8 with WideCharToMultiByte and returns it as a String without the
  trailing CR/LF.

  Returns '' when ReadConsoleW fails, when no characters were read, or when
  the UTF-8 conversion fails. A single call consumes at most Length(Buffer)
  UTF-16 code units from the console. }
function ConsoleReadLn: String;
var
  hConsole: THandle;
  Buffer: packed array[0..1023] of WideChar;
  NumRead: DWORD;
  ByteCount: LongInt;
  UTF8Str: UTF8String;
begin
  Result := '';
  hConsole := GetStdHandle(STD_INPUT_HANDLE);
  NumRead := 0;

  // Read wide characters from the console; give up if the call fails
  // or delivers nothing (WideCharToMultiByte rejects a zero-length input).
  if not ReadConsoleW(hConsole, @Buffer[0], Length(Buffer), NumRead, nil) then
    Exit;
  if NumRead = 0 then
    Exit;

  // First call with no output buffer: returns the number of UTF-8 bytes
  // required for the NumRead UTF-16 code units.
  ByteCount := WideCharToMultiByte(CP_UTF8, 0, @Buffer[0], NumRead, nil, 0, nil, nil);
  if ByteCount <= 0 then
    Exit;

  // Second call performs the actual conversion into the sized string.
  SetLength(UTF8Str, ByteCount);
  WideCharToMultiByte(CP_UTF8, 0, @Buffer[0], NumRead, PAnsiChar(UTF8Str), ByteCount, nil, nil);
  Result := String(UTF8Str);

  // Remove the trailing line terminator: the line ends in CR LF, so the
  // LF (#10) has to be removed before the CR (#13) becomes the last char.
  if (Length(Result) > 0) and (Result[Length(Result)] = #10) then
    Delete(Result, Length(Result), 1);
  if (Length(Result) > 0) and (Result[Length(Result)] = #13) then
    Delete(Result, Length(Result), 1);
end;
var
  Entered, Echo: string;
begin
  { Prompt for a line of text on the console. }
  Write('Enter δείγμα: ');

  { Read the line through the wide-character console reader. }
  Entered := ConsoleReadLn;

  { Echo the captured text back to the console. }
  Echo := 'S := ' + Entered;
  WriteLn(Echo);
end.
Alternatively, here is a variant of the same Windows API approach that explicitly handles the case where no characters were read:
program test;
{$CODEPAGE UTF8}
{$MODE OBJFPC}
{$MODESWITCH UNICODESTRINGS}
uses
Windows, SysUtils;
{ Reads one line from the Windows console with ReadConsoleW (UTF-16),
  converts it to UTF-8 via WideCharToMultiByte and returns it as a String
  with the trailing CR/LF removed.

  Returns '' if the console read fails, if zero characters were read, or if
  the conversion to UTF-8 fails. One call reads at most Length(Buffer)
  UTF-16 code units. }
function ReadUTF8Line: String;
var
  hInput: THandle;
  Buffer: array[0..1023] of WideChar;
  NumRead: DWORD;
  ByteCount: LongInt;
  UTF8Str: UTF8String;
begin
  Result := '';
  hInput := GetStdHandle(STD_INPUT_HANDLE);
  NumRead := 0;

  // A failed read or an empty read both yield the empty string.
  if (not ReadConsoleW(hInput, @Buffer[0], Length(Buffer), NumRead, nil)) or (NumRead = 0) then
    Exit;

  // Query the required UTF-8 size first (nil output buffer, size 0),
  // then convert into a string of exactly that many bytes.
  ByteCount := WideCharToMultiByte(CP_UTF8, 0, @Buffer[0], NumRead, nil, 0, nil, nil);
  if ByteCount <= 0 then
    Exit;
  SetLength(UTF8Str, ByteCount);
  WideCharToMultiByte(CP_UTF8, 0, @Buffer[0], NumRead, PAnsiChar(UTF8Str), ByteCount, nil, nil);
  Result := String(UTF8Str);

  // Remove line feed, then carriage return. The terminator is CR LF, so
  // LF is the last character and must be stripped first; checking for CR
  // first would leave a stray #13 at the end of the result.
  if (Length(Result) > 0) and (Result[Length(Result)] = #10) then
    SetLength(Result, Length(Result) - 1);
  if (Length(Result) > 0) and (Result[Length(Result)] = #13) then
    SetLength(Result, Length(Result) - 1);
end;
var
  Entered, Echo: string;
begin
  { Show the prompt. }
  Write('Enter δείγμα: ');

  { Fetch one line from the console via the wide-character reader. }
  Entered := ReadUTF8Line;

  { Print the value that was read. }
  Echo := 'S := ' + Entered;
  WriteLn(Echo);
end.
The problem occurs because the standard `ReadLn` function in FPC doesn't properly translate UTF-8 input from the Windows console. The wide-character Windows console API functions (such as `ReadConsoleW`) handle Unicode properly, so using them directly resolves the issue.
The key points are:
1. Using `ReadConsoleW` to read wide characters from the console
2. Converting the wide character buffer to UTF-8 using `WideCharToMultiByte`
3. Properly handling the conversion to ensure the UTF-8 string is correctly formed
This approach will properly capture and display your Greek text "δείγμα" as expected.