program Unicode_and_pos_ex;
{$modeswitch UnicodeStrings}
{$codepage utf-8}
//{$mode delphiunicode}
uses Lazutf8, SysUtils, StrUtils, Windows, character;
var S1,S2 : String;
S3 : String;
// C : WideChar; You will need to uncomment this line if you switch to delphiunicode.
C : Char;
Procedure InitVars;
begin
{0 1 }
{12345678901234567}
S1 := 'bonjour sérénitàa';
S3 := 'à';
C := 'à'; //Comment this out if you switch to delphiunicode.
// C := Char('à'); You will have to uncomment this line if you switch to delphi unicode.
{
None of these calls affect the console or get it to render Unicode appropriately.
SetMultiByteConversionCodePage(CP_UTF8);
SetMultiByteRTLFileSystemCodePage(CP_UTF8);
SetConsoleOutputCP(CP_UTF8); Degrades output to console. Result is worse when this is invoked.
SetTextCodePage(Output, CP_UTF8); //Degrades output as well.
}
end;
Procedure TestUnicodeLength;
var len : integer;
begin
len := Length(S1);
If len = 17
then writeln('length works')
else writeln('length is broken: got ', len, ' expected 17');
end;
Procedure TestUnicodePos;
var i : integer;
begin
Writeln('searching for the substring ''jour'' in ''bonjour sérénitàa''');
i := Pos('jour', S1);
If i = 4
then writeln('Pos works for non-accented searches')
else writeln('Pos broken for non-accented searches. Got ', i, ' when expecting 4');
Writeln('searching for the substring ''à'' in ''bonjour sérénitàa''');
i := Pos(S3 , S1);
If i = 16
then writeln('Pos works for string accented searches')
else writeln('Pos broken for string accented searches. Got ', i, ' when expecting 16');
Writeln('searching for the char ''à'' in ''bonjour sérénitàa''');
i := Pos(C , S1);
If i = 16
then writeln('Pos works for accented character searches')
else writeln('Pos broken for accented character searches. Got ', i, ' when expecting 16');
end;
Procedure TestUnicodePosEx;
var i : integer;
begin
Writeln('searching for the substring ''à'' in ''bonjour sérénitàa'' after pos 4');
i := PosEx(S3 , S1, 4);
If i = 16
then writeln('PosEx works for string accented searches')
else writeln('PosEx broken for string accented searches. Got ', i, ' when expecting 16');
Writeln('searching for the char ''à'' in ''bonjour sérénitàa'' after post 4.');
i := PosEx(C , S1, 4);
If i = 16
then writeln('PosEx works for accented character searches')
else writeln('PosEx broken for accented character searches. Got ', i, ' when expecting 16');
end;
Procedure TestUnicodeTrimming;
const BlankPad = ' ';
begin
S2 := BlankPad + S1 + BlankPad;
S2 := Trim(S2);
If S1 = S2
then Writeln('Trim works')
else Writeln('Trim is broken');
S2 := BlankPad + S1;
S2 := TrimLeft(S2);
If S1 = S2
then Writeln('TrimLeft works')
else Writeln('TrimLeft is broken');
S2 := S1 + BlankPad;
S2 := TrimRight(S2);
If S1 = S2
then Writeln('RightTrim works')
else Writeln('RightTrim is broken');
end;
Procedure TestUnicodeCopying;
var i: integer;
begin
i := Pos(' ',S1);
S2 := Copy(S1,i+1,Length(S1));
If S2 = 'sérénitàa'
then writeln('copy works')
else writeln('copy broken');
end;
Procedure TestUnicodeUpperLower;
const lcAconst : WideChar = 'à';
ucAconst : WideChar = 'À';
var
ucA : char;
begin
Writeln('original string: ' , S1);
S2 := TCharacter.ToUpper(S1);
Writeln('Uppercase S1 = ', S2);
If S2 = 'BONJOUR SÉRÉNITÀA'
then Writeln('Uppercase works')
else Writeln('Uppercase broken: got ', S2, ' instead of BONJOUR SÉRÉNITÀA');
S2 := TCharacter.ToLower(S2);
Writeln('Lowercase S2 = ', S2);
If S2 = S1
then Writeln('Lowercase work')
else Writeln('Lowercase broken: got ', S2, ' instead of sérénitàa');
ucA := TCharacter.ToUpper(lcAconst);
If ucA = ucAConst
then writeln('character uppercase worked')
else writeln('character uppercase broken');
end;
Procedure TestIdentifier(Const S: String);
var i : integer;
begin
Write('String ' , S);
If length(S) = 0 then exit;
With TCharacter do
For i := 1 to length(S) do
If Not (IsLetterOrDigit(S[i]) or (S[i] = '_'))
then begin
Writeln(' is not an identifier');
exit;
end;
Writeln(' is an identifier');
end;
Procedure TestIdentifiers;
const French_Id1 = '___Éternité133' ;
French_Id2 = 'Pérénial_Témérité';
Croatian_Id1 = 'Vječnost' ;
Russian_Id1 = 'Вечность' ;
Arabic_Id1 = 'خلود123' ;
NonId = 'J''usqu''à demain';
begin
TestIdentifier(French_Id1);
TestIdentifier(French_Id2);
TestIdentifier(Croatian_Id1);
TestIdentifier(Russian_Id1);
TestIdentifier(Arabic_Id1);
TestIdentifier(NonId);
end;
begin
InitVars;
TestUnicodeLength;
TestUnicodePos;
TestUnicodePosEx;
TestUnicodeTrimming;
TestUnicodeCopying;
TestUnicodeUpperLower;
TestIdentifiers;
Readln();
end.