Lazarus
Programming => General => Topic started by: meulinux on December 20, 2011, 05:38:33 pm
-
function TfrmMain.RemoveAccent(Str: String): String;
const
WithAccents = 'àâêôûãõáéíóúçüÀÃÂÊÔÛÕÁÉÍÓÚÇÜ';
NoAccent = 'aaeouaoaeioucuAAAEOUOAEIOUCU';
var
x: Integer;
begin
for x := 1 to Length(Str) do
if Pos(Str[x],WithAccents) <> 0 then
Str[x] := NoAccent[Pos(Str[x], WithAccents)];
Result := Str;
end;
// SRec.Name = 'JAPÃO.pdf' using ShowMessage.
MyFile := RemoveAccent(SRec.Name);
ShowMessage Result = 'JAPA'
should return = 'JAPAO.pdf'
thanks
-
Try this:
function RemoveDiacritics(S :utf8string):utf8string;
// It should work for several languages
// Portuguese, Spanish and Italian, maybe for French, German and more
const
AccentedChars :array[0..53] of utf8string = ('á','à','ã','â','ä','é','è','ê','ë','í','ì','ï','î','ó','ò','õ','ô','ö','ø','ú','ù','ü','û','ç','ñ','ÿ','ý',
'Á','À','Ã','Â','Ä','É','È','Ê','Ë','Í','Ì','Ï','Î','Ó','Ò','Õ','Ô','Ö','Ø','Ú','Ù','Ü','Û','Ç','Ñ','Y','Ý');
NormalChars :array[0..53] of utf8string = ('a','a','a','a','a','e','e','e','e','i','i','i','i','o','o','o','o','oe','o','u','u','u','u','c','n','y','y',
'A','A','A','A','A','E','E','E','E','I','I','I','I','O','O','O','O','OE','O','U','U','U','U','C','N','Y','Y');
var
i, j :integer;
begin
Result := S;
for i := 0 to High(AccentedChars) do
Result := StringReplace(Result, AccentedChars[i], NormalChars[i], [rfReplaceAll]);
end;
-
It did not work.
See image and project attachments.
-
Felipe, forum seems to have a bug, I could not download this file.
-
@MeuLinux
Usage:
Edit2.Text := RemoveDiacritics(Edit1.Text);
-
@MeuLinux
Usage:
Edit2.Text := RemoveDiacritics(Edit1.Text);
Yes
-
My image.
-
unit Unit1;
{$mode objfpc}{$H+}
interface
uses
Classes, SysUtils, FileUtil, Forms, Controls, Graphics, Dialogs, StdCtrls;
type
{ TForm1 }
TForm1 = class(TForm)
Button1 : TButton;
Button2 : TButton;
Button3 : TButton;
Edit1 : TEdit;
Edit2 : TEdit;
procedure Button1Click(Sender : TObject);
procedure Button2Click(Sender : TObject);
procedure Button3Click(Sender : TObject);
private
{ private declarations }
public
{ public declarations }
end;
var
Form1 : TForm1;
implementation
uses lazutf8;
{$R *.lfm}
{ TForm1 }
function RemoveAccent(Str: String): String;
const
WithAccents = 'àâêôûãõáéíóúçüÀÃÂÊÔÛÕÁÉÍÓÚÇÜ';
NoAccent = 'aaeouaoaeioucuAAAEOUOAEIOUCU';
var
x: Integer;
begin
for x := 1 to Length(Str) do
if Pos(Str[x],WithAccents) <> 0 then
Str[x] := NoAccent[Pos(Str[x], WithAccents)];
Result := Str;
end;
function RemoveDiacritics(S :utf8string):utf8string;
// It should work for several languages
// Portuguese, Spanish and Italian, maybe for French, German and more
const
AccentedChars :array[0..53] of utf8string = ('á','à','ã','â','ä','é','è','ê','ë','í','ì','ï','î','ó','ò','õ','ô','ö','ø','ú','ù','ü','û','ç','ñ','ÿ','ý',
'Á','À','Ã','Â','Ä','É','È','Ê','Ë','Í','Ì','Ï','Î','Ó','Ò','Õ','Ô','Ö','Ø','Ú','Ù','Ü','Û','Ç','Ñ','Y','Ý');
NormalChars :array[0..53] of utf8string = ('a','a','a','a','a','e','e','e','e','i','i','i','i','o','o','o','o','oe','o','u','u','u','u','c','n','y','y',
'A','A','A','A','A','E','E','E','E','I','I','I','I','O','O','O','O','OE','O','U','U','U','U','C','N','Y','Y');
var
i, j :integer;
begin
Result := S;
for i := 0 to High(AccentedChars) do
Result := StringReplace(Result, AccentedChars[i], NormalChars[i], [rfReplaceAll]);
end;
function RemoveAcento(Str: String): String;
const
ComAcento = 'àâêôûãõáéíóúçüÀÃÂÊÔÛÕÁÉÍÓÚÇÜ';
SemAcento = 'aaeouaoaeioucuAAAEOUOAEIOUCU';
var
x: Integer;
begin
for x := 1 to UTF8Length(utf8string(Str)) do
if UTF8Pos(utf8string(Str[x]),utf8string(ComAcento)) <> 0 then
Str[x] := SemAcento[UTF8Pos(utf8string(Str[x]), utf8string(ComAcento))];
Result := Str;
end;
procedure TForm1.Button1Click(Sender : TObject);
begin
Edit2.Text := RemoveDiacritics(Edit1.Text);
end;
procedure TForm1.Button2Click(Sender : TObject);
begin
Edit2.Text := RemoveAcento(Edit1.Text);
end;
procedure TForm1.Button3Click(Sender : TObject);
begin
Edit2.Text := RemoveAccent(Edit1.Text);
end;
end.
object Form1: TForm1
Left = 234
Height = 237
Top = 194
Width = 320
BorderIcons = [biSystemMenu, biMinimize]
BorderStyle = bsSingle
Caption = 'Remove Accent'
ClientHeight = 237
ClientWidth = 320
Position = poScreenCenter
LCLVersion = '0.9.31'
object Button1: TButton
Cursor = crHandPoint
Left = 64
Height = 25
Top = 120
Width = 180
Caption = 'Remove 1 (typo)'
OnClick = Button1Click
TabOrder = 0
end
object Edit1: TEdit
Left = 12
Height = 27
Top = 20
Width = 284
TabOrder = 1
Text = 'JAPÃO.pdf'
end
object Edit2: TEdit
Left = 12
Height = 27
Top = 64
Width = 284
TabOrder = 2
end
object Button2: TButton
Cursor = crHandPoint
Left = 64
Height = 25
Top = 160
Width = 180
Caption = 'Remove 2 (meulinux)'
OnClick = Button2Click
TabOrder = 3
end
object Button3: TButton
Cursor = crHandPoint
Left = 64
Height = 25
Top = 200
Width = 180
Caption = 'Remove 3 (original)'
OnClick = Button3Click
TabOrder = 4
end
end
-
I have downloaded your project, compiled and pressed the button, and it works very well. Image attached.
-
Windows OK?
Linux bad? BUG!
-
Lazarus 0.9.31 r33012 FPC 2.5.1 i386-win32-win32/win64
Is the codification of your Code Editor different from UTF8? Maybe this is the problem.
-
worked like this:
function RemoveDiacritics(S :string):string;
// It should work for several languages
// Portuguese, Spanish and Italian, maybe for French, German and more
const
AccentedChars :array[0..53] of string = ('á','à','ã','â','ä','é','è','ê','ë','í','ì','ï','î','ó','ò','õ','ô','ö','ø','ú','ù','ü','û','ç','ñ','ÿ','ý',
'Á','À','Ã','Â','Ä','É','È','Ê','Ë','Í','Ì','Ï','Î','Ó','Ò','Õ','Ô','Ö','Ø','Ú','Ù','Ü','Û','Ç','Ñ','Y','Ý');
NormalChars :array[0..53] of string = ('a','a','a','a','a','e','e','e','e','i','i','i','i','o','o','o','o','oe','o','u','u','u','u','c','n','y','y',
'A','A','A','A','A','E','E','E','E','I','I','I','I','O','O','O','O','OE','O','U','U','U','U','C','N','Y','Y');
var
i, j :integer;
begin
Result := S;
for i := 0 to High(AccentedChars) do
Result := StringReplace(Result, AccentedChars[i], NormalChars[i], [rfReplaceAll]);
end;
-
function TfrmMain.RemoveAccent(Str: String): String;
const
WithAccents = 'àâêôûãõáéíóúçüÀÃÂÊÔÛÕÁÉÍÓÚÇÜ';
NoAccent = 'aaeouaoaeioucuAAAEOUOAEIOUCU';
var
x: Integer;
begin
for x := 1 to Length(Str) do
if Pos(Str[x],WithAccents) <> 0 then
Str[x] := NoAccent[Pos(Str[x], WithAccents)];
Result := Str;
end;
I know that there is already a solution, but I'd like to point out the errors in this first code posted:
1> Using Pos, Pos is for bytes, not characters.
2> Accessing the string as if it was an array with Str [ x ], again, this is for bytes, not characters.
3> Iterating from 1 to Length, this is for bytes, not characters
You should read everything here to learn the proper way of doing these operations in UTF-8:
http://wiki.lazarus.freepascal.org/LCL_Unicode_Support#Dealing_with_UTF8_strings_and_characters
-
function TfrmMain.RemoveAccent(Str: String): String;
const
WithAccents = 'àâêôûãõáéíóúçüÀÃÂÊÔÛÕÁÉÍÓÚÇÜ';
NoAccent = 'aaeouaoaeioucuAAAEOUOAEIOUCU';
var
x: Integer;
begin
for x := 1 to Length(Str) do
if Pos(Str[x],WithAccents) <> 0 then
Str[x] := NoAccent[Pos(Str[x], WithAccents)];
Result := Str;
end;
I know that there is already a solution, but I'd like to point out the errors in this first code posted:
1> Using Pos, Pos is for bytes, not characters.
2> Accessing the string as if it was an array with Str [ x ], again, this is for bytes, not characters.
3> Iterating from 1 to Length, this is for bytes, not characters
You should read everything here to learn the proper way of doing these operations in UTF-8:
http://wiki.lazarus.freepascal.org/LCL_Unicode_Support#Dealing_with_UTF8_strings_and_characters
Nor had I seen this post
@felipemdc
I tried in every way, it did not work.
function RemoveAcento(Str: String): String;
const
ComAcento = 'àâêôûãõáéíóúçüÀÃÂÊÔÛÕÁÉÍÓÚÇÜ';
SemAcento = 'aaeouaoaeioucuAAAEOUOAEIOUCU';
var
x: Integer;
begin
for x := 1 to UTF8Length(utf8string(Str)) do
if UTF8Pos(utf8string(Str[x]),utf8string(ComAcento)) <> 0 then
Str[x] := SemAcento[UTF8Pos(utf8string(Str[x]), utf8string(ComAcento))];
Result := Str;
end;
JAPÃO.pdf = resulted in: JAPaAO.pdf (it seems that no distinction at "ã" and "Ã"), puts two "A" (one uppercase and one lowercase) where the character is accented.
I'm just telling. It has been solved in another function.
thanks all
-
Well:
Str[ x ] := SemAcento[UTF8Pos(utf8string(Str[ x ]), utf8string(ComAcento))];
Yet again using [ x ] which accesses bytes, and 3 times in the same line.
But, StringReplace is a good solution. The alternative solution would be using UTF8CharacterPos to iterate the string, and use a case (or many ifs, dont know in which fpc version case of string was introduced), but because your substitution will change the string length you need to rebuild the destination in a temporary string.
-
Try this:
function RemoveDiacritics(S :utf8string):utf8string;
// It should work for several languages
// Portuguese, Spanish and Italian, maybe for French, German and more
const
AccentedChars :array[0..53] of utf8string = ('á','à','ã','â','ä','é','è','ê','ë','í','ì','ï','î','ó','ò','õ','ô','ö','ø','ú','ù','ü','û','ç','ñ','ÿ','ý',
'Á','À','Ã','Â','Ä','É','È','Ê','Ë','Í','Ì','Ï','Î','Ó','Ò','Õ','Ô','Ö','Ø','Ú','Ù','Ü','Û','Ç','Ñ','Y','Ý');
NormalChars :array[0..53] of utf8string = ('a','a','a','a','a','e','e','e','e','i','i','i','i','o','o','o','o','oe','o','u','u','u','u','c','n','y','y',
'A','A','A','A','A','E','E','E','E','I','I','I','I','O','O','O','O','OE','O','U','U','U','U','C','N','Y','Y');
var
i, j :integer;
begin
Result := S;
for i := 0 to High(AccentedChars) do
Result := StringReplace(Result, AccentedChars[i], NormalChars[i], [rfReplaceAll]);
end;
Hello. This code doesn't work . I have used a project from this message http://forum.lazarus.freepascal.org/index.php/topic,15576.msg83821.html#msg83821
I have attached images showing how arrays look like in memory. I think the problem is that the characters with diacritics in AccentedChars are written as 4 bytes. While the string from the edit contains 2 bytes.
Suggestion from here (http://forum.lazarus.freepascal.org/index.php/topic,15576.msg83834.html#msg83834) helps, making first array consisting of 2-byte chars (each array element occupies 16 bytes ).