Recent

Author Topic: Extracting string parts [Unit _Strings.pas]  (Read 14921 times)

BioHazard

  • Jr. Member
  • **
  • Posts: 57
  • Starless...
Extracting string parts [Unit _Strings.pas]
« on: February 09, 2011, 10:21:02 pm »
Maybe the following functions will make it easier to extract the string parts you need, and more (and also help you avoid regular expressions):

The code below contains the updated versions.

Usage:

Code: [Select]
// Demonstrates every routine exported by _Strings by writing each result
// into the form caption in turn (the last assignment is what stays visible).
procedure TForm1.Button1Click(Sender: TObject);
var
  Objects: TArrayIntegerString;
begin
  Form1.Caption := After ('@', 'biohazard@online.ge');
  // Returns 'online.ge'
  // From the first occurrence of '@'

  Form1.Caption := Before ('@', 'biohazard@online.ge');
  // Returns 'biohazard'
  // From the first occurrence of '@'

  Form1.Caption := Between ('@', '.', 'biohazard@online.ge');
  // Returns 'online'
  // From the first occurrence of '@'

  Form1.Caption := AfterLast ('[', 'sin[90]*cos[180]');
  // Returns '180]'
  // From the last occurrence of '['

  Form1.Caption := BeforeLast ('[', 'sin[90]*cos[180]');
  // Returns 'sin[90]*cos'
  // Up to (not including) the last occurrence of '['

  Form1.Caption := BetweenLast ('[', ']', 'sin[90]*cos[180]');
  // Returns '180'
  // From the last occurrence of '['

  Form1.Caption := StrReplace ('love', 'hate', 'From Fruit1 Fruit2 I love Fruit3');
  // Returns From Fruit1 Fruit2 I hate Fruit3

  Form1.Caption := StrReplace (['Fruit1', 'Fruit2', 'Fruit3'], 'Beer', 'From Fruit1 Fruit2 I love Fruit3');
  // Returns From Beer Beer I love Beer

  Form1.Caption := StrReplace (['Fruit1', 'Fruit2', 'Fruit3'], ['Beer', 'Wine', 'Vodka'], 'From Fruit1 Fruit2 I love Fruit3');
  // Returns From Beer Wine I love Vodka

  // Below two functions require associative array class unit _Arrays.pas

  // var Objects: TArrayIntegerString;
  Objects := Explode ('Male,Female,Apple,BioHazard', ',');
  // Returns Objects[0]='Male', Objects[1]='Female', Objects[2]='Apple', Objects[3]='BioHazard'
  try
    //var Objects: TArrayIntegerString;
    Form1.Caption := Implode (Objects, ',');
    // Returns 'Male,Female,Apple,BioHazard'
  finally
    // Explode creates the array object; the caller owns it and must free it.
    Objects.Free;
  end;

  Form1.Caption := NameToCaption ('borland_delphi_compiler');
  // Returns 'Borland Delphi Compiler'

  Form1.Caption := CaptionToName ('Borland Delphi Compiler');
  // Returns 'borland_delphi_compiler'

end;

_Strings.pas - Library for working with string parts

Code: [Select]
unit _Strings;

{$mode objfpc}

interface

uses
  Classes, SysUtils, StrUtils, _Arrays;

// Part of InThat that precedes the first occurrence of This ('' if absent).
function Before (This, InThat: String): String;
// Part of InThat that follows the first occurrence of This ('' if absent).
function After (This, InThat: String): String;
// Before(That, ...) applied to After(This, InThat).
function Between (This, That, InThat: String): String;
// Counterparts of After/Before/Between meant to anchor on the last occurrence.
function AfterLast (This, InThat: String): String;
function BeforeLast (This, InThat: String): String;
function BetweenLast (This, That, InThat: String): String;
// Splits Source at Separator into a newly created array object (caller frees it).
function Explode (Source, Separator: String): TArrayIntegerString;
// Builds one string from the values of Source and Separator.
function Implode (Source: TArrayIntegerString; Separator: String): String;
// Case-insensitive replace-all: many needles -> one replacement.
function StrReplace (const This: array of string; That: string; InThat:String): String; overload;
// Case-insensitive replace-all: This[I] -> That[I], applied in order.
function StrReplace (const This: array of string; const That: array of string;  InThat:String): String; overload;
// Case-insensitive replace-all of a single needle.
function StrReplace (This, That, InThat:String): String; overload;
// Underscores to spaces, then proper-cased.
function NameToCaption (Name: String): String;
// Spaces to underscores, then lower-cased.
function CaptionToName (Caption: String): String;

implementation

{ Returns the part of InThat that follows the first occurrence of This.
  Returns '' when This does not occur in InThat. }
function After (This, InThat: String): String;
var
  Found: SizeInt;
begin
  // Search only once: Pos is the expensive part of this routine.
  Found := Pos(This, InThat);
  if Found = 0 then
    Result := ''
  else
    // MaxInt as the count makes Copy take everything up to the end of InThat.
    Result := Copy(InThat, Found + Length(This), MaxInt);
end;

{ Returns the part of InThat that precedes the first occurrence of This.
  Returns '' when This does not occur in InThat. }
function Before (This, InThat: String): String;
var
  Found: SizeInt;
begin
  // One scan is enough; there is no need to call After just to derive a length.
  Found := Pos(This, InThat);
  if Found = 0 then
    Result := ''
  else
    Result := Copy(InThat, 1, Found - 1);
end;

{ Returns what lies between the first This and the first That found after it.
  Yields '' when either marker is missing. }
function Between (This, That, InThat: String): String;
var
  Tail: String;
begin
  // Drop everything up to and including This, then cut at That.
  Tail := After(This, InThat);
  Result := Before(That, Tail);
end;

{ Returns the part of InThat that follows the last occurrence of This.
  Returns '' when This is empty or does not occur in InThat.
  Uses StrUtils.RPos so delimiters longer than one character are matched
  correctly (reversing only InThat, as before, broke them). }
function AfterLast (This, InThat: String): String;
var
  Found: SizeInt;
begin
  Result := '';
  if This = '' then
    Exit;
  Found := RPos(This, InThat);
  if Found <> 0 then
    Result := Copy(InThat, Found + Length(This), MaxInt);
end;

{ Returns the part of InThat that precedes the last occurrence of This.
  Returns '' when This is empty or does not occur in InThat.
  Uses StrUtils.RPos so delimiters longer than one character are matched
  correctly (reversing only InThat, as before, broke them). }
function BeforeLast (This, InThat: String): String;
var
  Found: SizeInt;
begin
  Result := '';
  if This = '' then
    Exit;
  Found := RPos(This, InThat);
  if Found <> 0 then
    Result := Copy(InThat, 1, Found - 1);
end;

{ Takes the text after the last This and returns what precedes the last
  That inside it. }
function BetweenLast (This, That, InThat: String): String;
var
  Tail: String;
begin
  Tail := AfterLast(This, InThat);
  Result := BeforeLast(That, Tail);
end;

{ Joins all values of Source into one string, with Separator between
  consecutive values (no trailing separator). Returns '' for an empty array.
  Source's iteration cursor is reset before and after the walk. }
function Implode (Source: TArrayIntegerString; Separator: String): String;
var
  First: Boolean;
begin
  Result := '';
  First := True;
  Source.Reset;
  // Foreach returns True while there is an element to visit and positions the
  // cursor on it; it returns False (and resets itself) once exhausted. The old
  // "repeat ... until Source.Foreach" therefore stopped after one element.
  while Source.Foreach do
  begin
    if not First then
      Result := Result + Separator;
    Result := Result + Source.Value;
    First := False;
  end;
end;

{ Cuts Source at every Separator and stores the pieces under the keys
  0, 1, 2, ... of a newly created TArrayIntegerString. The caller owns the
  returned object. }
function Explode (Source, Separator: String): TArrayIntegerString;
var
  Slot: Integer;
  Piece: String;
begin
  Result := TArrayIntegerString.Create;
  Slot := 0;
  repeat
    // A non-empty remainder without any separator is the final piece.
    if (Source = '') or (Pos(Separator, Source) <> 0) then
      Piece := Before(Separator, Source)
    else
      Piece := Source;
    Result[Slot] := Piece;
    Source := After(Separator, Source);
    Inc(Slot);
  until Source = '';
end;

{ Replaces every occurrence of each entry of This inside InThat by That.
  Matching ignores case; the entries are processed in array order, each one
  working on the output of the previous replacement. }
function StrReplace (const This: array of string; That: string; InThat:String): String; overload;
var
  Needle: Integer;
begin
  Result := InThat;
  for Needle := Low(This) to High(This) do
    Result := StringReplace(Result, This[Needle], That, [rfReplaceAll, rfIgnoreCase]);
end;

{ Replaces every occurrence of This[I] inside InThat by That[I].
  Matching ignores case; pairs are processed in array order, each one working
  on the output of the previous replacement.
  When That has fewer entries than This, the missing replacements are treated
  as '' (the same rule PHP's str_replace uses) instead of reading past the
  end of That. }
function StrReplace (const This: array of string; const That: array of string;  InThat:String): String; overload;
var
  I: Integer;
  Replacement: String;
begin
  Result := InThat;
  for I := Low(This) to High(This) do
  begin
    if I <= High(That) then
      Replacement := That[I]
    else
      Replacement := '';
    Result := StringReplace(Result, This[I], Replacement, [rfReplaceAll, rfIgnoreCase]);
  end;
end;

{ Replaces every occurrence of This inside InThat by That, ignoring case. }
function StrReplace (This, That, InThat:String): String; overload;
var
  Flags: TReplaceFlags;
begin
  Flags := [rfReplaceAll, rfIgnoreCase];
  Result := StringReplace(InThat, This, That, Flags);
end;

{ Turns an identifier such as 'borland_delphi_compiler' into a caption:
  underscores become spaces and the words are proper-cased. }
function NameToCaption(Name: String): String;
var
  Spaced: String;
begin
  Spaced := StrReplace('_', ' ', Name);
  Result := AnsiProperCase(Spaced, StdWordDelims);
end;

{ Turns a caption such as 'Borland Delphi Compiler' into an identifier:
  spaces become underscores and the result is lower-cased. }
function CaptionToName(Caption: String): String;
var
  Joined: String;
begin
  Joined := StrReplace(' ', '_', Caption);
  Result := LowerCase(Joined);
end;

end.

_Arrays.pas - Associative array library (used by the two functions Explode and Implode in _Strings.pas)

Code: [Select]
unit _Arrays;

{$mode objfpc}

interface

uses
  Classes, SysUtils;

// Generic associative array: maps keys of type TIndex to values of type TValue.
// Entries live in two parallel dynamic arrays and are located by linear search.
type generic TArray <TIndex, TValue> = class
private
  // Values[i] is the value stored under key Indexes[i].
  Values: array of TValue;
  Indexes: array of TIndex;
  // Iteration counter advanced by Foreach.
  Ticker: Integer;
  // Slot of Index in Indexes, or -1 when the key is not present.
  function Key (Index: TIndex): Integer;
  // Setter of Items: overwrites an existing key or appends a new entry.
  procedure Write (Index: TIndex; Value: TValue);
  // Getter of Items.
  function Read (Index: TIndex): TValue;
public
  // Slot that Index and Value currently read from.
  Position: Integer;
  constructor Create;
  property Items [Index: TIndex]: TValue read Read write Write; default;
  // Rewinds the iteration (Ticker and Position back to 0).
  procedure Reset;
  // Key at the current Position.
  function Index: TIndex;
  // Value at the current Position.
  function Value: TValue;
  // Steps to the next entry; returns False (and resets) when none is left.
  function Foreach: Boolean;
  // Number of stored entries.
  function Count: Integer;
  // Reorders entries by comparing their values.
  procedure Sort;
  // Removes the entry stored under key Element.
  procedure Delete (Element: TIndex);
end;

// Ready-made specializations; the name reads TArray<Key><Value>.
type
  TArrayIntegerString = specialize TArray <Integer, String>;
  TArrayStringInteger = specialize TArray <String, Integer>;
  TArrayStringString = specialize TArray <String, String>;
  // Was specialized as <String, String>, which contradicted the type's name.
  TArrayIntegerInteger = specialize TArray <Integer, Integer>;

implementation

// Creates an empty array with the iteration cursor at the start.
constructor TArray.Create;
 begin
   Ticker := 0;
   Position := 0;
   // Assigning nil to a dynamic array is the same as SetLength(..., 0).
   Values := nil;
   Indexes := nil;
 end;

 function TArray.Foreach: Boolean;
 begin
   if (Ticker<Count) then
   begin
     Result := True;
     Ticker := Ticker + 1;
     Position := Ticker - 1;
   end
   else
   begin
     Result := False;
     Reset;
   end;
 end;

 function TArray.Count: Integer;
 begin
   Result := Length (Indexes)
 end;

 function TArray.Index: TIndex;
 begin
   Result := Indexes[Position];
 end;

 function TArray.Value: TValue;
 begin
   Result:= Values[Position];
 end;

 procedure TArray.Reset;
 begin
   Ticker := 0;
   Position := 0;
 end;

 procedure TArray.Write (Index: TIndex; Value: TValue);
 var Cursor: Integer;
 begin
   Cursor := Key(Index);
   if Cursor=-1 then
   begin
     SetLength (Indexes, Length(Indexes)+1);
     SetLength (Values, Length(Values)+1);
     Cursor := Length(Indexes)-1;
   end;
   Indexes[Cursor] := Index;
   Values[Cursor] := Value;
 end;

 function TArray.Read (Index: TIndex): TValue;
 var Cursor: Integer;
 begin
   Cursor := Key (Index);
   if Cursor=-1 then
   begin
     Result := Null;
   end
   else
   begin
     Result := Values[Cursor];
   end;
 end;

 function TArray.Key (Index: TIndex): Integer;
 var
   Current,Records: Integer;
 begin
   Result := -1;
   Records := Length (Indexes);
   for Current:=0 to Records-1 do
   begin
     if Indexes[Current]=Index then
     begin
       Result := Current;
       break;
     end;
   end;
 end;

 procedure TArray.Sort;
 var
   Current,Records: Integer;
   ValueBuffer: TValue;
   IndexBuffer: TIndex;
 begin
   Records := Length (Indexes);
   ValueBuffer := Values[0];
   for Current:=1 to Records-1 do
   begin
        if (ValueBuffer>Values[Current]) then
        begin
          Values[Current-1]:= Values[Current];
          Values[Current] := ValueBuffer;
          IndexBuffer := Indexes[Current-1];
          Indexes[Current-1] := Indexes[Current];
          Indexes[Current] := IndexBuffer;
        end;
        ValueBuffer := Values[Current];
   end;
 end;

 procedure TArray.Delete (Element: TIndex);
 var
   Current: Integer;
 begin
   Current := Key(Element);
   if Current > High(Indexes) then
   begin
      Exit;
   end;
   if Current < Low(Indexes) then
   begin
      Exit;
   end;
   if Current = High(Indexes) then
   begin
     SetLength(Indexes, Length(Indexes) - 1) ;
     Exit;
   end;
   Finalize(Indexes[Current]) ;
   System.Move(Indexes[Current +1], Indexes[Current],(Length(Indexes) - Current -1) * SizeOf(TIndex) + 1) ;
   SetLength(Indexes, Length(Indexes) - 1);
   if Current > High(Values) then
   begin
      Exit;
   end;
   if Current < Low(Values) then
   begin
      Exit;
   end;
   if Current = High(Values) then
   begin
     SetLength(Values, Length(Values) - 1) ;
     Exit;
   end;
   Finalize(Values[Current]) ;
   System.Move(Values[Current +1], Values[Current],(Length(Values) - Current -1) * SizeOf(TValue) + 1) ;
   SetLength(Values, Length(Values) - 1);
 end;

end.
« Last Edit: February 11, 2011, 08:40:15 pm by BioHazard »

Marc

  • Administrator
  • Hero Member
  • *
  • Posts: 2582
Re: Extracting string parts [Unit _Strings.pas]
« Reply #1 on: February 10, 2011, 01:39:28 am »
Most of the functions we already have:

 Form1.Caption := After ('@', 'biohazard @ online.ge');
  // Returns 'online.ge'
  // From the first occurrence of '@'

GetPart(['@'], [], 'biohazard @ online.ge');

Quote
 Form1.Caption := Before ('@', 'biohazard @ online.ge');
  // Returns 'biohazard'
  // From the first occurrence of '@'

GetPart([], ['@'], 'biohazard @ online.ge');

Quote
 Form1.Caption := Between ('@', '.', 'biohazard @ online.ge');
  // Returns 'online'
  // From the first occurrence of '@'

GetPart(['@'], ['.'], 'biohazard @ online.ge');

Quote
 Form1.Caption := AfterLast ('[', 'sin[90]*cos[180]');
  // Returns '180]'
  // From the last occurrence of '['

By using get part you can "eat" the string. It is very seldom that you only need the last part.

Quote
 Form1.Caption := BeforeLast ('[', 'sin[90]*cos[180]');
  // Returns 'sin[90]*cos['
  // From the last occurrence of '['

idem

Quote
 Form1.Caption := BetweenLast ('[', ']', 'sin[90]*cos[180]');
  // Returns '180'
  // From the last occurrence of '['

Repetitively call GetPart()

Quote
 Form1.Caption := StrReplace ('love', 'hate', 'From Fruit1 Fruit2 I love Fruit3');
  // Returns From Fruit1 Fruit2 I hate Fruit3

RTL function StringReplace()

Quote
 Form1.Caption := StrReplace (['Fruit1', 'Fruit2', 'Fruit3'], 'Beer', 'From Fruit1 Fruit2 I love Fruit3');
  // Returns From Beer Beer I love Beer

RTL function StringsReplace()

Quote
 Form1.Caption := StrReplace (['Fruit1', 'Fruit2', 'Fruit3'], ['Beer', 'Wine', 'Vodka'], 'From Fruit1 Fruit2 I love Fruit3');
  // Returns From Beer Wine I love Vodka

RTL function StringsReplace()

Quote
 // Below two functions require associative array class unit _Array.pas

  // var Objects: TArray;
  Objects := Explode ('Male,Female,Apple,BioHazard', ',');
  // Returns Objects[0]='Male', Objects[1]='Female', Objects[2]='Apple', Objects[3]='BioHazard'

use TStringList
Code: Pascal  [Select][+][-]
  1. var
  2.   SL: TStringList;
  3. begin
  4.   SL := TStringList.Create;   // the caller must Free it when done
  5.   SL.Delimiter := ',';        // split on commas
  6.   SL.DelimitedText := 'Male,Female,Apple,BioHazard';   // fills SL with the four items
  7. end;
  8.  

Quote
 //var Objects: TArray;
  Form1.Caption := Implode (Objects, ',');
  // Returns 'Male,Female,Apple,BioHazard'

Same stringlist. (SL.DelimitedText)

Quote
 Form1.Caption := NameToCaption ('borland_delphi_compiler');
  // Returns 'Borland Delphi Compiler'

There is a function for this, but I cannot find it.

Quote
 Form1.Caption := CaptionToName ('Borland Delphi Compiler');
  // Returns 'borland_delphi_compiler'

RTL function LowerCase

//--
{$I stdsig.inc}
//-I still can't read someones mind
//-Bugs reported here will be forgotten. Use the bug tracker

Leledumbo

  • Hero Member
  • *****
  • Posts: 8747
  • Programming + Glam Metal + Tae Kwon Do = Me
Re: Extracting string parts [Unit _Strings.pas]
« Reply #2 on: February 10, 2011, 02:10:21 am »
Looks good, but the efficiency could be improved in some places.
Code: [Select]
// Returns the part of InThat after the first occurrence of This, or '' if
// This is not found. Quoted from the first post as the "before" sample.
function After (This, InThat: String): String;
begin
  // First scan of InThat for This.
  if Pos(This,InThat)<>0 then
  begin
    // Second, identical scan: Pos is evaluated again here.
    After := Copy (InThat,Pos(This,InThat)+Length(This),Length(InThat)-Length(This));
  end
  else
  begin
    After := '';
  end;
end;
I know we have CSE optimization, but wouldn't it be better (and shorter to write):
Code: [Select]
// Returns the part of InThat after the first occurrence of This, or '' if
// This is not found. Pos is evaluated once and its result reused.
function After (This, InThat: String): String;
var
  Hit: LongWord;
begin
  After := '';
  Hit := Pos(This,InThat);
  if Hit <> 0 then
    After := Copy(InThat,Hit + Length(This),Length(InThat) - Length(This));
end;
Remember that Pos is O(m * n) where m is This length, and n is InThat length. So it's quite time consuming.

Most of the other functions simply wrap another function call, so for better performance you can use the inline modifier.

The first two StrReplace functions are damn slow, since you use StringReplace inside a loop. I think in this case you should replace with your own function, which I haven't found the best way myself. I'll look for it somewhere.

Just a suggestion, please don't use _<unitname>, choose a meaningful one. StringHelper or something else.

TArray class might be convenient for some people, but not for me. I use variant only when doing automation since variant is slow and makes my binary bigger.
« Last Edit: February 10, 2011, 02:27:50 am by Leledumbo »

BioHazard

  • Jr. Member
  • **
  • Posts: 57
  • Starless...
Re: Extracting string parts [Unit _Strings.pas]
« Reply #3 on: February 10, 2011, 09:42:51 am »
Thanks for the advice. I really did not know that GetPart existed, and maybe that is why I find it a bit confusing right now. For ten years I have been using After, Before, Between, AfterLast, BeforeLast and BetweenLast in PHP, Delphi and C++, and they make up about 40% of the function calls I write. I feel very comfortable with them; of course they need optimization, and I will do it, but they are still brilliant to use.

<3

As for the associative array class I really need your help:

Marc and Leledumbo, please see my technical question in the associative array thread linked below. This problem has been confusing me for a year. The question is about the technical aspects of making the associative array class's Indexes and Values strongly typed without losing the goal that the class still works with every type:

http://www.lazarus.freepascal.org/index.php/topic,12077.msg61439.html#msg61439

Leledumbo

  • Hero Member
  • *****
  • Posts: 8747
  • Programming + Glam Metal + Tae Kwon Do = Me
Re: Extracting string parts [Unit _Strings.pas]
« Reply #4 on: February 10, 2011, 11:31:15 am »
Marco already gave you a point

bdexterholland

  • Jr. Member
  • **
  • Posts: 65
  • uh?
Re: Extracting string parts [Unit _Strings.pas]
« Reply #5 on: February 11, 2011, 11:49:23 am »
Quote
 Form1.Caption := NameToCaption ('borland_delphi_compiler');
  // Returns 'Borland Delphi Compiler'

there is a function for this, but I canot find it

Quote
 Form1.Caption := CaptionToName ('Borland Delphi Compiler');
  // Returns 'borland_delphi_compiler'

RTL function LowerCase

I think the difference in CaptionToName is the underscore replacement, which could be useful in some cases...
[sleep .....]

BioHazard

  • Jr. Member
  • **
  • Posts: 57
  • Starless...
Re: Extracting string parts [Unit _Strings.pas]
« Reply #6 on: February 11, 2011, 08:45:07 pm »
I updated the associative arrays: the class is now based on generics and has sort, delete and foreach methods. Just in case someone lost in Google ends up here...

Marc

  • Administrator
  • Hero Member
  • *
  • Posts: 2582
Re: Extracting string parts [Unit _Strings.pas]
« Reply #7 on: February 12, 2011, 11:59:59 am »
Quote
 Form1.Caption := NameToCaption ('borland_delphi_compiler');
  // Returns 'Borland Delphi Compiler'

there is a function for this, but I canot find it

Quote
 Form1.Caption := CaptionToName ('Borland Delphi Compiler');
  // Returns 'borland_delphi_compiler'

RTL function LowerCase

I Think the difference In CaptionToName is the UnderScore replacement the could be some useful in some cases...
Ooops, somehow my brain already translated the '_' into ' ' so I didn't notice the difference
//--
{$I stdsig.inc}
//-I still can't read someones mind
//-Bugs reported here will be forgotten. Use the bug tracker

BeniBela

  • Hero Member
  • *****
  • Posts: 905
    • homepage
Re: Extracting string parts [Unit _Strings.pas]
« Reply #8 on: February 15, 2011, 10:43:21 pm »
use TStringList
Except that TStringList only accepts a single-character delimiter, and it handles quotes, which can be surprising.

But I prefer my function, already posted some time ago, which gives you a standard array of string (single-character delimiter only, but easy to modify), because the gc-ed arrays are much easier to use than objects:
Code: [Select]
{ Splits s at every occurrence of the character c and returns the pieces in
  splitted. Empty pieces are kept only when includeEmpty is True. An empty s
  always yields an empty array. }
procedure strSplit(out   splitted: TStringArray; s: string; c: char;
  includeEmpty: boolean);
var
  parts: TStringArray;
  hit: longint;

  // Appends one piece to the collected parts.
  procedure push(const piece: string);
  begin
    SetLength(parts, Length(parts) + 1);
    parts[High(parts)] := piece;
  end;

begin
  SetLength(parts, 0);
  if s <> '' then
  begin
    hit := Pos(c, s);
    while hit > 0 do
    begin
      // hit = 1 means an empty piece in front of the delimiter.
      if (hit > 1) or includeEmpty then
        push(Copy(s, 1, hit - 1));
      // Consume the piece together with its delimiter.
      Delete(s, 1, hit);
      hit := Pos(c, s);
    end;
    // Whatever remains after the last delimiter is the final piece.
    if includeEmpty or (s <> '') then
      push(s);
  end;
  splitted := parts;
end;

 

TinyPortal © 2005-2018