Icon View Thread

The following is the text of the current message along with any replies.
Messages 1 to 10 of 14 total
Thread Roys filters tweaked
Tue, Jul 28 2009 8:07 AMPermanent Link

"John Hay"
Roy,

I had a look at your text filters and "tweaked" them a bit.  These seem to
run quite a bit faster if a lot of text is returned in FilteredText.
Taking out the AddNewChar procedure looks untidy (ie duplicated code) but
seems to save quite a bit of time especially in the HTML filter.

John

procedure TEDBtfRTF.FilterText(const FilterType: string; const TextToFilter:
string; var FilteredText: string);
var
Cntr: integer;
CharsToProcess: integer;
Cmnd: string;
tempstrTonguehar;
filteredpos:integer;

procedure ProcessCommand;
var
 LastCmnd: string;
begin
// is an actual command so "simply" move to the end
// most of the time this will be \{ or space BUT \fprq HAS to be terminated
with a ;
// Others start & finish with {}, but by skipping to the first \par we
should miss those
 LastCmnd := Cmnd;
 Cmnd := '';
 inc(Cntr);
 while (Cntr < CharsToProcess) and (not (TextToFilter[Cntr] in [#0..#32,
'\', '}', '{'])) do begin
  Cmnd := Cmnd + TextToFilter[Cntr];
  inc(Cntr);
 end;
 Cmnd := LowerCase(Cmnd);
 if (Cmnd = '*') or
  (Cmnd = 'listtext') or
  (Cmnd = 'sv') or
  (Cmnd = 'sn') or
  (Cmnd = 'pntxtb') or
  (Cmnd = 'pntext') or
  (Cmnd = 'footer') or
  (Cmnd = 'fldrslt') or
  (Cmnd = 'fonttbl') or
  (Cmnd = 'v') or
  (Cmnd = 'pntxta') or
  (Cmnd = 'pict') or
  (Cmnd = 'rtf1')
  then begin
  while (Cntr < CharsToProcess) and (TextToFilter[Cntr] <> '}') do
inc(Cntr);
  inc(Cntr);
 end;
 if TextToFilter[Cntr] <> #32 then dec(Cntr);
 if (Cmnd = 'cell') or ((LastCmnd = 'cell') and (Cmnd[1] = 'f') and
(Length(Cmnd) = 2) and (Cmnd[2] in ['0'..'9'])) then
  if (filteredpos >= 0) and (tempstr[filteredpos] <> #32) then begin
   inc(filteredpos);
   tempstr[filteredpos] := #32;
  end;
end;
begin
filteredpos := -1;
getmem(tempstr,length(texttofilter)+1);
// setlength(tempstr,length(texttofilter));
CharsToProcess := Length(TextToFilter);
Cntr := Pos('\par', TextToFilter);
if Cntr = 0 then Cntr := 1;
while Cntr <= CharsToProcess do begin
 case TextToFilter[Cntr] of
  '{', '}': if (Cntr > 1) and ((Filteredpos = -1) or (tempstr[FilteredPos]
<> #32)) then
     if (filteredpos >= 0) and (tempstr[filteredpos] <> #32) then
  begin
   inc(filteredpos);
   tempstr[filteredpos] := #32;
 end;
  #0..#31: if TextToFilter[Cntr] in [#9, #10, #13] then
  if (filteredpos >= 0) and (tempstr[filteredpos] <> #32) then
  begin
   inc(filteredpos);
   tempstr[filteredpos] := #32;
  end;
  #32: if (filteredpos >= 0) and (tempstr[filteredpos] <> #32) then begin
    inc(filteredpos);
    tempstr[filteredpos] := #32;
  end;
  '\': if not ((Cntr > 1) and (Cntr < CharsToProcess) and
(TextToFilter[Cntr + 1] in [#39, '\', '{', '}']))
   then ProcessCommand
   else begin
    inc(Cntr);
    if TextToFilter[Cntr] = #39 then begin
     inc(filteredpos);
     tempstr[filteredpos] := Char(StrToInt('$' + TextToFilter[Cntr + 1] +
TextToFilter[Cntr + 2]));
     inc(Cntr, 2);
     if (Cntr < CharsToProcess) and (TextToFilter[Cntr + 1] = '}') then
inc(Cntr);
    end else begin
     inc(filteredpos);
     tempstr[filteredpos] := TextToFilter[Cntr]
    end;
   end;
 else begin
   inc(filteredpos);
   tempstr[filteredpos] := TextToFilter[Cntr]
  end;
 end;
 inc(Cntr);
end;
if filteredpos > -1 then
begin
  tempstr[filteredpos+1] := #0;
  filteredtext := filteredtext+string(tempstr);
end;
freemem(tempstr);
end;

procedure TEDBtfHTML.FilterHTMLText(const FilterType: string; const
TextToFilter: string; var FilteredText: string);
var
CharsToProcess: integer;
CurrentChar: integer;
TestChar: Char;
OperateOn: string;
filteredpos:integer;
tempstrTonguehar;

procedure HTMLTag;
var
 Tag: string;
 TagStart: integer;
 CRCheck: string;
 procedure FindClosingTag(WhatTag: string);
 var
  TagEndPos: integer;
 begin
  TagEndPos := PosEx(WhatTag, OperateOn, CurrentChar);
  if TagEndPos <> 0 then CurrentChar := TagEndPos + (Length(WhatTag) - 1);
 end;
begin
 Tag := '';
 inc(CurrentChar); // skip the <
 TagStart := CurrentChar;
 FindClosingTag('>');
 if CurrentChar > TagStart then begin
  Tag := Copy(OperateOn, TagStart, CurrentChar - TagStart);
  if Tag = 'HEAD' then FindClosingTag('</HEAD>')
  else if Tag = 'XML' then FindClosingTag('</XML>')
  else if Tag = 'TITLE' then FindClosingTag('</TITLE>')
  else if Tag = 'STYLE' then FindClosingTag('</STYLE>');
 end;
end;
function SpecialChar: string;
var
 resultst :string;
 HTMLChar: string;
 RollBack: integer;
 i:integer;
 function DoRollBack:string;
 begin
   resultst := '&';
   CurrentChar:=RollBack;
 end;
begin
 RollBack := CurrentChar;
 HTMLChar := '';
 while (TextToFilter[CurrentChar] <> ';') and (CurrentChar <=
CharsToProcess) do begin
  HTMLChar := HTMLChar + TextToFilter[CurrentChar];
  inc(CurrentChar);
 end;
 HTMLChar := LowerCase(HTMLChar + TextToFilter[CurrentChar]); //This should
get the ;
 if Length(HTMLChar) < 8 then begin
  if HTMLChar = '&nbsp;' then resultst := ' '
  else if HTMLChar = '&#47;' then resultst := '/'
  else if HTMLChar = '&frasl;' then resultst := '/'
  else if HTMLChar = '&#8260;' then resultst := ' /'
  else if HTMLChar = '&middot;' then resultst := ' -   '
  else if HTMLChar = '&amp;' then resultst := '&'
  else if HTMLChar = '&lt;' then resultst := '<'
  else if HTMLChar = '&gt;' then resultst := '>'
  else if HTMLChar = '&euro;' then resultst := '?'
  else if HTMLChar = '&pound;' then resultst := '£'
  else if HTMLChar = '&curren;' then resultst := CurrencyString
  else if HTMLChar = '&quot;' then resultst := #39
  else if HTMLChar = '&iexcl;' then resultst := #161
  else if HTMLChar = '&cent;' then resultst := #162
  else if HTMLChar = '&yen ;' then resultst := #165
  else if HTMLChar = '&brvbar;' then resultst := #166
  else if HTMLChar = '&sect;' then resultst := #167
  else if HTMLChar = '&uml ;' then resultst := #168
  else if HTMLChar = '&copy;' then resultst := #169
  else if HTMLChar = '&ordf;' then resultst := #170
  else if HTMLChar = '&laquo;' then resultst := #171
  else if HTMLChar = '&not ;' then resultst := #172
  else if HTMLChar = '&shy ;' then resultst := #173
  else if HTMLChar = '&reg ;' then resultst := #174
  else if HTMLChar = '&macr;' then resultst := #175
  else if HTMLChar = '&deg ;' then resultst := #176
  else if HTMLChar = '&plusmn;' then resultst := #177
  else if HTMLChar = '&sup2;' then resultst := #178
  else if HTMLChar = '&sup3;' then resultst := #179
  else if HTMLChar = '&acute;' then resultst := #180
  else if HTMLChar = '&micro;' then resultst := #181
  else if HTMLChar = '&para;' then resultst := #182
  else if HTMLChar = '&middot;' then resultst := #183
  else if HTMLChar = '&cedil;' then resultst := #184
  else if HTMLChar = '&sup1;' then resultst := #185
  else if HTMLChar = '&ordm;' then resultst := #186
  else if HTMLChar = '&raquo;' then resultst := #187
  else if HTMLChar = '&frac14;' then resultst := #188
  else if HTMLChar = '&frac12;' then resultst := #189
  else if HTMLChar = '&frac34;' then resultst := #190
  else if HTMLChar = '&iquest;' then resultst := #191
  else if HTMLChar = '&Agrave;' then resultst := #192
  else if HTMLChar = '&Aacute;' then resultst := #193
  else if HTMLChar = '&Acirc;' then resultst := #194
  else if HTMLChar = '&Atilde;' then resultst := #195
  else if HTMLChar = '&Auml;' then resultst := #196
  else if HTMLChar = '&Aring;' then resultst := #197
  else if HTMLChar = '&AElig;' then resultst := #198
  else if HTMLChar = '&Ccedil;' then resultst := #199
  else if HTMLChar = '&Egrave;' then resultst := #200
  else if HTMLChar = '&Eacute;' then resultst := #201
  else if HTMLChar = '&Ecirc;' then resultst := #202
  else if HTMLChar = '&Euml;' then resultst := #203
  else if HTMLChar = '&Igrave;' then resultst := #204
  else if HTMLChar = '&Iacute;' then resultst := #205
  else if HTMLChar = '&Icirc;' then resultst := #206
  else if HTMLChar = '&Iuml;' then resultst := #207
  else if HTMLChar = '&ETH ;' then resultst := #208
  else if HTMLChar = '&Ntilde;' then resultst := #209
  else if HTMLChar = '&Ograve;' then resultst := #210
  else if HTMLChar = '&Oacute;' then resultst := #211
  else if HTMLChar = '&Ocirc;' then resultst := #212
  else if HTMLChar = '&Otilde;' then resultst := #213
  else if HTMLChar = '&Ouml;' then resultst := #214
  else if HTMLChar = '&times;' then resultst := #215
  else if HTMLChar = '&Oslash;' then resultst := #216
  else if HTMLChar = '&Ugrave;' then resultst := #217
  else if HTMLChar = '&Uacute;' then resultst := #218
  else if HTMLChar = '&Ucirc;' then resultst := #219
  else if HTMLChar = '&Uuml;' then resultst := #220
  else if HTMLChar = '&Yacute;' then resultst := #221
  else if HTMLChar = '&THORN;' then resultst := #222
  else if HTMLChar = '&szlig;' then resultst := #223
  else if HTMLChar = '&agrave;' then resultst := #224
  else if HTMLChar = '&aacute;' then resultst := #225
  else if HTMLChar = '&acirc;' then resultst := #226
  else if HTMLChar = '&atilde;' then resultst := #227
  else if HTMLChar = '&auml;' then resultst := #228
  else if HTMLChar = '&aring;' then resultst := #229
  else if HTMLChar = '&aelig;' then resultst := #230
  else if HTMLChar = '&ccedil;' then resultst := #231
  else if HTMLChar = '&egrave;' then resultst := #232
  else if HTMLChar = '&eacute;' then resultst := #233
  else if HTMLChar = '&ecirc;' then resultst := #234
  else if HTMLChar = '&euml;' then resultst := #235
  else if HTMLChar = '&igrave;' then resultst := #236
  else if HTMLChar = '&iacute;' then resultst := #237
  else if HTMLChar = '&icirc;' then resultst := #238
  else if HTMLChar = '&iuml;' then resultst := #239
  else if HTMLChar = '&eth ;' then resultst := #240
  else if HTMLChar = '&ntilde;' then resultst := #241
  else if HTMLChar = '&ograve;' then resultst := #242
  else if HTMLChar = '&oacute;' then resultst := #243
  else if HTMLChar = '&ocirc;' then resultst := #244
  else if HTMLChar = '&otilde;' then resultst := #245
  else if HTMLChar = '&ouml;' then resultst := #246
  else if HTMLChar = '&divide;' then resultst := #247
  else if HTMLChar = '&oslash;' then resultst := #248
  else if HTMLChar = '&ugrave;' then resultst := #249
  else if HTMLChar = '&uacute;' then resultst := #250
  else if HTMLChar = '&ucirc;' then resultst := #251
  else if HTMLChar = '&uuml;' then resultst := #252
  else if HTMLChar = '&yacute;' then resultst := #253
  else if HTMLChar = '&thorn;' then resultst := #254
  else if HTMLChar = '&yuml;' then resultst := #255
  else DoRollBack;
 end else DoRollBack;
 if resultst <> #32 then begin
  for i := 1 to length(resultst) do begin
   inc(filteredpos);
   tempstr[filteredpos] := resultst[i];
  end;
 end
 else begin
  if (filteredpos >= 0) and (tempstr[filteredpos] <> #32) then begin
   inc(filteredpos);
   tempstr[filteredpos] := #32;
  end;
 end;
end;
begin
filteredpos := -1;
getmem(tempstr,length(texttofilter)+1);
CharsToProcess := Length(TextToFilter);
OperateOn := UpperCase(TextToFilter);
CurrentChar := 1;
while CurrentChar <= CharsToProcess do begin
 TestChar := TextToFilter[CurrentChar];
 case TestChar of
  #0..#9, #11, #12, #14..#31: {do nothing};
  #10, #13, #32: if (filteredpos >= 0) and (tempstr[filteredpos] <> #32)
then begin
    inc(filteredpos);
    tempstr[filteredpos] := #32;
  end;
  '<': begin
        HTMLTag;
        if (filteredpos >= 0) and (tempstr[filteredpos] <> #32) then begin
         inc(filteredpos);
          tempstr[filteredpos] := #32;
        end;
  end;
  '&': SpecialChar;
 else begin
   inc(filteredpos);
   tempstr[filteredpos] := TestChar;
  end;
 end;
 inc(CurrentChar);
end;
if filteredpos > -1 then
begin
  tempstr[filteredpos+1] := #0;
  filteredtext := filteredtext+string(tempstr);
end;
freemem(tempstr);
end;

Tue, Jul 28 2009 8:57 AMPermanent Link

Roy Lambert

NLH Associates

Team Elevate Team Elevate

John


Ta. I'll have a look. Next time could I be so ungracious as to suggest either attaching the .pas or attaching in a zip - makes it a bit easier than having to cut'n'paste.

How much faster do you reckon you've got it?

Roy Lambert
Tue, Jul 28 2009 9:17 AMPermanent Link

"John Hay"
Roy

> Ta. I'll have a look. Next time could I be so ungracious as to suggest
either attaching the .pas or attaching in a zip - makes it a bit easier than
having to cut'n'paste.

I don't have ElevateDB installed at the moment as I haven't got around to
upgrading to V2 <blush> so I just cut and pasted your procedures into a
datamodule - not inheriting from TEDBTextFilterModule etc.

> How much faster do you reckon you've got it?

I think it depends on the size of the filtered text compared to the
TexttoFilter.  If it is high and largish strings it can be 3-10 times faster
(at least it was when I tried it).

John

Tue, Jul 28 2009 10:07 AMPermanent Link

Roy Lambert

NLH Associates

Team Elevate Team Elevate

John


>I don't have ElevateDB installed at the moment as I haven't got around to
>upgrading to V2 <blush> so I just cut and pasted your procedures into a
>datamodule - not inheriting from TEDBTextFilterModule etc.

How do you think I developed them Smiley

>> How much faster do you reckon you've got it?
>
>I think it depends on the size of the filtered text compared to the
>TexttoFilter. If it is high and largish strings it can be 3-10 times faster
>(at least it was when I tried it).

Sounds worthwhile. To save me having to do an eyeball compare can you tell me what other tweaks you made?

Roy Lambert
Tue, Jul 28 2009 10:27 AMPermanent Link

"John Hay"
Roy
>
> Sounds worthwhile. To save me having to do an eyeball compare can you tell
me what other tweaks you made?
>

I preallocated the space for the filteredtext, used a pchar to populate it
and assign it to filtered text at the end.  That is why you get a big gain
if the ratio of actual text to formatting is high.  If it is low most of the
time is spent in the checking code and the gains are small.

John

Tue, Jul 28 2009 1:07 PMPermanent Link

Roy Lambert

NLH Associates

Team Elevate Team Elevate

John


Makes sense. What do you regard as a lot of text?

Roy Lambert
Tue, Jul 28 2009 1:54 PMPermanent Link

"John Hay"
Roy
>
> Makes sense. What do you regard as a lot of text?

I tried the HTML with elevates home page which is 38,463 bytes when saved as
htm and results in 15924 bytes of filtered text.
On my laptop the original filter (for 200 iterations) took about 2.25-2.3
seconds.  The revised function took 0.21-0.24 secs.
An RTF (with tables) which was 25K on disk and 4640 filtered task only went
from 2.3 to 1.75 secs.
The Delphi 7 license rtf file which is 47K on disk and produces 31109 bytes
of filtered text went from 4.25 secs to 0.53 secs.

John

Tue, Jul 28 2009 2:18 PMPermanent Link

Roy Lambert

NLH Associates

Team Elevate Team Elevate

John


You've convinced me Smiley

Roy Lambert
Wed, Jul 29 2009 4:55 AMPermanent Link

Roy Lambert

NLH Associates

Team Elevate Team Elevate

John


Is there a reason for

 FilteredText := filteredtext + string(tempstr);

rather than just

 FilteredText := string(tempstr);


For amusement - I cut'n'pasted your code into the IDE, hadn't really looked at the code. I did in the IDE and I thought why has he altered all the html special characters? This will never work thought I. Then I twigged - my newsreader had converted them for display Smiley


Roy Lambert
Wed, Jul 29 2009 5:06 AMPermanent Link

"Ian Branch"
Roy Lambert wrote:

> John
>
>
> Is there a reason for
>
>   FilteredText := filteredtext + string(tempstr);
>
> rather than just
>
>   FilteredText := string(tempstr);

I'm going to guess filteredtext already has some filter condition in it and
tempstr is adding to it.  I do this often to preserve the original filter, say
by a particular customer number, and adding other filter criteria.

Just My thought...Smile
Page 1 of 2Next Page »
Jump to Page:  1 2
Image