Icon View Thread

The following is the text of the current message along with any replies.
Messages 1 to 1 of 1 total
Thread HTML to plain text filter
Fri, Nov 30 2007 1:42 PMPermanent Link

Roy Lambert

NLH Associates

Team Elevate Team Elevate

I'm sure someone out there will be able to improve on this. If anyone wishes to donate an rtf to plain text filter I would be delighted to accept it Smiley

Roy Lambert


procedure TEDBTextFilterModule1.EDBTextFilterModuleFilterText(const FilterType: string; const TextToFilter: string; var FilteredText: string);
var
CharsToProcess: integer;
CurrentChar: integer;
TestChar: Char;
const
Delims = [' ', ',', '.', ';', ':', '!', '"', '?', '(', ')', '/', '\', '>', '<', '[', ']', '}', '{'];
Alphas = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '_', '-'];
function HTMLTag: string;
var
 Tag: string;
 CRCheck: string;
 procedure FindClosingTag(WhatTag: string);
 var
  TagLength: integer;
  function NoTagMatch: boolean;
  var
   Cntr: integer;
  begin
   Result := False;
   for Cntr := 1 to TagLength do begin
    if WhatTag[Cntr] <> UpperCase(TextToFilter[CurrentChar + Cntr]) then begin
     Result := True;
     Break;
    end;
   end;
  end;
 begin
  TagLength := Length(WhatTag);
  while (CurrentChar < CharsToProcess - TagLength) and (NoTagMatch) do inc(CurrentChar);
  CurrentChar := CurrentChar + TagLength + 1;
 end;
begin
 Tag := '';
 while (TextToFilter[CurrentChar] <> '>') and (CurrentChar <= CharsToProcess) do begin
  Tag := Tag + TextToFilter[CurrentChar];
  inc(CurrentChar);
 end;
 Tag := Tag + TextToFilter[CurrentChar]; //This should get the >
 Tag := UpperCase(Tag);
 if Tag = '<HEAD>' then begin
  FindClosingTag('</HEAD>');
 end else if Tag = '<XML>' then begin
  FindClosingTag('</XML>');
 end else if Tag = '<TITLE>' then begin
   // We need to dump everything until the closing tag
  FindClosingTag('</TITLE>');
  Result := '';
 end else if Copy(Tag, 1, 6) = '<STYLE' then begin
  FindClosingTag('</STYLE>');
   // We need to dump everything until the closing tag - especially css stuff
  Result := '';
 end else if Tag = '<BR>' then begin
  Result := #13#10;
 end else if Copy(Tag, 1, 2) = '<P' then begin
  Result := #13#10;
 end else if Tag = '</DIV>' then begin
  if CurrentChar < CharsToProcess then begin
   if (TextToFilter[CurrentChar - 6] <> '>') and (TextToFilter[CurrentChar - 10] <> '<') then Result := #13#10 else begin
    CRCheck := TextToFilter[CurrentChar - 10] +
     TextToFilter[CurrentChar - 9] +
     TextToFilter[CurrentChar - 8] +
     TextToFilter[CurrentChar - 7] +
     TextToFilter[CurrentChar - 6];
    if UpperCase(CRCheck) <> '<DIV>' then Result := #13#10;
   end
  end else Result := '';
 end else if (Copy(Tag, 1, 3) = '</H') and (Tag[4] in ['0'..'9']) then begin
  Result := #13#10;
 end else Result := '';
end;
function SpecialChar: string;
var
 HTMLChar: string;
begin
 HTMLChar := '';
 while (TextToFilter[CurrentChar] <> ';') and (CurrentChar <= CharsToProcess) do begin
  HTMLChar := HTMLChar + TextToFilter[CurrentChar];
  inc(CurrentChar);
 end;
 HTMLChar := LowerCase(HTMLChar + TextToFilter[CurrentChar]); //This should get the ;
 Result := '';
end;
begin
if 0 <> Pos('<html', LowerCase(TextToFilter)) then begin
 FilteredText := '';
 CharsToProcess := Length(TextToFilter);
 CurrentChar := 1;
 while CurrentChar <= CharsToProcess do begin
  TestChar := TextToFilter[CurrentChar];
  case TestChar of
   #0..#9, #11, #12, #14..#31: {do nothing};
   '<': FilteredText := FilteredText + HTMLTag;
   '&': FilteredText := FilteredText + SpecialChar;
  else FilteredText := FilteredText + TestChar;
  end;
  inc(CurrentChar);
 end;
end else FilteredText := TextToFilter;
end;
Image