unit XMLParser;

// Version 1.0.0
//
// The contents of this file are subject to the Mozilla Public License
// Version 1.1 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the
// specific language governing rights and limitations under the License.
//
// Site: http://www.mythcode.org
// Author: Dzianis Koshkin
// E-mail: k5@yandex.ru
//
// (C) 2005 MYTHcode.org

interface

uses SysUtils, HTTPApp;

type

TTagType = (ttBeginTag{<e>}, ttEndTag{</e>}, ttTermTag{<e/>}, ttCommentTag{<! >}, ttDefineTag{<? >}, ttUnknownTag);

TToken = record
  From: PChar;
  Before: PChar;
end;

TXMLParser = class
protected
  FDocument: string;
  PName: PChar;
  PSpace: PChar;
  PAttribute: PChar;
  PBefore: PChar;
  PInitial: PChar;
  PFinal: PChar;
  POpen: PChar;
  PClose: PChar;
  PNextOpen: PChar;
  PBeginSelected: PChar;
  PEndSelected: PChar;
  FTagType: TTagType;
  FLastToken: Integer;
  FTokens: array of TToken;
  FLastAttribute: Integer;
  FAttributes: array of Integer;
  procedure PreAttribute;
  procedure InitAttributes;
  procedure SetDocument(const S: string); overload;
  function GetLastAttribute: Integer;
  function GetTagCode: string;
  function GetTagText: string;
  function GetNameCode: string;
  function GetName: string;
  function GetSpace: string;
  function GetContentCode: string;
  function GetContentText: string;
  function GetContentTrimText: string;
  function GetContentSpaceTrimText: string;
  function GetStepCode: string;
  function GetHead: string;
  function GetTail: string;
  function GetSelected: string;
  function GetValue(Name: string): string;
  function GetAttribute(Index: Integer): string;
public
  Compare: function(const A, B: string): Integer;
  constructor Create(const S: string = '');
  function Next: boolean;
  procedure BeginSelect;
  procedure EndSelect;
  property Document: string read FDocument write SetDocument;
  property TagType: TTagType read FTagType;
  property Name: string read GetName;        // <[p]:html> Paragraph </p:html>
  property NameCode: string read GetNameCode;// <[p:html] id="1"> Paragraph </p:html>
  property Space: string read GetSpace;      // <p:[html]> Paragraph </p:html>
  property TagCode: string read GetTagCode;  // [<p id="1">] Paragraph </p:html>
  property TagText: string read GetTagText;  // <[p id="1"]> Paragraph </p:html>
  property ContentCode: string read GetContentCode;// <p>[ Paragraph ]</p>
  property ContentText: string read GetContentText;// <p>[ Paragraph ]</p>
  property ContentTrimText: string read GetContentTrimText;// <p>#13#10[ Paragraph ]#13#10</p>
  property ContentSpaceTrimText: string read GetContentSpaceTrimText;// <p> [Paragraph] </p>
  property StepCode: string read GetStepCode;
  property Head: string read GetHead;
  property Tail: string read GetTail;
  property Selected: string read GetSelected;
  property LastAttribute: Integer read GetLastAttribute;
  property Attribute[Index: Integer]: string read GetAttribute;//<p [id]="1"> Paragraph </p>
  property Value[Name: string]: string read GetValue; default;//<p id="[1]"> Paragraph </p>
end;

implementation

function Get(const From, Before: PChar): string; overload;
begin
  Assert(From<>nil);
  SetLength(Result, Before-From);
  Move(From^, Pointer(Result)^, Length(Result));
end;

function Get(const Token: TToken): string; overload;
begin
  with Token do
  begin
    Assert(From<>nil);
    SetLength(Result, Before-From);
    Move(From^, Pointer(Result)^, Length(Result));
  end;
end;

procedure TXMLParser.SetDocument(const S: string);
begin
  if S='' then FDocument:=' ' else FDocument:=S;
  PInitial:=Pointer(FDocument);
  PFinal:=@PInitial[Length(FDocument)];
  POpen:=PInitial;
  PClose:=PInitial;
  PNextOpen:=PInitial;
  PBeginSelected:=PInitial;
  PEndSelected:=PInitial;
  FTagType:=ttUnknownTag;
end;

constructor TXMLParser.Create(const S: string = '');
begin
  SetDocument(S);
  Compare:=CompareText;
end;

function TXMLParser.Next: boolean;
begin
  Result:=false;
  if (PNextOpen=PFinal) then Exit;
  PAttribute:=nil;
  FLastToken:=-1;
  FLastAttribute:=-1;
  POpen:=PNextOpen;
  PClose:=POpen;
  Inc(PClose);
  while (PClose^<>'>') and (PClose<PFinal) do Inc(PClose);
  if PClose[-1]='/' then
  begin
    FTagType:=ttTermTag;
    PName:=POpen+1;
    PBefore:=PClose-1;
  end else
  begin
    case POpen[1] of
      '/':
      begin
        FTagType:=ttEndTag;
        PName:=POpen+2;
      end;
      '!':
      begin
        FTagType:=ttCommentTag;
        PName:=POpen+2;
      end;
      '?':
      begin
        FTagType:=ttDefineTag;
        PName:=POpen+2;
      end;
      else
      begin
        FTagType:=ttBeginTag;
        PName:=POpen+1;
      end;
    end;
    PBefore:=PClose;
  end;
  PNextOpen:=PClose;
  while (PNextOpen^<>'<') and (PNextOpen<PFinal) do Inc(PNextOpen);
  Result:=true;
end;

function TXMLParser.GetName: string;
begin
  if PAttribute=nil then PreAttribute;
  Result:=Get(PName,PSpace);
end;

function TXMLParser.GetSpace: string;
begin
  if PAttribute=nil then PreAttribute;
  Result:=Get(PSpace+1, PAttribute);
end;

function TXMLParser.GetNameCode;
begin
  if PAttribute=nil then PreAttribute;
  Result:=Get(PName,PAttribute);
end;

function TXMLParser.GetTagCode: string;
begin
  Result:=Get(POpen, PClose+1);
end;

function TXMLParser.GetTagText;
begin
  Result:=Get(POpen+1, PClose);
end;

function TXMLParser.GetContentCode: string;
begin
  Result:=Get(PClose+1, PNextOpen);
end;

function TXMLParser.GetStepCode: string;
begin
  Result:=Get(POpen, PNextOpen);
end;

function TXMLParser.GetContentText;
begin
  Result:=HTMLDecode(Get(PClose+1, PNextOpen));
end;

function TXMLParser.GetHead;
begin
  Result:=Get(PInitial, PClose+1);
end;

function TXMLParser.GetTail;
begin
  Result:=Get(PClose+1, PFinal+1);
end;

procedure TXMLParser.BeginSelect;
begin
  PBeginSelected:=PClose;
end;

procedure TXMLParser.EndSelect;
begin
  PEndSelected:=POpen;
end;

function TXMLParser.GetSelected: string;
begin
  Result:=Get(PBeginSelected+1, PEndSelected);
end;

function TXMLParser.GetLastAttribute: Integer;
begin
  if FLastAttribute<0 then InitAttributes;;
  Result:=FLastAttribute;
end;

function TXMLParser.GetAttribute(Index: Integer): string;
begin
  Result:=Get(FTokens[FAttributes[Index]]);
end;

function TXMLParser.GetContentTrimText;
var
   P, P_: PChar;
begin
  P:=PClose+1;
  P_:=PNextOpen;
  while (P<P_) and (P^<#32) do Inc(P);
  while (P<P_) and (P_[-1]<#32) do Dec(P_);
  Result:=HTMLDecode(Get(P, P_));
end;

function TXMLParser.GetContentSpaceTrimText;
var
  P, P_: PChar;
begin
  P:=PClose+1;
  P_:=PNextOpen;
  while (P<P_) and (P^<#33) do Inc(P);
  while (P<P_) and (P_[-1]<#33) do Dec(P_);
  Result:=HTMLDecode(Get(P,P_));
end;

procedure TXMLParser.PreAttribute;
begin
  PSpace:=nil;
  PAttribute:=PName;
  while (PAttribute^>#33) and (PAttribute<PBefore) do
  begin
    if PAttribute^=':' then PSpace:=PAttribute;
    Inc(PAttribute);
  end;
  if PSpace=nil then PSpace:=PAttribute;
end;

function TXMLParser.GetValue(Name: string): string;
var
  i: Integer;
begin
  for i:=0 to GetLastAttribute do
  begin
    if Compare(Get(FTokens[FAttributes[i]]),Name)=0 then
    begin
      Result:=Get(FTokens[Succ(FAttributes[i])]);
      Exit;
    end;
  end;
  Result:='';
end;

procedure TXMLParser.InitAttributes;
var
  P1: PChar;
  P2: PChar;

procedure AddToken;
begin
  Inc(FLastToken);
  if Length(FTokens)<=FLastToken then SetLength(FTokens, Succ(FLastToken*2));
  with FTokens[FLastToken] do
  begin
    From:=P1;
    Before:=P2;
  end;
end;

procedure AddAttribute;
begin
  Inc(FLastAttribute);
  if Length(FAttributes)<Succ(FLastAttribute) then SetLength(FAttributes, Succ(FLastAttribute*2));
  FAttributes[FLastAttribute]:=FLastToken;
end;

label
  Iteration;

begin

  if PAttribute=nil then PreAttribute;

  P1:=PAttribute;
  while P1<PBefore do
  case P1^ of

    '"':
    begin
      P2:=P1+1;
      while (P2<PBefore) and (P2^<>'"') do Inc(P2);
      Inc(P1);
      AddToken;
      P1:=P2+1;
    end;

    '''':
    begin
      P2:=P1+1;
      while (P2<PBefore) and (P2^<>'''') do Inc(P2);
      Inc(P1);
      AddToken;
      P1:=P2+1;
    end;

    '=':
    begin
      if FLastToken>-1 then AddAttribute;
      Inc(P1);
    end;

    else
    begin
      if P1^>#32 then
      begin

        P2:=P1;

        while (P2<PBefore) do
        begin
          if P2^='=' then
          begin
            AddToken;
            AddAttribute;
            goto Iteration;
          end else
          if (P2^<#33) then
          begin
            AddToken;
            goto Iteration;
          end;
          Inc(P2);
        end;

        AddToken;
        Exit;

        Iteration: P1:=P2+1;

      end else Inc(P1);
    end;
  end;

  AddToken;

end;

end.