unit XMLParser;
interface
uses SysUtils, HTTPApp;
type
TTagType = (ttBeginTag, ttEndTag, ttTermTag, ttCommentTag, ttDefineTag, ttUnknownTag);
TToken = record
From: PChar;
Before: PChar;
end;
TXMLParser = class
protected
FDocument: string;
PName: PChar;
PSpace: PChar;
PAttribute: PChar;
PBefore: PChar;
PInitial: PChar;
PFinal: PChar;
POpen: PChar;
PClose: PChar;
PNextOpen: PChar;
PBeginSelected: PChar;
PEndSelected: PChar;
FTagType: TTagType;
FLastToken: Integer;
FTokens: array of TToken;
FLastAttribute: Integer;
FAttributes: array of Integer;
procedure PreAttribute;
procedure InitAttributes;
procedure SetDocument(const S: string); overload;
function GetLastAttribute: Integer;
function GetTagCode: string;
function GetTagText: string;
function GetNameCode: string;
function GetName: string;
function GetSpace: string;
function GetContentCode: string;
function GetContentText: string;
function GetContentTrimText: string;
function GetContentSpaceTrimText: string;
function GetStepCode: string;
function GetHead: string;
function GetTail: string;
function GetSelected: string;
function GetValue(Name: string): string;
function GetAttribute(Index: Integer): string;
public
Compare: function(const A, B: string): Integer;
constructor Create(const S: string = '');
function Next: boolean;
procedure BeginSelect;
procedure EndSelect;
property Document: string read FDocument write SetDocument;
property TagType: TTagType read FTagType;
property Name: string read GetName; property NameCode: string read GetNameCode; property Space: string read GetSpace; property TagCode: string read GetTagCode; property TagText: string read GetTagText; property ContentCode: string read GetContentCode; property ContentText: string read GetContentText; property ContentTrimText: string read GetContentTrimText; property ContentSpaceTrimText: string read GetContentSpaceTrimText; property StepCode: string read GetStepCode;
property Head: string read GetHead;
property Tail: string read GetTail;
property Selected: string read GetSelected;
property LastAttribute: Integer read GetLastAttribute;
property Attribute[Index: Integer]: string read GetAttribute; property Value[Name: string]: string read GetValue; default;end;
implementation
function Get(const From, Before: PChar): string; overload;
begin
Assert(From<>nil);
SetLength(Result, Before-From);
Move(From^, Pointer(Result)^, Length(Result));
end;
function Get(const Token: TToken): string; overload;
begin
with Token do
begin
Assert(From<>nil);
SetLength(Result, Before-From);
Move(From^, Pointer(Result)^, Length(Result));
end;
end;
procedure TXMLParser.SetDocument(const S: string);
begin
if S='' then FDocument:=' ' else FDocument:=S;
PInitial:=Pointer(FDocument);
PFinal:=@PInitial[Length(FDocument)];
POpen:=PInitial;
PClose:=PInitial;
PNextOpen:=PInitial;
PBeginSelected:=PInitial;
PEndSelected:=PInitial;
FTagType:=ttUnknownTag;
end;
constructor TXMLParser.Create(const S: string = '');
begin
SetDocument(S);
Compare:=CompareText;
end;
function TXMLParser.Next: boolean;
begin
Result:=false;
if (PNextOpen=PFinal) then Exit;
PAttribute:=nil;
FLastToken:=-1;
FLastAttribute:=-1;
POpen:=PNextOpen;
PClose:=POpen;
Inc(PClose);
while (PClose^<>'>') and (PClose<PFinal) do Inc(PClose);
if PClose[-1]='/' then
begin
FTagType:=ttTermTag;
PName:=POpen+1;
PBefore:=PClose-1;
end else
begin
case POpen[1] of
'/':
begin
FTagType:=ttEndTag;
PName:=POpen+2;
end;
'!':
begin
FTagType:=ttCommentTag;
PName:=POpen+2;
end;
'?':
begin
FTagType:=ttDefineTag;
PName:=POpen+2;
end;
else
begin
FTagType:=ttBeginTag;
PName:=POpen+1;
end;
end;
PBefore:=PClose;
end;
PNextOpen:=PClose;
while (PNextOpen^<>'<') and (PNextOpen<PFinal) do Inc(PNextOpen);
Result:=true;
end;
function TXMLParser.GetName: string;
begin
if PAttribute=nil then PreAttribute;
Result:=Get(PName,PSpace);
end;
function TXMLParser.GetSpace: string;
begin
if PAttribute=nil then PreAttribute;
Result:=Get(PSpace+1, PAttribute);
end;
function TXMLParser.GetNameCode;
begin
if PAttribute=nil then PreAttribute;
Result:=Get(PName,PAttribute);
end;
function TXMLParser.GetTagCode: string;
begin
Result:=Get(POpen, PClose+1);
end;
function TXMLParser.GetTagText;
begin
Result:=Get(POpen+1, PClose);
end;
function TXMLParser.GetContentCode: string;
begin
Result:=Get(PClose+1, PNextOpen);
end;
function TXMLParser.GetStepCode: string;
begin
Result:=Get(POpen, PNextOpen);
end;
function TXMLParser.GetContentText;
begin
Result:=HTMLDecode(Get(PClose+1, PNextOpen));
end;
function TXMLParser.GetHead;
begin
Result:=Get(PInitial, PClose+1);
end;
function TXMLParser.GetTail;
begin
Result:=Get(PClose+1, PFinal+1);
end;
procedure TXMLParser.BeginSelect;
begin
PBeginSelected:=PClose;
end;
procedure TXMLParser.EndSelect;
begin
PEndSelected:=POpen;
end;
function TXMLParser.GetSelected: string;
begin
Result:=Get(PBeginSelected+1, PEndSelected);
end;
function TXMLParser.GetLastAttribute: Integer;
begin
if FLastAttribute<0 then InitAttributes;;
Result:=FLastAttribute;
end;
function TXMLParser.GetAttribute(Index: Integer): string;
begin
Result:=Get(FTokens[FAttributes[Index]]);
end;
function TXMLParser.GetContentTrimText;
var
P, P_: PChar;
begin
P:=PClose+1;
P_:=PNextOpen;
while (P<P_) and (P^<#32) do Inc(P);
while (P<P_) and (P_[-1]<#32) do Dec(P_);
Result:=HTMLDecode(Get(P, P_));
end;
function TXMLParser.GetContentSpaceTrimText;
var
P, P_: PChar;
begin
P:=PClose+1;
P_:=PNextOpen;
while (P<P_) and (P^<#33) do Inc(P);
while (P<P_) and (P_[-1]<#33) do Dec(P_);
Result:=HTMLDecode(Get(P,P_));
end;
procedure TXMLParser.PreAttribute;
begin
PSpace:=nil;
PAttribute:=PName;
while (PAttribute^>#33) and (PAttribute<PBefore) do
begin
if PAttribute^=':' then PSpace:=PAttribute;
Inc(PAttribute);
end;
if PSpace=nil then PSpace:=PAttribute;
end;
function TXMLParser.GetValue(Name: string): string;
var
i: Integer;
begin
for i:=0 to GetLastAttribute do
begin
if Compare(Get(FTokens[FAttributes[i]]),Name)=0 then
begin
Result:=Get(FTokens[Succ(FAttributes[i])]);
Exit;
end;
end;
Result:='';
end;
procedure TXMLParser.InitAttributes;
var
P1: PChar;
P2: PChar;
procedure AddToken;
begin
Inc(FLastToken);
if Length(FTokens)<=FLastToken then SetLength(FTokens, Succ(FLastToken*2));
with FTokens[FLastToken] do
begin
From:=P1;
Before:=P2;
end;
end;
procedure AddAttribute;
begin
Inc(FLastAttribute);
if Length(FAttributes)<Succ(FLastAttribute) then SetLength(FAttributes, Succ(FLastAttribute*2));
FAttributes[FLastAttribute]:=FLastToken;
end;
label
Iteration;
begin
if PAttribute=nil then PreAttribute;
P1:=PAttribute;
while P1<PBefore do
case P1^ of
'"':
begin
P2:=P1+1;
while (P2<PBefore) and (P2^<>'"') do Inc(P2);
Inc(P1);
AddToken;
P1:=P2+1;
end;
'''':
begin
P2:=P1+1;
while (P2<PBefore) and (P2^<>'''') do Inc(P2);
Inc(P1);
AddToken;
P1:=P2+1;
end;
'=':
begin
if FLastToken>-1 then AddAttribute;
Inc(P1);
end;
else
begin
if P1^>#32 then
begin
P2:=P1;
while (P2<PBefore) do
begin
if P2^='=' then
begin
AddToken;
AddAttribute;
goto Iteration;
end else
if (P2^<#33) then
begin
AddToken;
goto Iteration;
end;
Inc(P2);
end;
AddToken;
Exit;
Iteration: P1:=P2+1;
end else Inc(P1);
end;
end;
AddToken;
end;
end.