// ============================================================================
// Reader for RSS channels and Atom feeds
// Copyright (c) 2004, Juergen Haible. All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// ============================================================================

unit cWebChannel;

interface

uses SysUtils, Classes, cXML, IdHttp;

type
   TWebChannelVersion = ( html, rss091, rss10, rss20, atom03 );

   TWebChannel = class;

   TWebChannelItem = class

      private
         mWebChannel: TWebChannel;
         mDateTime: String;

         function getDate: String;
         function getIsValid: Boolean;

      public
         Title, Description, Link, Creator: String;
         property DateTime: String read getDate write mDateTime;

         property IsValid: Boolean read getIsValid;

         constructor Create( webChannel: TWebChannel );

   end;

   TWebChannel = class

      private
         mWebItems: TList;
         mDateTime: String;

         function getCount: Integer;
         function getItem(index: Integer): TWebChannelItem;
         function getDate: String;

      public
         WebChannelVersion: TWebChannelVersion;
         WebChannelVersionStr: String;
         Title, Description, DescType, Link, Creator, Language: String;
         property DateTime: String read getDate write mDateTime;

         property Count: Integer read getCount;
         property Items[ index: Integer ]: TWebChannelItem read getItem;

         constructor Create;
         destructor Destroy; override;

   end;

   TWebChannels = class

      private
         mList: TList;
         function getCount: Integer;
         function getItem(index: Integer): TWebChannel;

      public
         property Count: Integer read getCount;
         property Items[ index: Integer ]: TWebChannel read getItem;

         procedure Add( channel: TWebChannel );
         
         constructor Create;
         destructor Destroy; override;

   end;

   TWebChannelInfoType = ( wcitUrl, wcitFile, wcitText );

   TWebChannelReader = class

      private
         mCharset: String;
         mWebLink: String;
         mWebText, mWebType: String;
         mError, mWarning: String;
         mChannels: TWebChannels;
         mModified: Boolean;
         mProxyServer, mProxyUser, mProxyPass: String;
         mProxyPort: Integer;
         mHttp: TIdHTTP;

         procedure loadWebText( const comment: String );
         procedure loadWebFile( const filename: String );
         procedure parseWebText;
         procedure parseRssChannel( webChannel: TWebChannel; elemChannel: TXMLElement );
         procedure parseRssItem( webChannel: TWebChannel; elemItem: TXMLElement );
         procedure parseAtomFeed( webChannel: TWebChannel; elemFeed: TXMLElement );
         procedure parseAtomEntry( webChannel: TWebChannel; elemEntry: TXMLElement );
         function  loadLastModified( out dt: TDateTime ): Boolean;
         procedure saveLastModified( const dt: TDateTime );
         function getWebLink: String;
         function encodeHdr( hdrVal: String ): String;
         function encodeTxt( hdrVal: String ): String;

      public
         property Channels: TWebChannels read mChannels;
         property CharSet: String read mCharset;
         property WebLink: String read getWebLink write mWebLink;
         property WebText: String read mWebText;
         property Error  : String read mError;
         property Warning: String read mWarning;
         property Modified: Boolean read mModified;

         procedure GetAllItemsAsText( channel: TWebChannel;
                                      out chSubject, chFrom, chText: String;
                                      out chTime: TDateTime );
         procedure GetSelectedItemsAsText( channel: TWebChannel;
                                           indexes: TList;
                                           out chSubject, chFrom, chText: String;
                                           out chTime: TDateTime );
         procedure GetSingleItemAsText( channel: TWebChannel;
                                        index: Integer;
                                        out chSubject, chFrom, chText: String;
                                        out chTime: TDateTime;
                                        addHtml: Boolean;
                                        out chHtml, chType: String );
         procedure Terminate;

         constructor Create( const infoType: TWebChannelInfoType;
                             const info, comment: String;
                             const AProxyServer: String;
                             const AProxyPort: Integer;
                             const AProxyUser, AProxyPass: String );
         destructor Destroy; override;

   end;


procedure SplitUrl( const fullUrl: String; out url: String; out username, password: String );

function HtmlToText( html: String ): String; overload;
function HtmlToText( html: String; compress: Boolean ): String; overload;

function LoadLinkWebText( link: String;
                          pxServer: String;
                          pxPort: Integer;
                          pxUser, pxPass: String ): String; overload;
function LoadLinkWebText( link: String;
                          pxServer: String;
                          pxPort: Integer;
                          pxUser, pxPass: String;
                          out contentType: String ): String; overload;
function LoadLinkWebText( http: TIdHTTP;
                          link: String;
                          pxServer: String;
                          pxPort: Integer;
                          pxUser, pxPass: String;
                          out contentType: String ): String; overload;

implementation

uses Math, uConst, uVar, uTools, uHamTools, IniFiles, uDateTime,
     IdAuthenticationDigest, cHamster, cLogFileHamster, uCharsets, uEncoding,
     IdSSLOpenSSL, IdTCPClient;

procedure SplitUrl( const fullUrl: String; out url: String; out username, password: String );
// 'http://username:password@X' -> 'http://X', 'username', 'password'
// 'http://$42@X' -> 'http://X', '$42', ''
var  tmp: String;
     i: Integer;
begin
   url      := fullUrl;
   username := '';
   password := '';

   if Pos( 'http://', url ) <> 1 then exit;
   if Pos( '@', url ) = 0 then exit;

   tmp := copy( url, 8, MaxInt );
   i := Pos( '/', tmp );
   if i = 0 then i := Length( tmp ) + 1;

   tmp := copy( tmp, 1, i - 1 );
   i := Pos( '@', tmp );
   if i = 0 then exit;

   tmp := copy( tmp, 1, i - 1 );
   System.Delete( url, 8, length( tmp ) + 1 );

   i := Pos( ':', tmp );
   if i = 0 then begin
      username := tmp;
   end else begin
      username := copy( tmp, 1, i - 1 );
      password := copy( tmp, i + 1, MaxInt );
   end;
end;

function HtmlToText( html: String ): String;
begin
   Result := HtmlToText( html, False );
end;

function HtmlToText( html: String; compress: Boolean ): String;

   function getPart( h, key: String ): String;
   var  j: Integer;
   begin
      Result := '';
      j := Pos( '>', h );
      if j > 0 then SetLength( h, j-1 );
      j := Pos( key, LowerCase(h) );
      if j > 0 then begin
         h := copy( h, j+5, MaxInt );
         if copy( h, 1, 1 ) = '"' then System.Delete( h, 1, 1 );
         j := Pos( '"', h ); if j > 0 then SetLength( h, j-1 );
         j := Pos( ' ', h ); if j > 0 then SetLength( h, j-1 );
         Result := h;
      end;
   end;

var  i, j: Integer;
     inTag, inSpc: Boolean;
     h, tag, a, b, c: String;
begin
   Result := '';
   inTag := False;
   tag := '';

   html := StringReplace( html, '&#13;', '<br/>', [rfReplaceAll] );
   html := StringReplace( html, '&#10;',      '', [rfReplaceAll] );

   inSpc := True;
   for i := 1 to length(html) do begin

      if html[i] in [#9,#10,#13,' '] then begin
         if inSpc then continue;
         inSpc := True;
         html[i] := ' ';
      end else begin
         inSpc := False;
      end;

      if inTag then begin

         tag := tag + LowerCase( html[i] );
         
         if html[i] = '>' then begin
            inTag := False;
            if copy(tag,1,3) = '<br' then Result := Result + #13#10
            else if tag = '<p>'      then Result := Result + #13#10
            else if tag = '</p>'     then Result := Result + #13#10#13#10
            else if tag = '<li>'     then Result := Result + #13#10
            else if tag = '<table>'  then Result := Result + #13#10
            else if tag = '<tr>'     then Result := Result + #13#10
            else if tag = '<td>'     then Result := Result + #9
            else if tag = '</table>' then Result := Result + #13#10#13#10
            else if tag = '</title>' then Result := Result + #13#10#13#10
            ;
            if copy( Result, length(Result), 1 ) <= ' ' then inSpc := True;
         end;

      end else begin

         if html[i] = '<' then begin
            inTag := True;
            inSpc := True;
            tag := '<';
            if LowerCase( copy( html, i, 3 ) ) = '<a ' then begin
               h := getPart( copy(html,i+3,1024), 'href=' );
               if (h <> '') and not compress then Result := Result + '[link: ' + h + '] ';
            end;
            if LowerCase( copy( html, i, 5 ) ) = '<img ' then begin
               h := Trim( getPart( copy(html,i+5,1024), 'src=' ) + ' '
                  + getPart( copy(html,i+5,1024), 'alt=' ) );
               if (h <> '') and not compress then Result := Result + '[image: ' + h + '] ';
            end;
         end else begin
            Result := Result + html[i];
         end;
         
      end;

   end;

   Result := StringReplace( Result, '&nbsp;', ' ', [rfReplaceAll] );
   Result := StringReplace( Result, '&quot;', '"', [rfReplaceAll] );
   Result := StringReplace( Result, '&amp;',  '&', [rfReplaceAll] );
   Result := StringReplace( Result, '&lt;',   '<', [rfReplaceAll] );
   Result := StringReplace( Result, '&gt;',   '>', [rfReplaceAll] );

   repeat // '&#123;'
      i := Pos( '&#', Result );
      if i > 0 then begin

         a := copy( Result, 1, i - 1 );
         b := '?';
         c := copy( Result, i+2, MaxInt );

         j := Pos( ';', c );
         if (j > 0) and (j <= 4) then begin
            b := copy( c, 1, j - 1 );
            c := copy( c, j + 1, MaxInt );
            b := chr( StrToIntDef( b, ord('?') ) and 255 );
         end;

         Result := a + b + c;

      end;
   until i = 0;

   while copy( Result, 1, 2 ) = #13#10 do System.Delete( Result, 1, 2 );
   while copy( Result, length(Result)-1, 2 ) = #13#10 do System.Delete( Result, length(Result)-1, 2 );
end;

function ParseWebDateTime( const s: String ): TDateTime;
const chars: Array[0..3] of Char = ( '/', ' ', ':', '.' );
var  yyyy, mm, dd, hh, nn, ss: Word;
     tz: String;
     sl: TStringList;
     dt, dtBad: TDateTime;
begin
   try
      // ISO: 2004-09-12T10:51:54Z
      if (length(s) >= 4) and (copy( s, 1, 2 ) = '20') then begin
         yyyy := StrToInt( copy( s, 1, 4 ) );
         if length(s) >=  7 then mm := StrToInt( copy( s,  6, 2 ) ) else mm := 1;
         if length(s) >= 10 then dd := StrToInt( copy( s,  9, 2 ) ) else dd := 1;
         if length(s) >= 13 then hh := StrToInt( copy( s, 12, 2 ) ) else hh := 0;
         if length(s) >= 16 then nn := StrToInt( copy( s, 15, 2 ) ) else nn := 0;
         if length(s) >= 19 then ss := StrToInt( copy( s, 18, 2 ) ) else ss := 0;
         if length(s) >= 20 then tz := copy( s, 20, MaxInt ) else tz := 'GMT';
         Result := EncodeDate( yyyy, mm, dd )
                 + EncodeTime( hh, nn, ss, 0 )
                 + MinutesToDateTime( RfcTimezoneToBiasMinutes( tz ) );
         Result := DateTimeGMTToLocal( Result );
         exit;
      end;

      // RFC: Wed, 22 Sep 2004 15:39:00 GMT
      if length( s ) > 0 then begin
         try
            dtBad := EncodeDate( 1980, 1, 1 );
            dt := RfcDateTimeToDateTimeGMT( s, dtBad );
            if dt <> dtBad then begin
               Result := DateTimeGMTToLocal( dt );
               exit;
            end;
         except
            // ignore
         end;
      end;

      // M$: 4/20/2004 11:21:23 AM
      sl := TStringList.Create;
      try
         ArgsSplitChars( s, sl, chars );
         if sl.Count >= 3 then begin
            if (sl[2] >= '2000') and (sl[2] <= '2099') then begin
               mm := StrToInt( sl[0] );
               dd := StrToInt( sl[1] );
               if (mm > 12) and (dd <= 12) then begin
                  ss := mm;
                  mm := dd;
                  dd := ss;
               end;
               yyyy := StrToInt( sl[2] );
               Result := EncodeDate( yyyy, mm, dd );
               if sl.Count >= 6 then begin
                  hh := StrToInt( sl[3] );
                  nn := StrToInt( sl[4] );
                  ss := StrToInt( sl[5] );
                  if (sl.Count >= 7) then begin
                     if LowerCase( sl[6] ) = 'pm' then hh := hh + 12;
                  end;
                  Result := Result + EncodeTime( hh, nn, ss, 0 );
               end;
               exit;
            end;
         end;
      finally sl.Free end;

      // Give up and use current time instead
      Result := Now;

   except
      Result := Now;
   end;
end;

function UnifyWebDateTime( const s: String ): String;
begin
   try
      Result := FormatDateTime( 'yyyy"-"mm"-"dd hh":"nn":"ss',
                                ParseWebDateTime( s ) );
   except
      Result := s;
   end;
end;


function LoadLinkWebText( link: String;
                          pxServer: String;
                          pxPort: Integer;
                          pxUser, pxPass: String ): String;
var  ct: String;
begin
   Result := LoadLinkWebText( link, pxServer, pxPort, pxUser, pxPass, ct );
end;

function LoadLinkWebText( link: String;
                          pxServer: String;
                          pxPort: Integer;
                          pxUser, pxPass: String;
                          out contentType: String ): String;
var  http: TIdHTTP;
begin
   http := TIdHTTP.Create( nil );
   try
      Result := LoadLinkWebText( http, link,
                                 pxServer, pxPort, pxUser, pxPass, contentType );
   finally
      http.Free;
   end;
end;

function LoadLinkWebText( http: TIdHTTP;
                          link: String;
                          pxServer: String;
                          pxPort: Integer;
                          pxUser, pxPass: String;
                          out contentType: String ): String;
var  ssl: TIdSSLIOHandlerSocket;
     url, username, password, s: String;
begin
   Result := '';
   contentType := '';

   url := link;
   try

      (*
      Indy has some bugs related with "If-Modified-Since"/304. You either
      need a patched IdHTTP.pas or have to remove below line
      'http.Request.LastModified := dt;'. A patched IdHTTP.pas is included
      in folder "indy9" - see "indy-readme.txt" in this folder for details.
      *)
      http.HPG_BUG_FIXES_AVAILABLE := True;

      // Proxy settings
      if (pxServer <> '') and (pxPort > 0) then begin

         Log( LOGID_DEBUG, 'Using proxy ' + pxServer
                         + ':' + IntToStr( pxPort )
                         + iif( pxUser<>'', ' with password' ) );
         http.ProxyParams.ProxyServer   := pxServer;
         http.ProxyParams.ProxyPort     := pxPort;

         if copy( pxUser, 1, 1 ) = '$' then begin
            s := pxUser;  // script-password ($0..$99)
            if not Hamster.Passwords.UsePassword( s, username, password ) then begin
               Log( LOGID_WARN, Format( 'Missing username/password for "%s"!', [s] ) );
               exit;
            end;
            http.ProxyParams.ProxyUsername := username;
            http.ProxyParams.ProxyPassword := password;
            http.ProxyParams.BasicAuthentication := True;
         end else if pxUser <> '' then begin
            http.ProxyParams.ProxyUsername := pxUser;
            http.ProxyParams.ProxyPassword := pxPass;
            http.ProxyParams.BasicAuthentication := True;
         end;

      end;

      // SSL settings
      if copy( url, 1, 8 ) = 'https://' then begin

         if not Hamster.SSLAvailable then begin
            Log( LOGID_WARN, 'Required SSL libraries for https are not installed!' );
            exit;
         end;

         ssl := TIdSSLIOHandlerSocket.Create(nil);
         http.IOHandler := ssl;

      end;

      // Use authentication with URLs of type 'http://USERNAME:PASSWORD@...'
      // or 'http://$42@...'.
      (*
      SplitUrl( mWebLink, url, username, password );
      if username <> '' then begin

         if copy( username, 1, 1 ) = '$' then begin
            s := username;  // script-password ($0..$99)
            if not Hamster.Passwords.UsePassword( s, username, password ) then begin
               mError := Format( 'Missing username/password for "%s"!', [s] );
               exit;
            end;
         end;

         http.Request.BasicAuthentication := True;
         http.Request.Username := username;
         http.Request.Password := password;

      end;
      *)

      // User Agent to be placed in webserver's logfile
      http.Request.UserAgent := OUR_VERINFO + ' (WebReader)';

      // load feed
      try
         http.ConnectTimeout  := 30000;
         http.ReadTimeout     := 30000;
         http.HandleRedirects := True;
         Result := http.Get( url );
         contentType := http.Response.ContentType;
      except
         on ex: EIdHTTPProtocolException do begin
            // Indy-Bug: 304 is not handled correctly but leads to Exception
            if ex.ReplyErrorCode = 304 then begin // Indy-Bug
               // mModified := False;
               exit;
            end else begin
               raise;
            end;
         end else begin
            raise;
         end;
      end;

      if http.Response.ResponseCode = 304 then begin // "Not Modified"
         // mModified := False;
         exit;
      end;

      // fix linefeeds
      if Pos( #13#10, Result ) = 0 then begin
         Result := StringReplace( Result, #10, #13#10, [rfReplaceAll] );
      end;

   except
      on ex: Exception do Log( LOGID_WARN, ex.Message + ' (url="' + url + '")' );
   end;
end;


{ TWebChannelItem }

constructor TWebChannelItem.Create(webChannel: TWebChannel);
begin
   inherited Create;
   mWebChannel := webChannel;
   Title       := '';
   Description := '';
   Link        := '';
   Creator     := '';
   mDateTime   := '';
end;

function TWebChannelItem.getDate: String;
begin
   Result := mDateTime;
end;

function TWebChannelItem.getIsValid: Boolean;
begin
   Result := ( Trim( Title ) <> '' )
          or ( Trim( Description ) <> '' )
          or ( Trim( Link ) <> '' );
end;

{ TWebChannel }

constructor TWebChannel.Create;
begin
   inherited Create;
   Title       := '';
   Description := '';
   DescType    := '';
   Link        := '';
   Creator     := '';
   mDateTime   := '';
   Language    := '';
   mWebItems   := TList.Create;
end;

destructor TWebChannel.Destroy;
var  i: Integer;
begin
   if Assigned( mWebItems ) then try
      for i := 0 to mWebItems.Count - 1 do TWebChannelItem( mWebItems[i] ).Free;
      FreeAndNil( mWebItems );
   except end;
   inherited Destroy;
end;

function TWebChannel.getCount: Integer;
begin
   Result := mWebItems.Count;
end;

function TWebChannel.getItem(index: Integer): TWebChannelItem;
begin
   Result := mWebItems[ index ];
end;

function TWebChannel.getDate: String;
begin
   Result := mDateTime;
end;


{ TWebChannels }

procedure TWebChannels.Add(channel: TWebChannel);
begin
   mList.Add( channel );
end;

constructor TWebChannels.Create;
begin
   inherited Create;
   mList := TList.Create;
end;

destructor TWebChannels.Destroy;
var  i: Integer;
begin
   if Assigned( mList ) then try
      for i := 0 to mList.Count - 1 do getItem( i ).Free;
      FreeAndNil( mList );
   except end;
   inherited Destroy;
end;

function TWebChannels.getCount: Integer;
begin
   Result := mList.Count;
end;

function TWebChannels.getItem(index: Integer): TWebChannel;
begin
   Result := mList[ index ];
end;


{ TWebChannelReader }

constructor TWebChannelReader.Create( const infoType: TWebChannelInfoType;
                                      const info, comment: String;
                                      const AProxyServer: String;
                                      const AProxyPort: Integer;
                                      const AProxyUser, AProxyPass: String );
begin
   inherited Create;

   mCharset := 'UTF-8'; // CharSets.WindowsACPHandler.PreferredName;
   mWebLink := '';
   mWebText := '';
   mWebType := '';
   mChannels := TWebChannels.Create;
   mError   := '';
   mWarning := '';
   mModified := True;

   mProxyServer := AProxyServer;
   mProxyPort   := AProxyPort;
   mProxyUser   := AProxyUser;
   mProxyPass   := AProxyPass;
   mHttp        := nil;

   case infoType of
      wcitUrl:
         begin
            mWebLink := info;
            loadWebText( comment );
            if mError = '' then parseWebText;
         end;
      wcitFile:
         begin
            loadWebFile( info );
            if mError = '' then parseWebText;
         end;
      wcitText:
         begin
            mWebText := info;
            parseWebText;
         end;
   end;
end;

destructor TWebChannelReader.Destroy;
begin
   if Assigned( mChannels ) then try
      FreeAndNil( mChannels );
   except end;
   inherited Destroy;
end;

procedure TWebChannelReader.Terminate;
begin
   if Assigned( mHttp ) then try mHttp.Disconnect except end;
end;

function TWebChannelReader.loadLastModified( out dt: TDateTime ): Boolean;
var  dtEmpty: TDateTime;
begin
   dtEmpty := EncodeDate( 2000, 1, 1 );

   HamFileEnter;
   try
      with TIniFile.Create( AppSettings.GetStr(asPathBase) + 'WebReader.hst' ) do try
         dt := ReadDateTime( WebLink, 'LastModified', dtEmpty );
         Result := ( dt <> dtEmpty );
      finally Free end;
   finally HamFileLeave end;
end;

procedure TWebChannelReader.saveLastModified( const dt: TDateTime );
begin
   HamFileEnter;
   try
      with TIniFile.Create( AppSettings.GetStr(asPathBase) + 'WebReader.hst' ) do try
         WriteDateTime( WebLink, 'LastModified', dt );
      finally Free end;
   finally HamFileLeave end;
end;

function TWebChannelReader.getWebLink: String;
var  u, p: String;
begin
   SplitUrl( mWebLink, Result, u, p );
end;

procedure TWebChannelReader.loadWebFile( const filename: String );
var  sl: TStringList;
begin
   mWebText := '';
   if not FileExists( filename ) then exit;

   sl := TStringList.Create;
   try
      sl.LoadFromFile ( filename );
      mWebText := sl.Text;
   finally sl.Free end;
end;

procedure TWebChannelReader.loadWebText( const comment: String );
var  ssl: TIdSSLIOHandlerSocket;
     dt: TDateTime;
     url, username, password, s: String;
begin
   url := mWebLink;
   try

      mHttp := TIdHTTP.Create( nil );
      try

         (*
         Indy has some bugs related with "If-Modified-Since"/304. You either
         need a patched IdHTTP.pas or have to remove below line
         'http.Request.LastModified := dt;'. A patched IdHTTP.pas is included
         in folder "indy9" - see "indy-readme.txt" in this folder for details.
         *)
         mHttp.HPG_BUG_FIXES_AVAILABLE := True;

         // Proxy settings
         if (mProxyServer <> '') and (mProxyPort > 0) then begin

            Log( LOGID_DEBUG, 'Using proxy ' + mProxyServer
                            + ':' + IntToStr( mProxyPort )
                            + iif( mProxyUser<>'', ' with password' ) );
            mHttp.ProxyParams.ProxyServer   := mProxyServer;
            mHttp.ProxyParams.ProxyPort     := mProxyPort;

            if copy( mProxyUser, 1, 1 ) = '$' then begin
               s := mProxyUser;  // script-password ($0..$99)
               if not Hamster.Passwords.UsePassword( s, username, password ) then begin
                  mError := Format( 'Missing username/password for "%s"!', [s] );
                  exit;
               end;
               mHttp.ProxyParams.ProxyUsername := username;
               mHttp.ProxyParams.ProxyPassword := password;
               mHttp.ProxyParams.BasicAuthentication := True;
            end else if mProxyUser <> '' then begin
               mHttp.ProxyParams.ProxyUsername := mProxyUser;
               mHttp.ProxyParams.ProxyPassword := mProxyPass;
               mHttp.ProxyParams.BasicAuthentication := True;
            end;
            
         end;

         // SSL settings
         if copy( url, 1, 8 ) = 'https://' then begin

            if not Hamster.SSLAvailable then begin
               mError := 'Required SSL libraries for https are not installed!';
               exit;
            end;

            ssl := TIdSSLIOHandlerSocket.Create(nil);
            mHttp.IOHandler := ssl;

         end;

         // Use authentication with URLs of type 'http://USERNAME:PASSWORD@...'
         // or 'http://$42@...'.
         SplitUrl( mWebLink, url, username, password );
         if username <> '' then begin

            if copy( username, 1, 1 ) = '$' then begin
               s := username;  // script-password ($0..$99)
               if not Hamster.Passwords.UsePassword( s, username, password ) then begin
                  mError := Format( 'Missing username/password for "%s"!', [s] );
                  exit;
               end;
            end;

            mHttp.Request.BasicAuthentication := True;
            mHttp.Request.Username := username;
            mHttp.Request.Password := password;

         end;

         // User Agent to be placed in webserver's logfile
         mHttp.Request.UserAgent := OUR_VERINFO + ' (WebReader'
                                 + iif( comment <> '', ', ' + comment ) + ')';

         // Use 'If-Modified-Since' to reduce traffic?
         if loadLastModified( dt ) then begin
            mHttp.Request.LastModified := dt;
         end;

         // load feed
         try
            mHttp.ConnectTimeout  := 30000;
            mHttp.ReadTimeout     := 30000;
            mHttp.HandleRedirects := True;
            mWebText := mHttp.Get( url );
            mWebType := mHttp.Response.ContentType;

         except
            on ex: EIdHTTPProtocolException do begin
               // Indy-Bug: 304 is not handled correctly but leads to Exception
               if ex.ReplyErrorCode = 304 then begin // Indy-Bug
                  mModified := False;
                  exit;
               end else begin
                  raise;
               end;
            end else begin
               raise;
            end;
         end;

         if mHttp.Response.ResponseCode = 304 then begin // "Not Modified"
            mModified := False;
            exit;
         end;

         if mHttp.Response.LastModified > 0 then begin
            saveLastModified( mHttp.Response.LastModified );
         end else begin
            saveLastModified( Now );
         end;

         // fix linefeeds
         if Pos( #13#10, mWebText ) = 0 then begin
            mWebText := StringReplace( mWebText, #10, #13#10, [rfReplaceAll] );
         end;

      finally
         FreeAndNil( mHttp );
      end;

   except
      on ex: Exception do mError := ex.Message + ' (url="' + url + '")';
   end;
end;

procedure TWebChannelReader.parseWebText;
var  doc    : TXmlDocument;
     ver    : TWebChannelVersion;
     elem   : TXMLElement;
     attr   : TXMLAttribute;
     channel: TWebChannel;
     verStr, e: String;
     i: Integer;
     isHtml, isRss, isAtom: Boolean;
begin
   try

      doc := TXmlDocument.Create;
      try

         doc.AsText := mWebText;
         mCharSet := doc.CharSet;

         isHtml := False;
         isRss  := False;
         isAtom := False;

         if (doc.Root = nil) or (LowerCase( doc.Root.ElemText ) = 'html') then begin

            isHtml := True;
            ver    := html;
            verStr := 'HTML';
            
         end else begin

            e := LowerCase( doc.Root.ElemText );
            attr := doc.Root.Attribute[ 'version' ];

            if e = 'rss' then begin
               isRss := True;
               if attr = nil then begin
                  ver := rss091;
                  verStr := 'RSS/0.91 (assumed)';
               end else begin
                  verStr := 'RSS/' + attr.Value;
                  if copy(attr.Value,1,3) = '0.9' then begin
                     ver := rss091;
                  end else begin
                     ver := rss20;
                  end;
               end;
            end else if e = 'rdf:rdf' then begin
               isRss := True;
               ver := rss10;
               verStr := 'RSS/1.0 (assumed)';
            end else if e = 'feed' then begin
               isAtom := True;
               ver := atom03;
               if attr = nil then verStr := 'ATOM/0.3 (assumed)'
                             else verStr := 'ATOM/' + attr.Value;
            end else begin
               raise Exception.Create( 'Unsupported root element: ' + doc.Root.ElemText );
            end;

         end;

         channel := nil;

         if isHtml then begin
         
            channel := TWebChannel.Create;
            channel.WebChannelVersion := ver;
            channel.WebChannelVersionStr := verStr;
            channel.Description := mWebText;
            channel.DescType    := mWebType;

         end else if isRss then begin

            for i := 0 to doc.Root.Elements.Count - 1 do begin

               elem := doc.Root.Elements.Item[i];
               e := LowerCase( elem.ElemText );

               if e = 'channel' then begin

                  if channel <> nil then mChannels.Add( channel );

                  channel := TWebChannel.Create;
                  channel.WebChannelVersion := ver;
                  channel.WebChannelVersionStr := verStr;
                  parseRssChannel( channel, elem );

               end else if e = 'item' then begin // 1.0
                  if channel = nil then raise Exception.Create( 'Missing channel!' );
                  parseRssItem( channel, elem );

               end;

            end;

         end else if isAtom then begin

            channel := TWebChannel.Create;
            channel.WebChannelVersion := ver;
            channel.WebChannelVersionStr := verStr;
            parseAtomFeed( channel, doc.Root );

         end;

         mChannels.Add( channel );

      finally
         doc.Free;
      end;

   except
      on ex: Exception do mError := ex.Message;
   end;
end;

function getAtomAuthor( elemAuthor: TXMLElement ): String;
var  elem: TXMLElement;
     i: Integer;
begin
   Result := '';
   for i := 0 to elemAuthor.Elements.Count -1 do begin
      elem := elemAuthor.Elements.Item[i];
      if elem.ElemType = XML_ELEMENT then begin
         if elem.ElemText = 'email' then Result := Result + ' <';
         if elem.Elements.Count > 0 then
            Result := Result + elem.Elements.Item[0].ElemText;
         if elem.ElemText = 'email' then Result := Result + '> ';
      end else if elem.ElemType = XML_TEXT then begin
         Result := Result + elem.ElemText;
      end;
   end;
end;

function getElementText( elemBase: TXMLElement ): String;
var  elem: TXMLElement;
     attr: TXMLAttribute;
     i, k: Integer;
begin
   Result := '';

   for i := 0 to elemBase.Elements.Count -1 do begin

      elem := elemBase.Elements.Item[i];

      if elem.ElemType = XML_ELEMENT then begin
         Result := Result + '<' + elem.ElemText;
         for k := 0 to elem.Attributes.Count - 1 do begin
            attr := elem.Attributes.Item[ k ];
            if attr <> nil then begin
               Result := Result + ' ' + attr.Name + '="' + attr.Value + '"'
            end;
         end;
         Result := Result + '>'
                          + getElementText( elem )
                          + '</' + elem.ElemText + '>';
      end else begin
         Result := Result + elem.ElemText;
      end;
      
   end;
end;

procedure TWebChannelReader.parseAtomFeed( webChannel: TWebChannel; elemFeed: TXMLElement);
var  elem: TXMLElement;
     attr: TXMLAttribute;
     e: String;
     i: Integer;
begin
   for i := 0 to elemFeed.Elements.Count - 1 do begin

      elem := elemFeed.Elements.Item[i];
      e := LowerCase( elem.ElemText );

      if e = 'title' then begin
         webChannel.Title := getElementText( elem );

      end else if e = 'link' then begin
         attr := elem.Attribute[ 'href' ];
         if attr <> nil then webChannel.Link := attr.Value;

      end else if e = 'author' then begin
         webChannel.Creator := getAtomAuthor( elem );
      end else if e = 'managingeditor' then begin
         if webChannel.Creator = '' then webChannel.Creator := getElementText( elem );
      end else if e = 'webmaster' then begin
         if webChannel.Creator = '' then webChannel.Creator := getElementText( elem );
      end else if e = 'dc:creator' then begin
         if webChannel.Creator = '' then webChannel.Creator := getElementText( elem );
      end else if e = 'dc:publisher' then begin
         if webChannel.Creator = '' then webChannel.Creator := getElementText( elem );

      end else if e = 'modified' then begin
         webChannel.DateTime := getElementText( elem );
      end else if e = 'pubDate' then begin
         webChannel.DateTime := getElementText( elem );

      end else if e = 'entry' then begin
         parseAtomEntry( webChannel, elem );

      end else if e = 'tagline' then begin
         webChannel.Description := getElementText( elem );

      end;

   end;
end;

procedure TWebChannelReader.parseAtomEntry( webChannel: TWebChannel; elemEntry: TXMLElement );
var  i: Integer;
     e: String;
     elem: TXMLElement;
     attr: TXMLAttribute;
     item: TWebChannelItem;
begin
   item := TWebChannelItem.Create( webChannel );
   try

      for i := 0 to elemEntry.Elements.Count - 1 do begin

         elem := elemEntry.Elements.Item[i];
         e := LowerCase( elem.ElemText );

         if e = 'title' then begin  
            item.Title := getElementText( elem );

         end else if e = 'link' then begin
            attr := elem.Attribute[ 'href' ];
            if attr <> nil then item.Link := attr.Value;

         end else if e = 'summary' then begin
            if item.Description = '' then item.Description := getElementText( elem );

         end else if (e = 'content') then begin
            item.Description := getElementText( elem );

         end else if e = 'author' then begin
            item.Creator := getAtomAuthor( elem );
         end else if e = 'managingeditor' then begin
            if item.Creator = '' then item.Creator := getElementText( elem );
         end else if e = 'webmaster' then begin
            if item.Creator = '' then item.Creator := getElementText( elem );
         end else if e = 'dc:creator' then begin
            if item.Creator = '' then item.Creator := getElementText( elem );
         end else if e = 'dc:publisher' then begin
            if item.Creator = '' then item.Creator := getElementText( elem );

         end else if e = 'issued' then begin
            if elem.Elements.Count > 0 then begin
               item.DateTime := elem.Elements.Item[0].ElemText;
               if webChannel.DateTime = '' then webChannel.DateTime := item.DateTime;
            end;
         end else if e = 'modified' then begin
            item.DateTime := getElementText( elem );
            if webChannel.DateTime = '' then webChannel.DateTime := item.DateTime;
         end else if e = 'pubDate' then begin
            item.DateTime := getElementText( elem );
            if webChannel.DateTime = '' then webChannel.DateTime := item.DateTime;

         end;

      end;

      if item.IsValid then webChannel.mWebItems.Add( item );

   except
      item.Free;
      raise;
   end;
end;


procedure TWebChannelReader.parseRssChannel( webChannel: TWebChannel; elemChannel: TXMLElement );
var  elem: TXMLElement;
     e: String;
     i: Integer;
begin
   for i := 0 to elemChannel.Elements.Count - 1 do begin

      elem := elemChannel.Elements.Item[i];
      e := LowerCase( elem.ElemText );

      if e = 'title' then begin
         webChannel.Title := getElementText( elem );

      end else if e = 'link' then begin
         webChannel.Link := getElementText( elem );

      end else if e = 'description' then begin
         webChannel.Description := getElementText( elem );

      end else if e = 'language' then begin
         webChannel.Language := getElementText( elem );

      end else if e = 'item' then begin
         parseRssItem( webChannel, elem ); // 0.91, 2.0

      end else if e = 'items' then begin
         // ignore 1.0 item list

      end else if e = 'pubdate' then begin
         webChannel.DateTime := getElementText( elem );
      end else if e = 'modified' then begin
         if webChannel.DateTime = '' then
            webChannel.DateTime := getElementText( elem );
      end else if e = 'lastbuilddate' then begin
         if webChannel.DateTime = '' then
            webChannel.DateTime := getElementText( elem );
      end else if e = 'dc:date' then begin
         if webChannel.DateTime = '' then
            webChannel.DateTime := getElementText( elem );

      end else if e = 'dc:creator' then begin
         webChannel.Creator := getElementText( elem );
      end else if e = 'managingeditor' then begin
         if webChannel.Creator = '' then webChannel.Creator := getElementText( elem );
      end else if e = 'webmaster' then begin
         if webChannel.Creator = '' then webChannel.Creator := getElementText( elem );
      end else if e = 'author' then begin
         webChannel.Creator := getElementText( elem );
      end else if e = 'dc:publisher' then begin
         if webChannel.Creator = '' then webChannel.Creator := getElementText( elem );

      end;

   end;
end;

procedure TWebChannelReader.parseRssItem( webChannel: TWebChannel; elemItem: TXMLElement );
var  i: Integer;
     e: String;
     elem: TXMLElement;
     item: TWebChannelItem;
begin
   item := TWebChannelItem.Create( webChannel );
   try

      for i := 0 to elemItem.Elements.Count - 1 do begin

         elem := elemItem.Elements.Item[i];
         e := LowerCase( elem.ElemText );

         if e = 'title' then begin
            item.Title := getElementText( elem );

         end else if e = 'link' then begin
            item.Link := getElementText( elem );

         end else if e = 'description' then begin
            item.Description := getElementText( elem );

         end else if e = 'dc:creator' then begin
            item.Creator := getElementText( elem );
         end else if e = 'managingeditor' then begin
            if item.Creator = '' then item.Creator := getElementText( elem );
         end else if e = 'webmaster' then begin
            if item.Creator = '' then item.Creator := getElementText( elem );
         end else if e = 'author' then begin
            item.Creator := getElementText( elem );
         end else if e = 'dc:publisher' then begin
            if item.Creator = '' then item.Creator := getElementText( elem );

         end else if e = 'dc:date' then begin
            item.DateTime := getElementText( elem );
            if webChannel.DateTime = '' then webChannel.DateTime := item.DateTime;
         end else if e = 'pubdate' then begin
            item.DateTime := getElementText( elem );
            if webChannel.DateTime = '' then webChannel.DateTime := item.DateTime;

         end;

      end;

      if item.IsValid then webChannel.mWebItems.Add( item );

   except
      item.Free;
      raise;
   end;
end;

function filter( s: String ): String;
begin
   Result := StringReplace( s, #13#10, ' ', [rfReplaceAll] );
   Result := HtmlToText( Result );
end;

function quoteWrap( s: String ): String;
var  h: String;
     j, p: Integer;
begin
   Result := '';
   s := HtmlToText( s );
   while length( s ) > 0 do begin
      p := Pos( #13#10, s );
      if (p > 0) and (p <= 76) then begin
         h := Trim( copy( s, 1, p-1 ) );
         s := copy( s, p+2, MaxInt );
      end else begin
         p := 76;
         if length(s) > p then begin
            for j := Min( length(s), 76 ) downto 1 do begin
               if s[j] = ' ' then begin
                  p := j;
                  break;
               end;
            end;
         end;
         if (p = 76) and (length(s) > 76) and (s[p] <> ' ') then begin
            p := Pos( ' ', s );
            if p = 0 then p := Length( s );
         end;
         h := Trim( copy( s, 1, p ) );
         s := copy( s, p+1, MaxInt );
      end;
      Result := Result + '|  ' + h + #13#10;
   end;
end;

function TWebChannelReader.encodeHdr( hdrVal: String ): String;
begin
   Result := filter( hdrVal );
   if not AnsiSameText( mCharSet, 'UTF-8' ) then begin
      Log( LOGID_DEBUG, 'HdrCvt ' + mCharSet + ' ' + 'UTF-8' );
   end;
   Result := EncodeHeadervalue( Result, mCharSet, 'UTF-8' );
end;

function TWebChannelReader.encodeTxt( hdrVal: String ): String;
begin
   if not AnsiSameText( mCharSet, 'UTF-8' ) then begin
      Log( LOGID_DEBUG, 'TxtCvt ' + mCharSet + ' ' + 'UTF-8' );
      CharSets.Convert( mCharSet, 'UTF-8', hdrVal );
   end;
   Result := hdrVal;
end;


procedure TWebChannelReader.GetAllItemsAsText( channel: TWebChannel; out chSubject, chFrom, chText: String;
  out chTime: TDateTime );
begin
   GetSelectedItemsAsText( channel, nil, chSubject, chFrom, chText, chTime );
end;

procedure TWebChannelReader.GetSelectedItemsAsText( channel: TWebChannel; indexes: TList;
  out chSubject, chFrom, chText: String; out chTime: TDateTime );
var  i: Integer;
     ok: Boolean;
     item: TWebChannelItem;
begin
   chSubject := '(no RSS title)';
   chFrom    := 'webfeed <webfeed@unknown.invalid>';
   chText    := '';
   chTime    := Now;

   if channel = nil then begin
      chSubject := 'Channel Error!';
      chText := chText + 'Channel:' + #13#10 + quoteWrap( WebLink ) + #13#10;
      chText := chText + #13#10;
      chText := chText + 'Error:' + #13#10 + quoteWrap( Error ) + #13#10;
      chText := chText + #13#10;
      chText := chText + '-- ' + #13#10;
      chText := chText + 'Time: ' + DateTimeToStr( Now ) + #13#10;
      exit;
   end;

   if channel.WebChannelVersion = html then begin

      chText := chText + 'Link: ' + WebLink + #13#10#13#10;

      chText := HtmlToText( channel.Description );
      i := Pos( #13, chText );
      if i > 1 then chSubject := Trim( copy( chText, 1, i-1 ) );
      chText := encodeTxt( chText );
      chSubject := encodeHdr( chSubject );

      if Error <> '' then begin
         chText := chText + #13#10'-- '#13#10;
         chText := chText + 'Feed: ' + WebLink + #13#10;
         chText := chText + 'ERROR: ' + Error + #13#10;
         chText := chText + 'Time: ' + DateTimeToStr( Now ) + #13#10;
      end;

      exit;
   end;

   if channel.DateTime <> '' then begin
      chTime := ParseWebDateTime( channel.DateTime );
   end;

   if channel.Title <> '' then chSubject := encodeHdr( channel.Title );

   if channel.Creator <> '' then begin
      chFrom := channel.Creator;
      if Pos( '@', chFrom ) = 0 then chFrom := chFrom + ' <webfeed@unknown.invalid>';
      chFrom := encodeHdr( chFrom );
   end;

   if channel.Title <> '' then begin
      chText := chText + filter( channel.Title );
      chText := chText + #13#10;
   end;

   if channel.Description <> '' then chText := chText + quoteWrap( channel.Description );
   if channel.Link        <> '' then chText := chText + 'Link: ' + channel.Link + #13#10;
   if (WebLink<>'') and (channel.Link <> WebLink) then chText := chText + 'Feed: ' + WebLink + #13#10;
   chText := chText + #13#10;

   for i := 0 to channel.Count - 1 do begin

      if indexes = nil then begin
         ok := True;
      end else begin
         ok := ( indexes.IndexOf( Pointer(i) ) >= 0 );
      end;

      if ok then begin

         item := channel.Items[i];

         chText := chText + #13#10;

         if item.Title <> '' then begin
            chText := chText + filter( item.Title );
            if (item.Creator <> '') and (item.Creator <> channel.Creator) then begin
               chText := chText + ' (' + Trim( item.Creator );
               (*
               if item.DateTime <> '' then begin
                  chText := chText + ', ' + UnifyWebDateTime(item.DateTime);
               end;
               *)
               chText := chText + ')';
            end else begin
               (*
               if item.DateTime <> '' then begin
                  chText := chText + ' (' + UnifyWebDateTime(item.DateTime) + ')';
               end;
               *)
            end;
            chText := chText + #13#10;
         end;

         if item.Description <> '' then chText := chText + quoteWrap( item.Description );

         if item.Link        <> '' then chText := chText + filter(item.Link) + #13#10;

      end;

   end;

   if Error <> '' then begin
      chText := chText + #13#10'-- '#13#10;
      chText := chText + 'Feed: ' + WebLink + #13#10;
      chText := chText + 'ERROR: ' + Error + #13#10;
      chText := chText + 'Time: ' + DateTimeToStr( Now ) + #13#10;
   end;

   chText := encodeTxt( chText );
end;

procedure TWebChannelReader.GetSingleItemAsText( channel: TWebChannel; index: Integer; out chSubject,
  chFrom, chText: String; out chTime: TDateTime; addHtml: Boolean; out chHtml, chType: String );
var  item: TWebChannelItem;
     i: Integer;
begin
   chSubject := '(no RSS title)';
   chFrom    := 'webfeed <webfeed@unknown.invalid>';
   chText    := '';
   chTime    := Now;

   if channel.WebChannelVersion = html then begin // no feed but HTML page

      chText := chText + 'Link: ' + WebLink + #13#10#13#10;

      if addHtml then begin
         chHtml := channel.Description;
         chType := channel.DescType;
      end else begin
         chText := HtmlToText( channel.Description );
         i := Pos( #13, chText );
         if i > 1 then chSubject := Trim( copy( chText, 1, i-1 ) );
         chText := encodeTxt( chText );
         chSubject := encodeHdr( chSubject );
      end;

      if Error <> '' then begin
         chText := chText + #13#10'-- '#13#10;
         chText := chText + 'Feed: ' + WebLink + #13#10;
         chText := chText + 'ERROR: ' + Error + #13#10;
         chText := chText + 'Time: ' + DateTimeToStr( Now ) + #13#10;
      end;

      exit;

   end;

   if channel.Title <> '' then chSubject := encodeHdr( channel.Title );

   if channel.Creator <> '' then begin
      chFrom := channel.Creator;
      if Pos( '@', chFrom ) = 0 then chFrom := chFrom + ' <webfeed@unknown.invalid>';
      chFrom := encodeHdr( chFrom );
   end;

   item := channel.Items[index];

   if item.DateTime <> '' then begin
      chTime := ParseWebDateTime( item.DateTime );
   end else if channel.DateTime <> '' then begin
      chTime := ParseWebDateTime( channel.DateTime );
   end;

   chText := chText + #13#10;

   if item.Title <> '' then begin

      chSubject := encodeHdr( item.Title );
      
      chText := chText + filter( item.Title );

      if (item.Creator <> '') and (item.Creator <> channel.Creator) then begin

         chFrom := item.Creator;
         if Pos( '@', chFrom ) = 0 then chFrom := chFrom + ' <webfeed@unknown.invalid>';
         chFrom := encodeHdr( chFrom );

         chText := chText + ' (' + Trim( item.Creator );
         (*
         if item.DateTime <> '' then begin
            chText := chText + ', ' + UnifyWebDateTime( item.DateTime );
         end;
         *)
         chText := chText + ')';

      end else begin
         (*
         if item.DateTime <> '' then begin
            chText := chText + ' (' + UnifyWebDateTime( item.DateTime ) + ')';
         end;
         *)
      end;
      
      chText := chText + #13#10#13#10;
   end;
   if item.Description <> '' then begin
      chText := chText + quoteWrap( item.Description ) + #13#10;
   end;

   chHtml := '';
   chType := '';
   if item.Link <> '' then begin
      if addHtml then try
         mHttp := TIdHttp.Create( nil );
         try
            chHtml := LoadLinkWebText(
                         mHttp, item.Link,
                         mProxyServer, mProxyPort, mProxyUser, mProxyPass,
                         chType );
         finally FreeAndNil( mHttp ) end;
      except end;
      chText := chText + filter(item.Link) + #13#10#13#10;
   end;

   chText := chText + #13#10;
   chText := chText + '-- ' + #13#10;

   if channel.Title <> '' then begin
      chText := chText + filter( channel.Title );
      chText := chText + #13#10;
   end;

   if channel.Description <> '' then chText := chText + quoteWrap( channel.Description );
   if channel.Link        <> '' then chText := chText + 'Link: ' + channel.Link + #13#10;
   if (WebLink<>'') and (channel.Link <> WebLink) then chText := chText + 'Feed: ' + WebLink + #13#10;
   chText := chText + #13#10;

   if Error <> '' then begin
      chText := chText + #13#10'-- '#13#10;
      chText := chText + 'Feed: ' + WebLink + #13#10;
      chText := chText + 'ERROR: ' + Error + #13#10;
      chText := chText + 'Time: ' + DateTimeToStr( Now ) + #13#10;
   end;

   chText := encodeTxt( chText );
end;

end.
