Google
Web alhem.net

Readhtml.cpp

Go to the documentation of this file.
00001 // Readhtml.cpp
00002 /*
00003 Copyright (C) 2003  Anders Hedstrom
00004 
00005 This program is free software; you can redistribute it and/or
00006 modify it under the terms of the GNU General Public License
00007 as published by the Free Software Foundation; either version 2
00008 of the License, or (at your option) any later version.
00009 
00010 This program is distributed in the hope that it will be useful,
00011 but WITHOUT ANY WARRANTY; without even the implied warranty of
00012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013 GNU General Public License for more details.
00014 
00015 You should have received a copy of the GNU General Public License
00016 along with this program; if not, write to the Free Software
00017 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
00018 */
00019 
00020 #ifdef _WIN32
00021 #pragma warning(disable:4786)
00022 #endif
00023 #include <stdio.h>
00024 #include <string.h>
00025 
00026 #include "Readhtml.h"
00027 
00028 #define DEB(x) 
00029 
00030 namespace Cgi {
00031 
00032 Readhtml::Readhtml(void)
00033 {
00034         fn = NULL;
00035         fil = NULL;
00036         peekc = 0;
00037         buf = NULL;
00038 }
00039 
00040 Readhtml::Readhtml(char *s)
00041 {
00042         Readhtml();
00043         fn = new char[strlen(s) + 1];
00044         strcpy(fn,s);
00045         if ((fil = fopen(s,"rt")) != NULL)
00046                 fread(&peekc,1,1,fil);
00047         buf = new char[MAXL];
00048 }
00049 
00050 Readhtml::~Readhtml(void)
00051 {
00052         if (fil)
00053                 fclose(fil);
00054         if (fn)
00055                 delete fn;
00056         if (buf)
00057                 delete buf;
00058 }
00059 
00060 int Readhtml::getnext(char *s,int bufl)
00061 {
00062         size_t i = 0;
00063         size_t sv = 0;
00064 
00065         if (!fil)
00066                 return -1;
00067         if (feof(fil))
00068                 return 0;
00069 
00070         if (!peekc)
00071                 do
00072                 {
00073                         fread(&peekc,1,1,fil);
00074                 } while (!feof(fil) && (peekc == 13 || peekc == 10));
00075 
00076 // read HTML tag
00077 
00078         if (peekc == '<')
00079         {
00080                 if (i < MAXL - 1)
00081                         buf[i++] = peekc;
00082                 fread(&peekc,1,1,fil);
00083                 while (!feof(fil) && peekc != '>')
00084                 {
00085                         if (peekc == 13 || peekc == 10 || peekc == 9)
00086                                 peekc = 32;
00087                         if (i < MAXL - 1)
00088                                 buf[i++] = peekc;
00089                         fread(&peekc,1,1,fil);
00090                 }
00091                 if (i < MAXL - 1)
00092                         buf[i++] = '>';
00093                 buf[i] = 0;
00094                 buf[bufl - 1] = 0;
00095                 peekc = 0;
00096                 while (buf[strlen(buf) - 1] == 32)
00097                         buf[strlen(buf) - 1] = 0;
00098                 strcpy(s,buf);
00099                 // check comment
00100                 if (!strncmp(buf, "<!--", 4))
00101                 {
00102                         while (!strstr(buf, "-->") && !feof(fil))
00103                         {
00104                                 i = 0;
00105                                 fread(&peekc,1,1,fil);
00106                                 while (!feof(fil) && peekc != '>')
00107                                 {
00108                                         if (peekc == 13 || peekc == 10 || peekc == 9)
00109                                                 peekc = 32;
00110                                         if (i < MAXL - 1)
00111                                                 buf[i++] = peekc;
00112                                         fread(&peekc,1,1,fil);
00113                                 }
00114                                 if (i < MAXL - 1)
00115                                         buf[i++] = '>';
00116                                 buf[i] = 0;
00117                                 buf[bufl - 1] = 0;
00118                                 peekc = 0;
00119                                 while (buf[strlen(buf) - 1] == 32)
00120                                         buf[strlen(buf) - 1] = 0;
00121                                 strcat(s + strlen(s),buf);
00122                         }
00123                         return 3; // comment
00124                 }
00125                 return 1;
00126         }
00127 
00128 // read string
00129 //  0xc4-Auml, 0xc5-Aring, 0xd6-Ouml
00130 //  0xe4-auml, 0xe5-aring, 0xf6-ouml
00131 
00132         if (peekc == 13 || peekc == 10 || peekc == 9)
00133                 peekc = 32;
00134         if (peekc && peekc != 32)
00135                 if (i < MAXL - 1)
00136                         buf[i++] = peekc;
00137         do
00138         {
00139                 fread(&peekc,1,1,fil);
00140                 if (peekc == 13 || peekc == 10 || peekc == 9)
00141                         peekc = 32;
00142         } while (peekc == 32 && !feof(fil));
00143         while (!feof(fil) && peekc != '<')
00144         {
00145                 if (peekc == 13 || peekc == 10 || peekc == 9)
00146                         peekc = 32;
00147                 if (i < MAXL - 1)
00148                         buf[i++] = peekc;
00149                 fread(&peekc,1,1,fil);
00150 // debug udde tecken
00151                 if (peekc < 0)
00152                 {
00153                         buf[i] = 0;
00154                         DEB(printf("%s 0x%02x\n",buf,(unsigned)peekc % 256);)
00155                         sv++;
00156                 }
00157         }
00158         buf[i] = 0;
00159         buf[bufl - 1] = 0;
00160         {
00161                 char *s = strstr(buf, "&nbsp;");
00162                 while (s)
00163                 {
00164                         *s = ' ';
00165                         memcpy(s + 1, s + 6, strlen(s + 6) + 1);
00166                         //
00167                         s = strstr(buf, "&nbsp;");
00168                 }
00169         }
00170         while (buf[strlen(buf) - 1] == 32)
00171                 buf[strlen(buf) - 1] = 0;
00172         strcpy(s,buf);
00173         DEB(if (sv)
00174                 printf("%s\n",buf);)
00175         if (!*s && !feof(fil))
00176                 return getnext(s, bufl);
00177         return 2;
00178 }
00179 
00180 Readhtml::Readhtml(const Readhtml& ) 
00181 {
00182 }
00183 
00184 Readhtml& Readhtml::operator=(const Readhtml& ) 
00185 { 
00186         return *this; 
00187 }
00188 
00189 } // namespace

Generated for cgi++ by doxygen 1.3.7

Page, code, and content Copyright (C) 2004 by Anders Hedström