
/* OpenWebSpider
 *
 *  Author:     Stefano Alimonti aka Shen139
 *  Mail:       shen139 [at] openwebspider (dot) org
 *
 *
 * This file is part of OpenWebSpider
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */


/*
 * Include guard for the crawler-wide options header.
 * This file has no #include directives of its own, yet it references MYSQL,
 * regex_t, pthread_t, FILE, time_t and NULL; the including file therefore has
 * to pull in the matching headers before including this one.
 * NOTE(review): identifiers starting with a double underscore are reserved for
 * the implementation; renaming the guard requires checking every includer.
 */
#ifndef __OPTIONS
#define __OPTIONS

/*
 * Stand-ins for Win32 type names when WIN32 is not defined.
 * These are textual macros, not typedefs: "LPVOID a, b;" expands to
 * "void* a, b;", which declares b as plain void. Declare one variable per
 * statement when using them.
 * NOTE(review): DWORD is a signed long long here - confirm this matches what
 * the code expects from the WIN32 definition of DWORD.
 */
#ifndef WIN32
#define DWORD	long long int
#define LPVOID	void*
#define HANDLE	pthread_t
#endif

/*Host*/
/* Size limits for host names, page paths and complete URLs. */
#define PORT                80
#define MAXHOSTSIZE         100   /* size of the Host buffer in struct sHost */
#define MAXPAGESIZE         255   /* size of the Page buffer in struct sHost */
/* Parenthesized so the sum stays one value inside larger expressions
 * (e.g. 2 * MAXURLSIZE must be 770, not 2*100 + 255 + 30). */
#define MAXURLSIZE          (MAXHOSTSIZE + MAXPAGESIZE + 30)

/*Html*/
/* Size limits used while handling HTML pages.
 * NOTE(review): the units (bytes vs. characters) and the exact difference
 * between MAXTAGSIZE and MAXTAGLENGTH are not visible in this file - confirm
 * against the parser before changing them. */
#define	MAXPACKETSIZE       200000  /* also the base of MAXQUERYSIZE */
#define MAXTAGSIZE          20
#define MAXDESCRIPTIONSIZE  255     /* size of the Description buffer in struct sHost */
#define MAXTAGLENGTH        10000
#define	MAXHTTPSTATUSSIZE   50

/*robots.txt*/
#define MAXDISALLOW         100     /* number of rows in lstRobotsExclusions */
#define MAXCRAWLDELAY       999     /* presumably the upper bound accepted for a crawl delay - confirm unit and use */

/*RANK*/
/* NOTE(review): presumably the highest rank level assigned to a page; the
 * consumer is not visible in this file - confirm. */
#define MAXPRLEV            10

/*SQL*/
/* Parenthesized so the sum stays one value inside larger expressions
 * (e.g. 2 * MAXQUERYSIZE). */
#define MAXQUERYSIZE        (MAXPACKETSIZE + 50000)
#define MAXUSERQUERYSIZE    200
/*
 * Expands to TWO comma-separated arguments: a printf-style format string
 * holding a CREATE TABLE statement, followed by `tab`, which fills the single
 * %s (the table name). It is therefore only meaningful inside the argument
 * list of a printf-family call, e.g. snprintf(buf, size, CREATE_TMP_TABLE(name)).
 * `tab` is inserted as-is; the macro performs no quoting or escaping.
 */
#define CREATE_TMP_TABLE(tab)    "CREATE TABLE `%s` ("                                  \
                                "		`idpage` int(11) NOT NULL auto_increment,"       \
                                "		`hostname` varchar(100) NOT NULL default '',"    \
                                "		`page` varchar(255) NOT NULL default '',"        \
                                "		`description` varchar(255) NOT NULL default ''," \
                                "		`html` LONGTEXT NOT NULL,"                       \
                                "       `htmlcache` LONGBLOB,"                          \
                                "		`version` int(11) NOT NULL default '0',"         \
                                "		`level` int(11) NOT NULL default '0', "          \
                                "       `rank` int(11) NOT NULL default '0',  "         \
                                "		`date` varchar(10) NOT NULL default '',"         \
                                "		`time` varchar(10) NOT NULL default '',"         \
                                "		PRIMARY KEY  (`idpage`)"                         \
                                "	  ) ENGINE=MyISAM CHARSET=utf8 ;",(tab)


/*Socket*/
/* Socket timeouts, in milliseconds according to the original annotations.
 * NOTE(review): which operation each timeout applies to is not visible in this
 * file - confirm against the socket code. */
#define FIRSTTIMEOUT         50000      /* milliseconds (50 s) */
#define TIMEOUTs             3000       /* milliseconds (3 s) */

/*Thread*/
#define MAXMUTEX             10        /* number of elements in hMutex and iLastPing */
#define	MAXTHREAD            1100      /* number of elements in thrdStatus */
/* NOTE(review): the BLOCK* values look like lock identifiers (presumably
 * indices into hMutex, with NO_BLOCK meaning "no lock") - confirm against the
 * locking code. */
#define NO_BLOCK             -1
#define BLOCKTHRDHST         0
#define BLOCKDB1             1
#define BLOCKINDEX           2
#define BLOCKEXH             3
#define BLOCKEXCRAWL         4
#define AVGTHREADDELAY       100000	/* 100 seconds per the original note, i.e. the unit is presumably milliseconds */

/*Misc*/
/* Assorted size limits. Only the two CustomExtensions dimensions are used
 * inside this header; the consumers of the others are not visible here. */
#define MAXKEYSIZE          20
#define MAXEXTERNALNODE     1000
#define MAXOUTPUTLINE       500
#define MAXREGULAREXPRESSIONSIZE 100
#define MAXCUSTOMEXTENSIONS      10   /* rows of CustomExtensions */
#define MAXCUSTOMEXTENSIONSIZE   50   /* bytes per row of CustomExtensions */
#define MAXEXTENSIONSIZE         10


/*
 * NOTE(review): this and the following globals are DEFINED (not merely
 * declared) in a header. Including the header from more than one translation
 * unit duplicates these definitions; the usual layout is `extern` here plus
 * one definition in a .c file.
 */
/* Thread count; initial value 20. */
int nThread=20;

/*
 * Settings for three database connections (suffix 1..3), each a 40-byte
 * buffer: DBn, MYSQLSERVERn, USERDBn, PASSDBn.
 * NOTE(review): presumably database name, server address, user name and
 * password respectively - confirm where they are filled in.
 */
char	DB1[40];
char	DB2[40];
char	DB3[40];
char	MYSQLSERVER1[40];
char	MYSQLSERVER2[40];
char	MYSQLSERVER3[40];
char	USERDB1[40];
char	USERDB2[40];
char	USERDB3[40];
char	PASSDB1[40];
char	PASSDB2[40];
char	PASSDB3[40];

/*
 * One host/page record (also available as the typedef SHOST).
 * The three text buffers are fixed-size arrays bounded by MAXHOSTSIZE,
 * MAXPAGESIZE and MAXDESCRIPTIONSIZE.
 * NOTE(review): the meaning of type, viewed and level is not visible in this
 * file - confirm against the code that fills the structure.
 */
typedef struct sHost
{
	char Host[MAXHOSTSIZE];
	char Page[MAXPAGESIZE];
	char Description[MAXDESCRIPTIONSIZE];
	unsigned short int port;
	unsigned short int type;
	unsigned short int viewed;
	unsigned short int level;
}SHOST;

/*
 * Table of HTML tag/attribute pairs.
 * bTag = tag name
 * eTag = attribute name paired with that tag
 * flag = 0 : <tag1 attr=123> xyz </tag1> eg.: <a href="/index.php">Home</a>
 *      = 1 : <tag2 attr2="test">         eg.: <base href="http://www.openwebspider.org/">
 * The last entry (empty strings, flag -1) presumably marks the end of the
 * table - confirm against the code that walks it.
 * NOTE(review): the examples above disagree with the table below, where
 * "base" carries flag 0 and "a" carries flag 1. Either the two descriptions
 * are swapped or the table is; confirm against the parser before editing.
 */
struct
{
    char* bTag;
    char* eTag;
    int   flag;
} taglist[] = 
    {
      { "base"   ,"href" ,0 },
      { "a"      ,"href" ,1 },
      { "ref"    ,"href" ,0 },
      { "area"   ,"href" ,0 },
      { "frame"  ,"src"  ,0 },
      { "iframe" ,"src"  ,0 },

/* ****EXAMPLE******
	  { "img" ,"src"          ,0 },
	  { "body" ,"background"  ,0 },
******************** */

      { ""       ,""     ,-1 }
    };

/*
 * Global options and running totals, all starting at 0 except
 * nRelationships (1). startTime and startTimeMS have no initializer.
 * NOTE(review): the descriptions below are inferred from the identifiers only;
 * the code that reads and writes them is not visible in this file.
 */
unsigned int xCacheHtml=0;              /* presumably: store the page HTML in the cache column */
unsigned int xCacheHtmlCompressed=0;    /* presumably: store the cached HTML compressed */
unsigned int nPagesViewed=0;            /* presumably: count of pages fetched so far */
long int bytesDownloaded=0;             /* presumably: total bytes received so far */
char startTime[10];
DWORD startTimeMS;
unsigned int nMaxPagesPerSite=0;
unsigned int nRelationships=1;
unsigned int maxDepthLevel=0;
unsigned int bDontIndexPages=0;
unsigned int bTesting=0;
unsigned int starthostonly=0;

/*
 * Operating mode selector.
 * scan_mode==0 => Real time search (deprecated)
 * scan_mode==1 => Index
 * scan_mode==2 => Indexed search
 * scan_mode==0xFF => uninitialized
 * NOTE(review): the initializer is 0 (the deprecated mode), not the 0xFF
 * "uninitialized" value - confirm that startup code assigns it explicitly.
 */
unsigned int scan_mode=0;

/*Current Host*/
/* Single global host record; per the label above it describes the host
 * currently being processed. */
struct sHost IndexingHost;

/*
 * File-name extensions listed as plain text.
 * The list is closed by an entry whose first character is NUL.
 */
const char *PlainTextExtension[] = {
    ".txt",
    ".c",
    ".cpp",
    ".bas",
    ".pas",
    ".h",
    "\0"
};


/*
 * File-name extensions listed as HTML pages.
 * The list is closed by an entry whose first character is NUL.
 */
const char *HtmlExtensions[] = {
    ".htm",
    ".html",
    ".php",
    ".asp",
    ".cgi",
    ".mspx",
    ".aspx",
    ".shtml",
    ".pl",
    ".phtml",
    ".cfm",
    ".ch2",
    ".jsp",
    ".msnw",
    ".php3",
    "\0"
};

/* Up to MAXCUSTOMEXTENSIONS extension strings of at most
 * MAXCUSTOMEXTENSIONSIZE bytes each (terminator included).
 * NOTE(review): presumably filled at run time with user-supplied extensions - confirm. */
char CustomExtensions[MAXCUSTOMEXTENSIONS][MAXCUSTOMEXTENSIONSIZE];

/* Three global MYSQL objects; the numbering matches the DBn / MYSQLSERVERn /
 * USERDBn / PASSDBn settings (presumably one connection per set - confirm). */
MYSQL gMysqlDB1;
MYSQL gMysqlDB2;
MYSQL gMysqlDB3;
/* 20-byte buffer. NOTE(review): presumably the name of the working table,
 * e.g. the one passed to CREATE_TMP_TABLE - confirm. */
char  gTable[20];

/* MAXMUTEX lock words.
 * NOTE(review): `volatile` alone does not make accesses atomic or ordered
 * between threads; how these words are acquired and released is not visible
 * in this file - confirm the locking primitive used. */
volatile unsigned long hMutex[MAXMUTEX];

/*
 * Per-thread status, MAXTHREAD slots:
 * thrdStatus[]==0	-> Thread is alive
 * thrdStatus[]==1	-> Thread is dead
 * ....
 * NOTE(review): the original list is open-ended ("...."); other values may be
 * in use - confirm against the thread code.
 */
DWORD thrdStatus[MAXTHREAD];

/*
 * Global control flags, all initially 0.
 * NOTE(review): the meaning of each flag is not visible in this file; the
 * names suggest shutdown/stop requests (iQuit, iStop, bKillThread*), host
 * switching (iDoNextHost) and feature toggles - confirm against the readers.
 */
unsigned int iQuit=0;
unsigned int bKillThread=0;
unsigned int bKillThreadReserved=0;
unsigned int iStop=0;
unsigned int iDoNextHost=0;
unsigned int bSwapping=0;
unsigned int bAddExternalHost=0;
unsigned int bUseRegularExpressionA=0;
unsigned int bUseRegularExpressionB=0;
unsigned int bUpdate=0;
/* Two regex_t objects.
 * NOTE(review): presumably enabled by bUseRegularExpressionA/B and applied to
 * page names and page content respectively - confirm which flag maps to which. */
regex_t regexPageFilter;
regex_t regexContentFilter;

/* MAXMUTEX integers, the same length as hMutex.
 * NOTE(review): presumably a last-activity marker per lock - confirm. */
int iLastPing[MAXMUTEX];

/* Up to MAXDISALLOW path strings of at most MAXPAGESIZE bytes each
 * (presumably the Disallow entries read from robots.txt - confirm). */
char lstRobotsExclusions[MAXDISALLOW][MAXPAGESIZE];
int iRobCrawlDelay=0;	/* crawl delay obtained from robots.txt */
int iCrawlDelay=0;		/* crawl delay obtained from the program arguments */

/* No initializer. NOTE(review): presumably non-zero once robots.txt has been
 * processed successfully - confirm. */
int bRobotsOK;


/*module handler*/
/* Opaque pointer. NOTE(review): presumably the handle of the loaded module
 * library, used to resolve the names in loadableModules - confirm. */
void* modHandler;

/*
 * Table of module entry points, closed by an entry whose names start with NUL.
 *   functName     - name of the module function
 *   functInit     - name of the matching init function
 *   handler       - pointer for functName, NULL in this table
 *   initHandler   - pointer for functInit, NULL in this table
 *   isInitialized - 0 in this table
 * NOTE(review): presumably the pointers are filled by resolving the names
 * through modHandler - confirm against the module loader.
 */
struct
{
    char* functName;
    char* functInit;
    void* handler;
    void* initHandler;
    unsigned short int isInitialized;
} loadableModules[] =
   {
      { "modFilter",          "modInitFilter",          NULL, NULL, 0 },
      { "modHandleExtension", "modInitHandleExtension", NULL, NULL, 0 },
      { "\0",                 "\0",                     NULL, NULL, 0 }
   };


/* http://www1.tip.nl/~t876506/utf8tbl.html */
/*
 * HTML named character references and their replacements.
 *   htmlChar - entity name without the leading '&' and trailing ';'
 *   rep      - replacement text, interpreted according to `type`
 *   type     - 0 UTF8 : rep is text of the form "0x<hex digits>"
 *                       (the hex digits are the UTF-8 bytes of the character,
 *                       e.g. E2 82 AC for the euro sign)
 *              1 ASCII: rep is the replacement character itself
 * The table ends with the entry whose htmlChar is NULL.
 * The type 1 entries for accented letters are stored as literal non-ASCII
 * characters, so their byte values depend on the encoding of this source file.
 */
struct
{
    char* htmlChar;
	char* rep;
	int type;
	/*
	type: 0 UTF8
	      1 ASCII
	*/
} ahList[] = 
   {
      { "nbsp",  " " , 1 },
      { "amp",   "&" , 1 },
      { "euro",  "0xE282AC", 0 },

/* if you have problems with these lines please contact me */
      { "Aacute","Á", 1 },
      { "aacute","á", 1 },
      { "Eacute","É", 1 },   /* was the lowercase letter, duplicating "eacute" */
      { "eacute","é", 1 },
      { "Iacute","Í", 1 },
      { "iacute","í", 1 },
      { "Oacute","Ó", 1 },
      { "oacute","ó", 1 },
      { "Uacute","Ú", 1 },
      { "uacute","ú", 1 },
      { "Agrave","À", 1 },
      { "agrave","à", 1 },
      { "Egrave","È", 1 },
      { "egrave","è", 1 },
      { "Igrave","Ì", 1 },
      { "igrave","ì", 1 },
      { "Ograve","Ò", 1 },
      { "ograve","ò", 1 },
      { "Ugrave","Ù", 1 },
      { "ugrave","ù", 1 },
      { "Acirc", "Â", 1 },
      { "acirc", "â", 1 },
      { "Ecirc", "Ê", 1 },
      { "ecirc", "ê", 1 },
      { "Icirc", "Î", 1 },
      { "icirc", "î", 1 },
      { "Ocirc", "Ô", 1 },
      { "ocirc", "ô", 1 },
      { "Ucirc", "Û", 1 },
      { "ucirc", "û", 1 },
      { "Auml",  "Ä", 1 },
      { "auml",  "ä", 1 },
      { "Euml",  "Ë", 1 },
      { "euml",  "ë", 1 },
      { "Iuml",  "Ï", 1 },
      { "iuml",  "ï", 1 },
      { "Ouml",  "Ö", 1 },
      { "ouml",  "ö", 1 },
      { "Uuml",  "Ü", 1 },
      { "uuml",  "ü", 1 },

      { "Aring", "0xC385",  0 },
      { "aring", "0xC3A5",  0 },
      { "AElig", "0xC386",  0 },
      { "aelig", "0xC3A6",  0 },
      { "Ccedil", "0xC387", 0 },
      { "ccedil", "0xC3A7", 0 },

/*      { "",'' },   */
      { NULL,    NULL, 0 }   /* end of table */
   };

/*
 * Bundle of pointers and counters (typedef FUNCTION_ARGUMENT).
 * NOTE(review): presumably the argument handed to the functions listed in
 * loadableModules - confirm. Ownership of html/text and whether they are
 * NUL-terminated is not visible in this file; the *Length fields carry their
 * sizes separately.
 */
typedef struct functArg
{
	struct sHost* hostInfo;
	char* html;
	unsigned int htmlLength;
	char* text;
	unsigned int textLength;

	/* Same names as the globals nPagesViewed / bytesDownloaded; note that
	 * PagesViewed is a signed int here while the global is unsigned. */
	int PagesViewed;
	long int bytesDownloaded;

	/* Untyped pointers; presumably the addresses of gMysqlDB1..3 - confirm. */
	void* mysqlDB1;
	void* mysqlDB2;
	void* mysqlDB3;
}FUNCTION_ARGUMENT;

/*MACRO*/
/*
 * Smaller of a and b.
 * Arguments and the whole expansion are parenthesized so the macro composes
 * with surrounding operators (2 * MIN(3,4) is 6, MIN(x & 3, y) compares x & 3).
 * The selected argument is evaluated twice: do not pass expressions with side
 * effects such as i++.
 */
#define MIN(a,b)        (((a) < (b)) ? (a) : (b))

/*
 * Release x with free().
 * free(NULL) is defined to do nothing, so no NULL test is needed and x is
 * evaluated only once. The expansion is a brace block rather than a bare `if`
 * so a following `else` at the call site can no longer attach to the macro's
 * own `if`; call sites written with or without a trailing semicolon still
 * compile.
 */
#define FREE(x)         { free(x); }

/*
 * Append one line to the file "error.log" (opened in append mode, relative to
 * the current directory) in the form
 *     day\month\year hour:min:sec - msg
 * using the local time. msg must be a NUL-terminated string (printed with %s).
 * Nothing is written if the time cannot be converted or the file cannot be
 * opened.
 * The expansion stays a brace block so existing call sites, with or without a
 * trailing semicolon, keep compiling. The locals carry an errlog prefix so an
 * identifier used in msg (e.g. a caller variable named `file`) is not captured
 * by the macro's own variables.
 * Requires <stdio.h> and <time.h> at the point of use.
 */
#define ERROR_LOG(msg)	{													\
                        FILE* errlogFile_;									\
                        time_t errlogNow_;									\
                        struct tm *errlogTm_;								\
                            time( &errlogNow_ );							\
                            errlogTm_ = localtime(&errlogNow_);				\
                            if(errlogTm_ != NULL &&							\
                               (errlogFile_ = fopen("error.log","a"))!=NULL)	\
                            {												\
                                fprintf(errlogFile_,"%i\\%i\\%i %i:%i:%i - %s\n",errlogTm_->tm_mday ,errlogTm_->tm_mon +1, errlogTm_->tm_year +1900,errlogTm_->tm_hour ,errlogTm_->tm_min ,errlogTm_->tm_sec,(msg));	\
                                fclose(errlogFile_);						\
                            }												\
                         }
	
#endif


/*EOF*/
