
/* OpenWebSpider
 *
 *  Author:     Stefano Alimonti aka Shen139
 *  Mail:       shen139 [at] openwebspider (dot) org
 *
 *
 * This file is part of OpenWebSpider
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

#ifndef __MISC
#define __MISC

#ifndef WIN32

/*
 * POSIX stand-in for the Win32 GetTickCount(): reads the current time with
 * gettimeofday() and returns it expressed in milliseconds (seconds * 1000
 * plus microseconds / 1000), converted to int on return.
 */
int GetTickCount()
{
	struct timeval now;

	gettimeofday(&now, NULL);

	{
		/* millisecond part contributed by the sub-second field */
		const long msFromUsec = now.tv_usec / 1000L;

		return now.tv_sec * 1000L + msFromUsec;
	}
}

/*
 * Compatibility stub for the Win32 SetConsoleTitle() call used by the rest
 * of the file: the message is accepted and ignored.
 */
void SetConsoleTitle(char* msg)
{
	(void)msg;	/* intentionally unused */
}

/*
 * Win32-style name for a case-insensitive comparison of two NUL-terminated
 * strings. Forwards to strcasecmp() and returns its result unchanged.
 */
int stricmp(char*a,char*b)
{
	const int order = strcasecmp(a, b);

	return order;
}

/*
 * Win32-style name for a case-insensitive comparison of at most c leading
 * characters. Forwards to strncasecmp(); c is converted to the size_t that
 * function expects.
 */
int strnicmp(char*a,char*b,int c)
{
	const size_t limit = (size_t)c;

	return strncasecmp(a, b, limit);
}

/*
 * Win32-style Sleep(): suspends the caller for n milliseconds by converting
 * the value to microseconds (unsigned arithmetic) and calling usleep().
 * NOTE(review): POSIX permits usleep() to reject values of one second or
 * more; the only call visible in this file uses 300 ms.
 */
void Sleep(int n)
{
	const unsigned microseconds = (unsigned)n * 1000;

	usleep(microseconds);
}

/*
 * Converts the NUL-terminated string a to upper case in place and returns a.
 *
 * Each character is passed to toupper() as an unsigned char: handing it a
 * plain char with a negative value (any byte >= 0x80 where char is signed)
 * is undefined behaviour. Walking the string by pointer also avoids
 * squeezing strlen()'s result into an int.
 */
char* _strupr(char*a)
{
	char *cursor;

	for(cursor=a; *cursor!='\0'; cursor++)
		*cursor=(char)toupper((unsigned char)*cursor);

return a;
}

/*
 * Converts the NUL-terminated string a to lower case in place and returns a.
 *
 * Each character is passed to tolower() as an unsigned char: handing it a
 * plain char with a negative value (any byte >= 0x80 where char is signed)
 * is undefined behaviour. Walking the string by pointer also avoids
 * squeezing strlen()'s result into an int.
 */
char* _strlwr(char*a)
{
	char *cursor;

	for(cursor=a; *cursor!='\0'; cursor++)
		*cursor=(char)tolower((unsigned char)*cursor);

return a;
}

/*
 * Win32-style name for closing a socket descriptor. Calls close() on s;
 * the result of close() is not reported to the caller.
 */
void closesocket(int s)
{
	(void)close(s);
}

/*
 * Win32-style ExitThread(): terminates the calling thread with exit code a.
 *
 * The code is handed to pthread_exit() encoded in the pointer value itself.
 * The previous version passed the address of the parameter, which stops
 * being valid as soon as the thread is gone, so anything reading the value
 * through pthread_join() received a dangling pointer. A joiner recovers the
 * code with (int)(long)value.
 *
 * Does not return; the trailing return only satisfies the int signature.
 */
int ExitThread(int a)
{
	pthread_exit((void *)(long)a);

return a;	/* not reached */
}

/*
 * Win32-style TerminateThread(): requests cancellation of thread through
 * pthread_cancel() and returns that call's result. The second argument is
 * accepted only for signature compatibility and is ignored.
 */
int TerminateThread(pthread_t thread,int nothing)
{
	int status;

	(void)nothing;
	status = pthread_cancel(thread);

	return status;
}

/*
 * Compatibility stub for the Win32 CloseHandle() call: the handle is
 * accepted and ignored.
 */
void CloseHandle(HANDLE a)
{
	(void)a;	/* intentionally unused */
}

#endif

int InitIndexing(struct sHost currentHst)
{
int condition = 1;
char sqlQuery[MAXQUERYSIZE];
MYSQL_RES gRes;
MYSQL_RES* tmpRes;
DWORD avgSec;
time_t long_time;
struct tm *newtime;
struct sHost *robots_txt;

#ifdef WIN32
char strTitle[3000];
#endif

	robots_txt=(struct sHost*)malloc(sizeof(struct sHost));
	if(robots_txt==NULL)
		MemoryCorruptedHandler("InitIndexing");
	
	currentHst.viewed = 0;
	memcpy(robots_txt,&currentHst,sizeof(struct sHost));
	strcpy(robots_txt->Page, "/robots.txt");
	robots_txt->level = 1;
	robots_txt->type  = 1;
	lstFirst = lstInit(*robots_txt);

	FREE(robots_txt);

	nPagesViewed=0;
	bytesDownloaded=0;
	startTimeMS=0;


	currentHst.level = 1;
	lstAddHost(&lstFirst,currentHst);

	memcpy(&IndexingHost,&currentHst,sizeof(struct sHost));

	printf("\r\nStart Host:  \t%s\r\n",currentHst.Host);
	printf("Start Page:      \t%s\r\n", currentHst.Page);
	printf("Scan Mode:       \tIndex\r\n");
	printf("Mode:            \t%s\r\n",(starthostonly==1)?"Single Host":"Recursive");
	printf("Max depth level  \t%i\r\n",maxDepthLevel);
	printf("Mysql server n.1:\t%s\r\n",MYSQLSERVER1);
	printf("Mysql server n.2:\t%s\r\n",MYSQLSERVER2);
	printf("Mysql server n.3:\t%s\r\n",MYSQLSERVER3);
	printf("Surfing the net... (press CTRL+C to exit)\r\n");

	{
	int (*modInitFilter)(char*, char*);
	char sError[MAXDESCRIPTIONSIZE];
	int ret;

		if( ( modInitFilter = GetInitModFunctionHandlerByName("modFilter")) )
		{	
			ret=modInitFilter(currentHst.Host,sError);
			if(ret==0)
			{
				printf("\nmodInitFilter(): %s\n\n",sError);
				ERROR_LOG(sError)
				return 0;
			}
		}
	}

	if(!StartUpWinsock())
	{
		fprintf(stderr,"WSAStartup() error\r\n");
		ERROR_LOG("WSAStartup() error")
		return -1;
	}

	SetConsoleTitle("Connecting to mysql...");

	printf("Connecting to Mysql server n.1 (%s)...",MYSQLSERVER1);                 //Hosts
	if(sqlConnect(MYSQLSERVER1, USERDB1, PASSDB1, DB1,&gMysqlDB1)==0)
	{
		fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB1,mysql_error(&gMysqlDB1));

		ERROR_LOG(mysql_error(&gMysqlDB1))
		return -1;
	}

	printf("OK\r\nConnecting to Mysql server n.2 (%s)...",MYSQLSERVER2);           //Pages
	if(sqlConnect(MYSQLSERVER2, USERDB2, PASSDB2, DB2,&gMysqlDB2)==0)
	{
		fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB2,mysql_error(&gMysqlDB2));

		ERROR_LOG(mysql_error(&gMysqlDB2))

		mysql_close(&gMysqlDB2);
		return -1;
	}

	printf("OK\r\nConnecting to Mysql server n.3 (%s)...",MYSQLSERVER3);           //Tmp Tables
	if(sqlConnect(MYSQLSERVER3, USERDB3, PASSDB3, DB3,&gMysqlDB3)==0)
	{
		fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB3,mysql_error(&gMysqlDB3));

		ERROR_LOG(mysql_error(&gMysqlDB3))

		mysql_close(&gMysqlDB3);
		return -1;
	}
	printf("OK\r\n");

	SetConsoleTitle("Creating temp table...");

	do
	{
		RandomTable(gTable);
	}
	while(!CreateTmpTable(gTable));    //Loop until creates a new tmp table!!!


	SetConsoleTitle("...");

	sprintf(sqlQuery,"SELECT * FROM hostlist WHERE hostname = \'http://%s\' limit 1",currentHst.Host);
	if(!my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK))
	{
		tmpRes=mysql_store_result(&gMysqlDB1);
		if (tmpRes)
		{
			memcpy(&gRes,tmpRes,sizeof(MYSQL_RES));
		}
	}

	if(mysql_affected_rows(&gMysqlDB1)>0)
        sprintf(sqlQuery,"UPDATE hostlist SET port=%i, status = 2, lastvisit=curdate() WHERE hostname =\'http://%s\' limit 1", currentHst.port, currentHst.Host);
	else
		//puts current hostname in the db as "Scanning host in progress.." (viewed==2)
		sprintf(sqlQuery,"INSERT INTO hostlist (hostname, port, status, lastvisit) VALUES('http://%s', %i, 2, curdate());", currentHst.Host, currentHst.port);

	my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);

	if(bUpdate==0)
	{
		printf("Deleting old indexed pages from %s...",currentHst.Host);
	
		sprintf(sqlQuery,"DELETE FROM pagelist WHERE hostname =\'%s\'",currentHst.Host);
		my_mysql_query(&gMysqlDB2, sqlQuery,NO_BLOCK);
		
		printf("OK\r\n");

		printf("Deleting old rels for %s...",currentHst.Host);
	
		sprintf(sqlQuery,"DELETE FROM rels WHERE host =\'http://%s\'",currentHst.Host);
		my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);
		
		printf("OK\r\n");

	}

	signal(SIGINT,  sigdie);
	signal(SIGTERM, sigdie);
	
	fflush(stdin);

	SetConsoleTitle("Creating threads...");
	
	CreateThreads();
	/**************************MT********************************/

	printf("\r\n");
	startTimeMS=GetTickCount();
	avgSec=0;

	time( &long_time ); 
	newtime=localtime(&long_time);
	
	sprintf(startTime,"%i:%i:%i",newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec );

	while(condition)
	{
#ifdef WIN32
		sprintf(strTitle,"OpenWebSpiderV%s | Pages: %i | Time: %i sec | host: %s",VERSION,nPagesViewed,(int)((GetTickCount()-startTimeMS)/1000),currentHst.Host);
		SetConsoleTitle(strTitle);
#endif
		CheckThreads();
		
		Sleep(300);
		
		if(iQuit==1)
		{
			printf("\r\n\r\nQuitting: Killing threads...\n\n");

			KillThreads();

			iQuit=0;
			bKillThread=0;

			sprintf(sqlQuery,"UPDATE hostlist SET status = 1,pages=%i WHERE hostname =\'http://%s\' limit 1",nPagesViewed,currentHst.Host);
			
			printStats(&currentHst,0);

			my_mysql_ping(&gMysqlDB1,NO_BLOCK);
			my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);

			FlushTempTable(gTable);



			CalcPageRank(currentHst.Host);

			DoQuit();

		}/*if(iQuit==1)*/

		if(bKillThread==1)
		{
			SetConsoleTitle("Killing threads");
			KillThreads();
			CreateThreads();
		}/*if(bKillThread==1)*/


		thrdBlock(BLOCKTHRDHST);
		if(iDoNextHost==1 ||						/*Switching to the next host*/
			(lstGetNodeByVal(lstFirst,0)==NULL &&
			 lstGetNodeByVal(lstFirst,2)==NULL))
		{
			thrdUnBlock(BLOCKTHRDHST);

			SetConsoleTitle("Killing threads");
			
			bKillThread=1;

			KillThreads();

			if(iDoNextHost==1)
			{
				sprintf(sqlQuery,"UPDATE hostlist SET status = 1,pages=%i WHERE hostname =\'http://%s\' limit 1",nPagesViewed,currentHst.Host);

				my_mysql_ping(&gMysqlDB1,NO_BLOCK);
				my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);

				iDoNextHost=0;

			}

			FlushTempTable(gTable);
			CalcPageRank(currentHst.Host);

			iRobCrawlDelay=0;

			if((ReturnFirstUrl(&currentHst))==-1)
			{
				fprintf(stderr,"\nBuffer empty\n");
				iQuit=1;
			}

			if(iQuit==1)
			{
				DoQuit();
			}

			CreateThreads();

			bKillThread=0;

		}//if(iDoNextHost==1 || (lstGetNodeByVal(lstFirst,0)==NULL && lstGetNodeByVal(lstFirst,2)==NULL))

		thrdUnBlock(BLOCKTHRDHST);
		
	}/*while(condition)*/

return 1;
}

/*
 * Loads the database settings from "openwebspider.conf" in the current
 * directory.
 *
 * Every line must either start with one of the twelve recognised
 * "key=value" prefixes (matched case-insensitively), start with '#', or be
 * empty. The value is everything after the '=' with CR/LF stripped. Each
 * setting is echoed to stdout, passwords masked.
 *
 * Returns 1 when the file was parsed and all twelve settings are non-empty.
 * Returns 0 when the file cannot be opened, a line is not recognised (the
 * line number is printed) or a setting is missing/empty.
 *
 * All twelve destination buffers are cleared before the file is opened.
 */
int ReadConfFile()
{
/* One recognised setting: its prefix, where it is stored, how it is echoed */
struct confEntry
{
	const char* key;	/* line prefix, including the '=' */
	char* dest;		/* global buffer receiving the value */
	const char* label;	/* name printed when the setting is read */
	int secret;		/* 1 = print a mask instead of the value */
};
struct confEntry entries[] =
{
	{ "mysqlserver1=", MYSQLSERVER1, "Server1",      0 },
	{ "mysqlserver2=", MYSQLSERVER2, "Server2",      0 },
	{ "mysqlserver3=", MYSQLSERVER3, "Server3",      0 },
	{ "db1=",          DB1,          "Database1",    0 },
	{ "db2=",          DB2,          "Database2",    0 },
	{ "db3=",          DB3,          "Database3",    0 },
	{ "userdb1=",      USERDB1,      "Username DB1", 0 },
	{ "userdb2=",      USERDB2,      "Username DB2", 0 },
	{ "userdb3=",      USERDB3,      "Username DB3", 0 },
	{ "passdb1=",      PASSDB1,      "Password DB1", 1 },
	{ "passdb2=",      PASSDB2,      "Password DB2", 1 },
	{ "passdb3=",      PASSDB3,      "Password DB3", 1 }
};
const int nEntries=(int)(sizeof(entries)/sizeof(entries[0]));
FILE* fConf;
char sLine[120];
int iLine=0;
int i;

	for(i=0;i<nEntries;i++)
		entries[i].dest[0]=0;

	printf("+ Trying to load openwebspider.conf...");
	fConf=fopen("openwebspider.conf","r");
	if(fConf==NULL)
	{
		printf("file not found\n\n");
		return 0;
	}
	printf("OK\n");

	/*
	 * Driving the loop with fgets() itself (instead of feof()) ends it on a
	 * read error as well as on end of file. The 100-byte read limit is kept
	 * because the size of the destination buffers is not visible here.
	 */
	while(fgets(sLine,100,fConf)!=NULL)
	{
		iLine++;

		for(i=0;i<nEntries;i++)
			if(strnicmp(sLine,(char*)entries[i].key,(int)strlen(entries[i].key))==0)
				break;

		if(i<nEntries)
		{
			strcpy(entries[i].dest,sLine+strlen(entries[i].key));
			ReplaceChr(entries[i].dest,'\r',0);
			ReplaceChr(entries[i].dest,'\n',0);
			printf(" - %s: %s\n",entries[i].label,entries[i].secret ? "*****" : entries[i].dest);
		}
		else
		if(sLine[0]=='#' || sLine[0]=='\r' || sLine[0]=='\n' || sLine[0]==0)
			continue;
		else
		{
			printf("Error while parsing openwebspider.conf (Line: %i)\n",iLine);
			
			fclose(fConf);
			return 0;
		}
	}

	fclose(fConf);

	/* Every setting is mandatory, so an empty value counts as missing */
	for(i=0;i<nEntries;i++)
	{
		if(entries[i].dest[0]==0)
		{
			printf("  - Field in openwebspider.conf missing\n\n");

			return 0;
		}
	}

return 1;
}

 

/*
 * Runs a full-text search for Query against the pagelist table of the given
 * database and prints every hit (description, URL, relevancy, word count)
 * to stdout.
 *
 * Before the query is built, double quotes in Query are backslash-escaped
 * and then single quotes and backslashes are replaced by spaces.
 *
 * Returns  1 when at least one row was printed,
 *          0 when the connection failed or nothing was found,
 *         -1 when the query failed or its result could not be retrieved.
 *
 * The result set and the connection are released on every path after a
 * successful connect. Rows are read from the result handle returned by
 * mysql_store_result() rather than from a byte copy of it, which was left
 * uninitialised whenever that call returned NULL.
 *
 * NOTE(review): ReplaceStr() writes into the 2000-byte QueryTmp buffer and
 * takes no size argument here; confirm that callers bound the length of
 * Query.
 */
int IndexedSearch(char* hostname,char* username,char* password,char* dbname,char* Query)
{
char sqlQuery[MAXQUERYSIZE];
char QueryTmp[2000];
MYSQL mysql;
MYSQL_RES* tRes;
MYSQL_ROW row;
DWORD timems;
int rowsPrinted=0;

	if(sqlConnect(hostname, username, password, dbname , &mysql)==0)
	{
		fprintf(stderr, "Failed to connect to database: Error: %s\n",mysql_error(&mysql));

	return 0;
	}

	ReplaceStr(Query,QueryTmp,"\"","\\\"");
	ReplaceChr(QueryTmp,'\'',' ');
	ReplaceChr(QueryTmp,'\\',' ');
	
	timems=GetTickCount();
	snprintf(sqlQuery,MAXQUERYSIZE,"select hostname,page,description,match(html,description,hostname,page) against(\'%s\') as relevancy,match(html,description,hostname,page) against(\'%s\' in boolean mode) as wrdcount from pagelist where match(html,description,hostname,page) against(\'%s\' in boolean mode) order by wrdcount DESC,relevancy DESC;",QueryTmp,QueryTmp,QueryTmp);

	if(mysql_query(&mysql, sqlQuery))
	{
		printf("Error while executing query\n\n");
		mysql_close(&mysql);
		return -1;
	}

	tRes=mysql_store_result(&mysql);
	if(tRes==NULL)
	{
		/* a SELECT always yields a result set, so NULL means failure */
		printf("Error while executing query\n\n");
		mysql_close(&mysql);
		return -1;
	}

	printf("\nSearch in %i ms - %i results found\n\n",(int)(GetTickCount()-timems),(int)mysql_num_rows(tRes));

	while((row=mysql_fetch_row(tRes))!=NULL)
	{
		printf("+ %s\n",row[2]);
		printf(" - Url: http://%s%s\n",row[0],row[1]);
		printf(" - Relevancy: %s\n",row[3]);
		printf(" - Word found: %s\n\n",row[4]);
		rowsPrinted++;
	}

	mysql_free_result(tRes);
	mysql_close(&mysql);

	if(rowsPrinted==0)
	{
		printf("Nothing Found\n");

	return 0;
	}

return 1;
}

int IndexPage(char* html, struct sHost host, unsigned int htmlLength)
{
char *cTmp;
char *pureText;
char tmpTitle[MAXDESCRIPTIONSIZE], title[MAXDESCRIPTIONSIZE];
char sanHostname[MAXHOSTSIZE];
char sanPage[MAXPAGESIZE];
int usetitle=0;
char sqlQuery[MAXQUERYSIZE];
char *htmlcache=NULL;
unsigned int textLength;

	if( bTesting==1 || bDontIndexPages==1)
		return 1;

	if( bUpdate==1 )	//-u ?
		if( IsPageIndexed(&host)==1 )	//Is this page Indexed ?
			return 0;		//Yes, don't re-index

	cTmp = (char*)malloc(MAXPACKETSIZE);
	pureText = (char*)malloc(MAXPACKETSIZE);

	if(cTmp==NULL || pureText==NULL)
	{
		MemoryCorruptedHandler("IndexPage");
	}
	
	if(host.type==1)
	{
		if(BetweenTag(html, "title",tmpTitle ,1,MAXDESCRIPTIONSIZE)>0)
		{
			memset(title,0,MAXDESCRIPTIONSIZE);
			snprintf(title,MAXDESCRIPTIONSIZE-1,"%s",tmpTitle+1);
			usetitle=1;
		}

		textLength=UnHtml(html,cTmp,MAXPACKETSIZE);
		if(sqlTextToUTF8(cTmp,pureText,MAXPACKETSIZE)==0)
			strcpy(pureText,cTmp);

	}
	else //not html
	{
		RemoveShit(html);
		OnlyOneSpace(html,pureText,MAXPACKETSIZE);
		textLength=strlen(pureText);
		
	}

	if(bUseRegularExpressionB==1)	//are we using a regular expression filter?
	{	//yes
		if(regexec(&regexContentFilter, pureText, 0, 0, 0) != 0)
		{
			FREE(cTmp);
			FREE(pureText);

			return 0;
		}
	}	//else continue

{
int (*modFilter)(struct functArg*);

	if( (modFilter = GetModFunctionHandlerByName("modFilter")) )
	{	//we are using a custom function as filter
		struct functArg tmpModArg;

		tmpModArg.hostInfo = &host;
		tmpModArg.html = html;

		tmpModArg.htmlLength = htmlLength;

		tmpModArg.text = pureText;

		tmpModArg.textLength = textLength;

		tmpModArg.mysqlDB1 = &gMysqlDB1;
		tmpModArg.mysqlDB2 = &gMysqlDB2;
		tmpModArg.mysqlDB3 = &gMysqlDB3;

		thrdBlock(BLOCKDB1);
		thrdBlock(BLOCKINDEX);

		if(modFilter(&tmpModArg)==0)
		{
			thrdUnBlock(BLOCKDB1);
			thrdUnBlock(BLOCKINDEX);
			FREE(cTmp);
			FREE(pureText);

			return 0;
		}
		/*else index*/
		
		thrdUnBlock(BLOCKDB1);
		thrdUnBlock(BLOCKINDEX);
	}
}


	(usetitle==1) ? RemoveShit(title):RemoveShit(host.Description);
	
	/*mysql_real_escape_string(&gMysqlDB3, sanHostname, host.Host, strlen(host.Host));
	mysql_real_escape_string(&gMysqlDB3, sanPage, host.Page, strlen(host.Page));

	memset(sqlQuery,0,MAXQUERYSIZE);
	snprintf(sqlQuery,MAXQUERYSIZE,"INSERT DELAYED INTO %s SET hostname = \'%s\',page=\'%s\',description=\'%s\',date=curdate(),time=curtime(),version=%i,level=%i,html=CONCAT(\'%s\') ;",gTable,sanHostname,sanPage,(usetitle==1) ? title: host.Description,DBVERSION,host.level,pureText);
*/
	memset(sqlQuery,0,MAXQUERYSIZE);

    snprintf_mysql_escaped_sql_statement(&gMysqlDB3, sqlQuery, MAXQUERYSIZE, "INSERT DELAYED INTO %s SET hostname = \'%s\',page=\'%s\',description=\'%s\',date=curdate(),time=curtime(),version=%d,level=%d,html=CONCAT(\'%s\') ;",gTable,host.Host,host.Page,(usetitle==1) ? title: host.Description,DBVERSION,host.level,pureText);
	if(sqlQuery[MAXQUERYSIZE-3]!=0)
	{
		sqlQuery[MAXQUERYSIZE-3]='\'';
		sqlQuery[MAXQUERYSIZE-2]=';';
		sqlQuery[MAXQUERYSIZE-1]=0;
	}

	my_mysql_ping(&gMysqlDB3,BLOCKINDEX);
	if(my_mysql_query(&gMysqlDB3, sqlQuery, BLOCKINDEX))
	{
		ERROR_LOG(mysql_error(&gMysqlDB3))
		ERROR_LOG(sqlQuery)
		printf("\r\nQuery Error in function IndexPage(): %s\r\n",mysql_error(&gMysqlDB3));
		printf("Trying to reconnect to server...");
		printf("OK\r\nConnecting to Mysql server n.3 (%s)...",MYSQLSERVER3);
		if(sqlConnect(MYSQLSERVER3, USERDB3, PASSDB3, DB3,&gMysqlDB3)==0)
		{
			printf("ERROR\r\n");
			iQuit=1;
			
			ERROR_LOG(mysql_error(&gMysqlDB3))
			
			FREE(cTmp);
			FREE(pureText);

			return -1;
		}
		printf("OK\r\n");
	}

	FREE(cTmp);
	FREE(pureText);

	if(host.type==1 && xCacheHtml==1)	//saves html cache
	{
		htmlcache=(char*)malloc( ( strlen(html) + 1 ) * 2 );

		if(htmlcache==NULL)
			MemoryCorruptedHandler("IndexPage");

		mysql_real_escape_string(&gMysqlDB3, htmlcache, html, strlen(html));

		if(xCacheHtmlCompressed==1)
			sprintf(sqlQuery,"UPDATE %s SET htmlcache=COMPRESS('%s') WHERE hostname='%s' and page='%s';",gTable,htmlcache,sanHostname,sanPage);
		else
			sprintf(sqlQuery,"UPDATE %s SET htmlcache='%s' WHERE hostname='%s' and page='%s';",gTable,htmlcache,sanHostname,sanPage);

		if(my_mysql_query(&gMysqlDB3, sqlQuery, BLOCKINDEX))
		{
			ERROR_LOG(mysql_error(&gMysqlDB3))
			printf("\r\nQuery Error in function IndexPage(): %s\r\n",mysql_error(&gMysqlDB3));
		}

		FREE(htmlcache);
	}
	
return 1;
}

/*
 * Prints the statistics of the current scan (host, pages viewed, kilobytes
 * downloaded, elapsed time with start and current clock time) to stdout and
 * appends the same block, plus date and program version, to "stats.log".
 * The log is skipped silently when the file cannot be opened.
 *
 * flag selects the heading:
 *   flag=0 -> complete stats                ("STATS")
 *   flag=1 -> incomplete stats              ("STATS(*)")
 *   flag=2 -> switched to the next host     ("STATS(2)" on stdout,
 *                                            "STATS(S)" in the log)
 * NOTE(review): the two headings for flag=2 differ; both are kept exactly
 * as they were - confirm which spelling is intended.
 */
void printStats(struct sHost* Host,int flag)
{
time_t long_time;
struct tm *newtime;
FILE* file;
int elapsedSec;
int downloadedKb;

	time( &long_time ); 
	newtime=localtime(&long_time);

	/* Sampled once so that stdout and the log report the same figures */
	elapsedSec=(int)((GetTickCount()-startTimeMS)/1000);
	/* Divide before narrowing: casting the byte counter to int first cut off large totals */
	downloadedKb=(int)(bytesDownloaded/1024);

	if(flag==1)
		printf("\r\n + STATS(*)\r\n");
	else if(flag==2)
		printf("\r\n + STATS(2)\r\n");
	else
		printf("\r\n + STATS\r\n");

	printf("  - Host:\t\t%s\r\n",Host->Host );
	printf("  - Pages:\t\t%i\r\n",nPagesViewed);
	printf("  - Downloaded:\t\t%i Kb\r\n",downloadedKb);
	printf("  - Scan time: %is (%s - %i:%i:%i)\r\n\r\n",elapsedSec,startTime,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec  );

	if((file = fopen("stats.log","a"))!=NULL)
	{
		if(flag==1)
			fprintf(file," + STATS(*)\r\n");
		else if(flag==2)
			fprintf(file," + STATS(S)\r\n");
		else
			fprintf(file," + STATS\r\n");

		fprintf(file,"  - %i\\%i\\%i %i:%i:%i -- OpenWebSpider version: %s --\r\n",newtime->tm_mday ,newtime->tm_mon +1, newtime->tm_year +1900,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec,VERSION);
		fprintf(file,"  - Host:\t\t\t%s\r\n",Host->Host );
		fprintf(file,"  - Pages:\t\t%i\r\n",nPagesViewed);
		fprintf(file,"  - Downloaded:\t\t%i Kb\r\n",downloadedKb);
		fprintf(file,"  - Scan time: %is (%s - %i:%i:%i) \r\n",elapsedSec,startTime,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec);
		fprintf(file,"============================================================\r\n\r\n");
		fclose(file);
	}
}

/*
 * Fatal-error exit used by the callers in this file when a malloc() fails.
 * Prints a notice, naming the reporting function when funct is not NULL,
 * and terminates the process.
 *
 * The process now ends with EXIT_FAILURE: it used to exit with status 0,
 * which made the abort look like a successful run to the parent process.
 * Does not return.
 */
void MemoryCorruptedHandler(char* funct)
{
	printf("\r\n\r\nMemory corrupted\r\n");
	
	if(funct)
		printf("Function: %s\r\n",funct);
		
	printf("Exiting...\r\n\r\n");
	exit(EXIT_FAILURE);
}

/*
 * Orderly shutdown: clears the quit/kill flags, drops the temporary table
 * of this run, closes the three MySQL connections, says goodbye and ends
 * the process with status 0. Does not return.
 */
void DoQuit()
{
	/* the requests are being served now, so reset them */
	iQuit=0;
	bKillThread=0;

	/* database clean-up */
	DropTempTable(gTable);
	mysql_close(&gMysqlDB1);
	mysql_close(&gMysqlDB2);
	mysql_close(&gMysqlDB3);

	/* farewell */
	printf("Bye\n\n");
	SetConsoleTitle("Bye byE");

	exit(0);
}

/*
 * Tells whether the page described by host (Host + Page) already has a row
 * in pagelist.
 *
 * Returns 0 only when the lookup ran and matched no row; returns 1 in every
 * other case, including when the lookup could not be carried out.
 *
 * Host and page come from crawled URLs, so both are escaped with
 * mysql_real_escape_string() before being placed in the statement, and the
 * statement is built with a bounded snprintf(). The result pointer is a
 * NULL-initialised local: the previous heap slot was read uninitialised
 * whenever the query helper did not store a result into it.
 */
int IsPageIndexed(struct sHost* host)
{
char sqlQuery[MAXQUERYSIZE];
MYSQL_RES gRes;
MYSQL_RES* tmpRes=NULL;
char* escHost;
char* escPage;
size_t hostLen;
size_t pageLen;
unsigned long escHostLen;
unsigned long escPageLen;
int ret=1;

	hostLen=strlen(host->Host);
	pageLen=strlen(host->Page);

	/* worst case for escaping: every byte doubled, plus the terminator */
	escHost=(char*)malloc(hostLen*2+1);
	escPage=(char*)malloc(pageLen*2+1);
	
	if(escHost==NULL || escPage==NULL)
		MemoryCorruptedHandler("IsPageIndexed");

	escHostLen=mysql_real_escape_string(&gMysqlDB2, escHost, host->Host, (unsigned long)hostLen);
	escPageLen=mysql_real_escape_string(&gMysqlDB2, escPage, host->Page, (unsigned long)pageLen);

	if(escHostLen!=(unsigned long)-1 && escPageLen!=(unsigned long)-1)
	{
		snprintf(sqlQuery,sizeof(sqlQuery),"SELECT idpage FROM pagelist WHERE hostname='%s' AND page='%s' LIMIT 1",escHost, escPage);
		my_mysql_query_and_store_results(&gMysqlDB2, sqlQuery,&tmpRes,&gRes,BLOCKINDEX);

		if(mysql_affected_rows(&gMysqlDB2)==0)	//Page is not indexed -> return 0
			ret = 0;

		if(tmpRes)
		{
			mysql_free_result(tmpRes);
		}
	}
	/* else: escaping failed, report the page as indexed like any failed lookup */

	FREE(escHost);
	FREE(escPage);

return ret;
}

/*
 * Helper for sqlTextToUTF8(): appends the strings a, b and c, in that
 * order, to out at offset *pos, but only if all of them fit in maxout bytes
 * with room left for the terminator. Returns 1 and advances *pos on
 * success; returns 0 and writes nothing otherwise.
 */
static int sqlTextToUTF8_append3(char* out, int* pos, int maxout, const char* a, const char* b, const char* c)
{
size_t lenA=strlen(a);
size_t lenB=strlen(b);
size_t lenC=strlen(c);

	if((size_t)*pos+lenA+lenB+lenC >= (size_t)maxout)
		return 0;

	memcpy(out+*pos, a, lenA);
	memcpy(out+*pos+lenA, b, lenB);
	memcpy(out+*pos+lenA+lenB, c, lenC);
	*pos+=(int)(lenA+lenB+lenC);

return 1;
}

/*
 * Copies text to out, replacing HTML character references by fragments
 * meant to be spliced into a quoted SQL string:
 *
 *   &#NNN; / &#xHHH;  ->  ', CONVERT(CONVERT(0x<hex> using UCS2) using UTF8),'
 *   &name;            ->  looked up in ahList: type 1 entries are replaced
 *                         by their text, other entries by
 *                         ', CONVERT(<rep> using UTF8),'
 *
 * Anything else, including an '&' that does not start a recognised
 * reference, is copied unchanged. A reference is only considered when its
 * ';' is less than 10 characters away from the '&'.
 *
 * out is zero-filled first and always NUL-terminated. Returns 1 on success
 * and 0 when the result does not fit in maxout bytes (out then holds a
 * partial result).
 *
 * Changes from the earlier version: ordinary characters are now
 * bounds-checked too (they could run past the end of out), and the digits
 * of a numeric reference are validated before being placed in the SQL
 * fragment (they used to be copied verbatim). A numeric reference with
 * missing or invalid digits is copied as plain text; "&#X" is accepted as
 * well as "&#x".
 */
int sqlTextToUTF8(char* text, char* out, int maxout)
{
int x,y;
int textLen;
int entLen;
int cont;
int handled;
int valid;
int i;
char* semi;
char* digits;
char entity[10];
char val[10];

	if(maxout<=0)
		return 0;

	textLen=strlen(text);

	memset(out,0,maxout);

	y=0;

	for(x=0;x<textLen;x++)
	{
		if(text[x]!='&')
		{
			if(y+1>=maxout)
				return 0;
			out[y++]=text[x];
			continue;
		}

		handled=0;

		semi=strchr(text+x,';');
		if(semi && semi-(text+x) < 10)
		{
			/* entity = the characters between '&' and ';' (at most 8) */
			entLen=(int)(semi-(text+x))-1;
			memset(entity,0,sizeof(entity));
			memcpy(entity,text+x+1,entLen);

			if(entity[0]=='#')
			{
				/* numeric reference: accept only well-formed digits */
				valid=0;

				if(entity[1]=='x' || entity[1]=='X')
				{
					digits=entity+2;
					valid=(digits[0]!=0);
					for(i=0;digits[i]!=0 && valid;i++)
					{
						if( !( (digits[i]>='0' && digits[i]<='9') ||
						       (digits[i]>='a' && digits[i]<='f') ||
						       (digits[i]>='A' && digits[i]<='F') ) )
							valid=0;
					}
					if(valid)
						strcpy(val,digits);
				}
				else
				{
					digits=entity+1;
					valid=(digits[0]!=0);
					for(i=0;digits[i]!=0 && valid;i++)
					{
						if(digits[i]<'0' || digits[i]>'9')
							valid=0;
					}
					if(valid)
						sprintf(val,"%X",(unsigned)atoi(digits));
				}

				if(valid)
				{
					if(!sqlTextToUTF8_append3(out,&y,maxout,"', CONVERT(CONVERT(0x",val," using UCS2) using UTF8),'"))
						return 0;

					x+=entLen+1;	/* the loop increment then steps over the ';' */
					handled=1;
				}
			}
			else
			{
				/* named reference: first matching ahList entry wins */
				for(cont=0;ahList[cont].htmlChar;cont++)
				{
					if( strcmp( ahList[cont].htmlChar, entity ) != 0 )
						continue;

					if(ahList[cont].type==1)	/*ascii*/
					{
						if(!sqlTextToUTF8_append3(out,&y,maxout,ahList[cont].rep,"",""))
							return 0;
					}
					else						/*UTF8*/
					{
						if(!sqlTextToUTF8_append3(out,&y,maxout,"', CONVERT(",ahList[cont].rep," using UTF8),'"))
							return 0;
					}

					x+=entLen+1;	/* the loop increment then steps over the ';' */
					handled=1;
					break;
				}
			}

		}	/*if(semi && semi-(text+x) < 10)*/

		if(handled==0)
		{
			/* not a recognised reference: keep the '&' literally */
			if(y+1>=maxout)
				return 0;
			out[y++]='&';
		}
	}

return 1;
}

#endif

/*EOF*/
