
/* OpenWebSpider
*
*  Author:     Stefano Alimonti aka Shen139
*  Mail:       shen139 [at] openwebspider (dot) org
*
*
* This file is part of OpenWebSpider
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*
*/

#ifndef __MISC
#define __MISC

#ifndef WIN32

/*
 * GetTickCount - non-Windows stand-in for the Win32 call of the same name.
 *
 * Returns the current gettimeofday() time expressed in milliseconds and
 * reduced modulo 2^32. The absolute value is not meaningful; only the
 * difference between two calls is.
 *
 * The arithmetic is done in unsigned long so that tv_sec * 1000 wraps
 * instead of overflowing a signed long (undefined behaviour where long is
 * 32 bits wide).
 */
int GetTickCount()
{
	struct timeval tv;
	unsigned long ms;

	/* Defined fallback in case gettimeofday() fails and leaves tv untouched. */
	tv.tv_sec = 0;
	tv.tv_usec = 0;
	gettimeofday(&tv, NULL);

	ms = (unsigned long)tv.tv_sec * 1000UL + (unsigned long)tv.tv_usec / 1000UL;

	/* Keep the low 32 bits, then hand them back through the int interface. */
	return (int)(unsigned int)ms;
}

/*
 * SetConsoleTitle - non-Windows placeholder; the title text is ignored and
 * nothing is done.
 */
void SetConsoleTitle(char *msg)
{
	(void)msg;	/* intentionally unused */
}

/*
 * stricmp - case-insensitive string comparison; forwards to strcasecmp()
 * and returns its result unchanged.
 */
int stricmp(char *a, char *b)
{
	int cmp;

	cmp = strcasecmp(a, b);

	return cmp;
}

/*
 * strnicmp - case-insensitive comparison of at most c characters; forwards
 * to strncasecmp() and returns its result unchanged.
 */
int strnicmp(char *a, char *b, int c)
{
	size_t limit = (size_t)c;	/* same conversion the call itself would apply */
	int cmp;

	cmp = strncasecmp(a, b, limit);

	return cmp;
}

/*
 * Sleep - suspend the calling thread for roughly n milliseconds using
 * usleep().
 *
 * The wait is issued in chunks shorter than one second: POSIX allows
 * usleep() to reject arguments of 1,000,000 microseconds or more, and a
 * single n * 1000 multiplication would wrap for large n. A negative n
 * returns immediately instead of being reinterpreted as a huge unsigned
 * delay.
 */
void Sleep(int n)
{
	unsigned int remaining_ms;

	if (n < 0)
		return;

	remaining_ms = (unsigned int)n;

	while (remaining_ms > 999u) {
		usleep(999u * 1000u);
		remaining_ms -= 999u;
	}

	usleep(remaining_ms * 1000u);
}

/*
 * _strupr - convert a NUL-terminated string to upper case in place.
 *
 * a: string to convert; NULL is accepted and returned as-is.
 * Returns a.
 *
 * Each character is passed to toupper() as unsigned char: handing it a
 * negative plain char is undefined behaviour. Walking to the terminator
 * also avoids squeezing strlen() into an int.
 */
char* _strupr(char*a)
{
	char *p;

	if (a == NULL)
		return NULL;

	for (p = a; *p != '\0'; p++)
		*p = (char)toupper((unsigned char)*p);

	return a;
}

/*
 * _strlwr - convert a NUL-terminated string to lower case in place.
 *
 * a: string to convert; NULL is accepted and returned as-is.
 * Returns a.
 *
 * Each character is passed to tolower() as unsigned char: handing it a
 * negative plain char is undefined behaviour. Walking to the terminator
 * also avoids squeezing strlen() into an int.
 */
char* _strlwr(char*a)
{
	char *p;

	if (a == NULL)
		return NULL;

	for (p = a; *p != '\0'; p++)
		*p = (char)tolower((unsigned char)*p);

	return a;
}

/*
 * closesocket - close descriptor s with close() and return close()'s
 * result unchanged.
 */
int closesocket(int s)
{
	int rc;

	rc = close(s);

	return rc;
}

/*
 * ExitThread - terminate the calling thread via pthread_exit().
 *
 * a: exit code. It is handed to pthread_exit() encoded in the pointer
 *    value itself, so a joiner reads it back as (int)(long)value.
 *    Passing the address of this parameter instead would leave the joiner
 *    with a pointer into the stack of a thread that no longer exists.
 *
 * Does not return to the caller; the int return type only mirrors the
 * existing interface.
 */
int ExitThread(int a)
{
	pthread_exit((void *)(long)a);

	return a;	/* not reached */
}

/*
 * TerminateThread - request cancellation of `thread` with pthread_cancel().
 *
 * The second argument is ignored. The value returned is whatever
 * pthread_cancel() returned (0 on success), which is the opposite sense of
 * the Win32 call this name comes from.
 */
int TerminateThread(pthread_t thread, int nothing)
{
	int rc;

	(void)nothing;	/* intentionally unused */

	rc = pthread_cancel(thread);

	return rc;
}

/*
 * CloseHandle - non-Windows placeholder; the handle is ignored and nothing
 * is done.
 */
void CloseHandle(HANDLE a)
{
	(void)a;	/* intentionally unused */
}

#endif

int InitIndexing(struct sHost currentHst)
{
	int condition = 1;
	char* sqlQuery;
	MYSQL_RES gRes;
	MYSQL_RES* tmpRes;
	DWORD avgSec;
	time_t long_time;
	struct tm *newtime;
	struct sHost *robots_txt;
	
#ifdef WIN32
	char strTitle[3000];
#endif
	
	robots_txt=(struct sHost*)malloc(sizeof(struct sHost));
	
	sqlQuery = malloc(MAXQUERYSIZE);
	
	if(robots_txt==NULL || sqlQuery==NULL)
		MemoryCorruptedHandler("InitIndexing");
	
	currentHst.viewed = 0;
	memcpy(robots_txt,&currentHst,sizeof(struct sHost));
	strcpy(robots_txt->Page, "/robots.txt");
	robots_txt->level = 1;
	robots_txt->type  = 1;
	lstFirst = lstInit(*robots_txt);
	
	FREE(robots_txt);
	
	nPagesViewed=0;
	bytesDownloaded=0;
	startTimeMS=0;
	
	
	currentHst.level = 1;
	lstAddHost(&lstFirst,currentHst);
	
	memcpy(&IndexingHost,&currentHst,sizeof(struct sHost));
	
	printf("\r\n");
	printf("Start Host        : %s\r\n",currentHst.Host);
	printf("Start Page        : %s\r\n", currentHst.Page);
	printf("Scan Mode         : Index\r\n");
	printf("Mode              : %s\r\n",(starthostonly==1)?"Single Host":"Recursive");
	printf("Max depth level   : %i\r\n",maxDepthLevel);
	printf("Mysql server n.1  : %s\r\n",MYSQLSERVER1);
	printf("Mysql server n.2  : %s\r\n",MYSQLSERVER2);
	printf("Mysql server n.3  : %s\r\n",MYSQLSERVER3);
	printf("Surfing the net... (press CTRL+C to exit)\r\n");
	
	
	if(actAsAServerPort)
	{
		CreateServerThread(actAsAServerPort);
		Sleep(200);
	}
	
	{
		int (*modInitFilter)(char*, char*);
		char sError[MAXDESCRIPTIONSIZE];
		int ret;
		
		if( ( modInitFilter = GetInitModFunctionHandlerByName("modFilter")) )
		{	
			ret=modInitFilter(currentHst.Host,sError);
			if(ret==0)
			{
				printf("\nmodInitFilter(): %s\n\n",sError);
				ERROR_LOG(sError);
				return 0;
			}
		}
	}
	
	if(!StartUpWinsock())
	{
		fprintf(stderr,"WSAStartup() error\r\n");
		ERROR_LOG("WSAStartup() error")
			return -1;
	}
	
	SetConsoleTitle("Connecting to mysql...");
	
	printf("Connecting to Mysql server n.1 (%s)...",MYSQLSERVER1);                 //Hosts
	if(sqlConnect(MYSQLSERVER1, USERDB1, PASSDB1, DB1,&gMysqlDB1, MYSQLSERVER_PORT1)==0)
	{
		fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB1,mysql_error(&gMysqlDB1));
		
		ERROR_LOG(mysql_error(&gMysqlDB1))
			return -1;
	}
	
	printf("OK\r\nConnecting to Mysql server n.2 (%s)...",MYSQLSERVER2);           //Pages
	if(sqlConnect(MYSQLSERVER2, USERDB2, PASSDB2, DB2,&gMysqlDB2, MYSQLSERVER_PORT2)==0)
	{
		fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB2,mysql_error(&gMysqlDB2));
		
		ERROR_LOG(mysql_error(&gMysqlDB2))
			
			mysql_close(&gMysqlDB2);
		return -1;
	}
	
	printf("OK\r\nConnecting to Mysql server n.3 (%s)...",MYSQLSERVER3);           //Tmp Tables
	if(sqlConnect(MYSQLSERVER3, USERDB3, PASSDB3, DB3,&gMysqlDB3, MYSQLSERVER_PORT3)==0)
	{
		fprintf(stderr, "ERROR\r\nFailed to connect to database(%s): Error: %s\r\n",DB3,mysql_error(&gMysqlDB3));
		
		ERROR_LOG(mysql_error(&gMysqlDB3))
			
			mysql_close(&gMysqlDB3);
		return -1;
	}
	printf("OK\r\n");
	
	SetConsoleTitle("Creating temp table...");
	
	do
	{
		RandomTable(gTable);
	}
	while(!CreateTmpTable(gTable));    //Loop until creates a new tmp table!!!
	
	
	SetConsoleTitle("...");
	
	sprintf(sqlQuery,"SELECT * FROM hostlist WHERE hostname = \'http://%s\' limit 1",currentHst.Host);
	if(!my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK))
	{
		tmpRes=mysql_store_result(&gMysqlDB1);
		if (tmpRes)
		{
			memcpy(&gRes,tmpRes,sizeof(MYSQL_RES));
		}
	}
	
	if(mysql_affected_rows(&gMysqlDB1)>0)
        sprintf(sqlQuery,"UPDATE hostlist SET port=%i, status = 2, lastvisit=curdate() WHERE hostname =\'http://%s\' limit 1", currentHst.port, currentHst.Host);
	else
		//puts current hostname in the db as "Scanning host in progress.." (viewed==2)
		sprintf(sqlQuery,"INSERT INTO hostlist (hostname, port, status, lastvisit) VALUES('http://%s', %i, 2, curdate());", currentHst.Host, currentHst.port);
	
	my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);
	
	if(bUpdate==0)
	{
		printf("Deleting old index for %s...",currentHst.Host);
		fflush(stdout);
		
		sprintf(sqlQuery,"DELETE ii FROM pagelist, ii WHERE pagelist.hostname =\'%s\' AND ii.pageid = pagelist.idpage ",currentHst.Host);
		my_mysql_query(&gMysqlDB2, sqlQuery,NO_BLOCK);

        sprintf(sqlQuery,"DELETE FROM pagelist WHERE hostname =\'%s\' ",currentHst.Host);
		my_mysql_query(&gMysqlDB2, sqlQuery,NO_BLOCK);
		
		printf("OK\r\n");
		
		printf("Deleting old rels for %s...",currentHst.Host);
		
		sprintf(sqlQuery,"DELETE FROM rels WHERE host =\'http://%s\'",currentHst.Host);
		my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);
		
		printf("OK\r\n");
		
	}
	
	signal(SIGINT,  sigdie);
	signal(SIGTERM, sigdie);
	
	fflush(stdin);
	
	SetConsoleTitle("Creating threads...");
	
	CreateThreads();
	/**************************MT********************************/
	
	printf("\r\n");
	startTimeMS=GetTickCount();
	avgSec=0;
	
	time( &long_time ); 
	newtime=localtime(&long_time);
	
	sprintf(startTime,"%i:%i:%i",newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec );
	
	while(condition)
	{
#ifdef WIN32
		sprintf(strTitle,"OpenWebSpiderV%s | Pages: %i | Time: %i sec | host: %s",VERSION,nPagesViewed,(int)((GetTickCount()-startTimeMS)/1000),currentHst.Host);
		SetConsoleTitle(strTitle);
#endif
		CheckThreads();
		
		Sleep(300);
		
		if(iQuit==1)
		{
			printf("\r\n\r\nQuitting: Killing threads...\n\n");
			
			KillThreads();
			
			iQuit=0;
			bKillThread=0;
			
			sprintf(sqlQuery,"UPDATE hostlist SET status = 1,pages=%i WHERE hostname =\'http://%s\' limit 1",nPagesViewed,currentHst.Host);
			
			printStats(&currentHst,0);
			
			my_mysql_ping(&gMysqlDB1,NO_BLOCK);
			my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);
			
			FlushTempTable(gTable);
			
			if( bBuildOwsOwnIndex == 1 )
            {
			    /* all pages are swapped to the table pagelist */
			    /* are we using ows own index? */
			    /* if so: build the index for the current hostname */
			    BuildOwsOwnIndex(currentHst);
            }
	
			CalcPageRank(currentHst.Host);
			
			DoQuit();
			
		}/*if(iQuit==1)*/
		
		if(bKillThread==1)
		{
			SetConsoleTitle("Killing threads");
			KillThreads();
			CreateThreads();
		}/*if(bKillThread==1)*/
		
		
		thrdBlock(BLOCKTHRDHST);
		if(/*iDoNextHost==1 ||*/						/*Switching to the next host*/
			(lstGetNodeByVal(lstFirst,0)==NULL &&
			lstGetNodeByVal(lstFirst,2)==NULL))
		{
			/* set the status of the pages to be indexed and of that in indexing as indexed */
			/*lstSetNodeStatus(lstFirst,0,1);
			lstSetNodeStatus(lstFirst,2,1);*/
			
			thrdUnBlock(BLOCKTHRDHST);
			
			SetConsoleTitle("Killing threads");
			
			bKillThread=1;
			
			KillThreads();
			
			if(iDoNextHost==1)
			{
				sprintf(sqlQuery,"UPDATE hostlist SET status = 1,pages=%i WHERE hostname =\'http://%s\' limit 1",nPagesViewed,currentHst.Host);
				
				my_mysql_ping(&gMysqlDB1,NO_BLOCK);
				my_mysql_query(&gMysqlDB1, sqlQuery,NO_BLOCK);
				
				iDoNextHost=0;
				
			}
			
			FlushTempTable(gTable);
			
            if( bBuildOwsOwnIndex == 1 )
            {
			    /* all pages are swapped to the table pagelist */
			    /* are we using ows own index? */
			    /* if so: build the index for the current hostname */
			    BuildOwsOwnIndex(currentHst);
            }
			
			CalcPageRank(currentHst.Host);
			
			iRobCrawlDelay=0;
			
			if((ReturnFirstUrl(&currentHst))==-1)
			{
				fprintf(stderr,"\nBuffer empty\n");
				iQuit=1;
			}
			
			if(iQuit==1)
			{
				DoQuit();
			}
			
			CreateThreads();
			
			bKillThread=0;
			
		}//if(iDoNextHost==1 || (lstGetNodeByVal(lstFirst,0)==NULL && lstGetNodeByVal(lstFirst,2)==NULL))
		
		thrdUnBlock(BLOCKTHRDHST);
		
	}/*while(condition)*/
	
	return 1;
}

/*
* printStats - print the statistics of the current scan to stdout and
* append the same figures to "stats.log" when that file can be opened.
*
* Host: host whose name is reported.
* flag selects the heading:
*   flag=0 -> complete stats
*   flag=1 -> in-complete stats
*   flag=2 -> switched to the next host
*
* Pages, downloaded bytes and start time come from the globals
* nPagesViewed, bytesDownloaded, startTimeMS and startTime.
*/
void printStats(struct sHost* Host,int flag)
{
	time_t long_time;
	struct tm *newtime;
	FILE* file;
	int downloadedKb;
	int elapsedSec;
	
	time( &long_time ); 
	newtime=localtime(&long_time);
	
	/* Divide before narrowing so a counter wider than int is not cut off first */
	downloadedKb=(int)(bytesDownloaded/1024);
	
	/* Sampled once so the console and the log file report the same duration */
	elapsedSec=(int)((GetTickCount()-startTimeMS)/1000);
	
	/* NOTE(review): for flag 2 the console heading is "STATS(2)" while the log uses "STATS(S)" */
	if(flag==1)
		printf("\r\n + STATS(*)\r\n");
	else if(flag==2)
		printf("\r\n + STATS(2)\r\n");
	else
		printf("\r\n + STATS\r\n");
	
	printf("  - Host:\t\t%s\r\n",Host->Host );
	printf("  - Pages:\t\t%i\r\n",nPagesViewed);
	printf("  - Downloaded:\t\t%i Kb\r\n",downloadedKb);
	printf("  - Scan time: %is (%s - %i:%i:%i)\r\n\r\n",elapsedSec,startTime,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec  );
	
	/* The log is best effort: nothing is reported when it cannot be opened */
	if((file = fopen("stats.log","a"))!=NULL)
	{
		if(flag==1)
			fprintf(file," + STATS(*)\r\n");
		else if(flag==2)
			fprintf(file," + STATS(S)\r\n");
		else
			fprintf(file," + STATS\r\n");
		
		fprintf(file,"  - %i\\%i\\%i %i:%i:%i -- OpenWebSpider version: %s --\r\n",newtime->tm_mday ,newtime->tm_mon +1, newtime->tm_year +1900,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec,VERSION);
		fprintf(file,"  - Host:\t\t\t%s\r\n",Host->Host );
		fprintf(file,"  - Pages:\t\t%i\r\n",nPagesViewed);
		fprintf(file,"  - Downloaded:\t\t%i Kb\r\n",downloadedKb);
		fprintf(file,"  - Scan time: %is (%s - %i:%i:%i) \r\n",elapsedSec,startTime,newtime->tm_hour ,newtime->tm_min ,newtime->tm_sec);
		fprintf(file,"============================================================\r\n\r\n");
		fclose(file);
	}
}

/*
 * MemoryCorruptedHandler - report a fatal memory error and terminate the
 * process. Within this file it is called when malloc() returns NULL.
 *
 * funct: name of the function reporting the problem; may be NULL, in
 *        which case the "Function:" line is omitted.
 *
 * Does not return. The process ends with a failure status so that a
 * calling script can tell this abort apart from a normal shutdown.
 */
void MemoryCorruptedHandler(char* funct)
{
	printf("\r\n\r\nMemory corrupted\r\n");
	
	if(funct)
		printf("Function: %s\r\n",funct);
	
	printf("Exiting...\r\n\r\n");
	
	exit(EXIT_FAILURE);
}

/*
 * DoQuit - shut the spider down and end the process with status 0.
 *
 * Closes the server socket when actAsAServerPort is set, clears the
 * iQuit and bKillThread flags, drops the temporary table gTable, closes
 * the three MySQL connections, waits 200 ms and exits. Never returns.
 */
void DoQuit()
{
	if (actAsAServerPort)
	{
		fputs("\n\nFreeing Sockets...", stdout);
		closesocket(OWS_Server_fd);
		fputs("OK\n\n", stdout);
	}

	/* Clear the control flags before tearing things down */
	iQuit = 0;
	bKillThread = 0;

	DropTempTable(gTable);

	mysql_close(&gMysqlDB1);
	mysql_close(&gMysqlDB2);
	mysql_close(&gMysqlDB3);

	Sleep(200);

	fputs("Bye\n\n", stdout);
	SetConsoleTitle("Bye byE");

	exit(0);
}


#endif

/*EOF*/

