/*////////////////////////////////////////////////////////////////////////
Copyright (c) 1994 Electrotechnical Laboratry (ETL), AIST, MITI

Permission to use, copy, modify, and distribute this material for any
purpose and without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies, and
that the name of ETL not be used in advertising or publicity pertaining
to this material without the specific, prior written permission of an
authorized representative of ETL.
ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS
MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS
OR IMPLIED WARRANTIES.
/////////////////////////////////////////////////////////////////////////
Content-Type:	program/C; charset=US-ASCII
Program:	dget.c
Author:		Yutaka Sato <ysato@etl.go.jp>
Description:
History:
	940206	extracted from urlfind.c
//////////////////////////////////////////////////////////////////////#*/
#include <ctype.h>
#include <stdio.h>
#include "ystring.h"
#include "url.h"
#include "delegate.h"
#include "fpoll.h"
#include "file.h"
#include "auth.h"

/* Look-up in a string-id table.  In this file it is called with the table
 * handle obtained from strid_create() and an id, and the result is used as
 * the URL text registered under that id. */
const char *strid_find(int tab,int hx,int id);
/* Opens the connection to the server (or proxy).  dget_main() treats a
 * negative return value as failure and otherwise uses the result as a
 * socket descriptor for fdopen(). */
int connect2server(Connection *Conn,PCStr(proto),PCStr(host),int port);

/* Called with the response stream as ifp and stdout/stderr as ofp/msg when
 * the -tar option was given. */
void tar_scan(FILE *ifp,FILE *ofp,FILE *msg,PCStr(opts),PCStr(tarfile),const char *files[],PCStr(edits));

/* Called for ftp URLs that are fetched without a proxy; a NULL return is
 * handled by the caller as "no data received". */
FILE *ftp_fopen0(int put,int svsock,PCStr(host),PCStr(user),PCStr(pass),PCStr(path),PVStr(resp),int rsize,int *isdirp);

/*
 * Help texts.  They still name "urlfind"; neither string is referenced by
 * the code visible in this file.
 */
static const char *usage =
	"Usage: urlfind URL\n"
	"    -- Find recursively in URL space.\n";

static const char *arg1spec =
	"Argument specification error:\n"
	"   The first argument should be URL as follows:\n"
	"      protocol://host\n"
	"      protocol://host:port\n"
	"      protocol://host/path\n"
	"      protocol://host:port/path\n";

/*
 * DgetEnv -- option flags and crawl state shared by dget_main(), url1()
 * and http1().  A single instance is allocated by minit_dget() and reached
 * through the accessor macros defined below the struct.
 */
typedef struct {
	int	de_NoCache;	/* "-c": add "Pragma: no-cache" to the request */
	int	de_PutHead;	/* "-h": copy the status line and headers to stdout */
	int	de_PutBody;	/* "-o": copy the response body to stdout */
	int	de_KeepAlive;	/* "-k" / "-v1.1": ask for keep-alive; cleared when the
				 * response does not grant it or after an ftp fetch */
	MStr(	de_AccEncode,64);	/* "-e<word>": value of Accept-Encoding */
	int	de_SendCookie;	/* "-C<n>": number of filler characters sent in a
				 * Cookie header (0 = no Cookie header) */
	int	de_Interval;	/* "-i<sec>": pause between requests, in milliseconds */
	int	de_SendOnly;	/* "-so": send the request and do not read a response */
	int	de_StatCodeOnly;	/* "-s": print the status code and exit with it */
	int	de_Http11;	/* "-v1.1": send the request as HTTP/1.1 */
	int	de_Recursive;	/* "-r": follow links found in text/html responses */
	int	de_URLgot;	/* handle returned by strid_create() for found URLs */
	int	de_URLgetx;	/* number of URLs registered so far (next id to assign) */
	int	de_URLgotx;	/* index of the next registered URL to be fetched */
  const char   *de_TarOpt;	/* "-tar<opts>": text following "-tar", given to tar_scan() */
  const	char   *de_TarFiles[2]; /**/	/* set to {"*", 0} together with de_TarOpt */
	MStr(	de_Host,256);	/* "-H<host>": Host header value; when empty and no
				 * proxy is used it is filled from the first URL */
	int	de_noHost;	/* "-H" with no value: do not send a Host header */
	int	de_toProxy;	/* "PROXY=" argument given: use Proxy-Connection header */
} DgetEnv;
static DgetEnv *dgetEnv;
/* Accessor macros: each expands to a member of *dgetEnv, so minit_dget()
 * must have run before any of them is evaluated. */
#define NoCache		dgetEnv->de_NoCache
#define PutHead		dgetEnv->de_PutHead
#define PutBody		dgetEnv->de_PutBody
#define KeepAlive	dgetEnv->de_KeepAlive
#define AccEncode	dgetEnv->de_AccEncode
#define SendCookie	dgetEnv->de_SendCookie
#define Interval	dgetEnv->de_Interval
#define SendOnly	dgetEnv->de_SendOnly
#define StatCodeOnly	dgetEnv->de_StatCodeOnly
#define Http11		dgetEnv->de_Http11
#define Recursive	dgetEnv->de_Recursive
#define URLgot		dgetEnv->de_URLgot
#define URLgetx		dgetEnv->de_URLgetx
#define URLgotx		dgetEnv->de_URLgotx
#define TarOpt		dgetEnv->de_TarOpt
#define TarFiles	dgetEnv->de_TarFiles
#define Host		dgetEnv->de_Host
/**/
#define noHost		dgetEnv->de_noHost
#define ToProxy		dgetEnv->de_toProxy
/*
 * minit_dget -- allocate the shared DgetEnv record on the first call;
 * later calls leave the existing record untouched.
 */
void minit_dget()
{
	if( dgetEnv != 0 )
		return;
	dgetEnv = NewStruct(DgetEnv);
}

static int http1(int nr,FILE *ts,FILE *fs,PCStr(proto),PCStr(host),int port,PCStr(path),PVStr(result));

/*
 * connect1 -- send a "CONNECT host:port" request for `site` on `ts` and
 * echo the reply read from `fs` to stderr, stopping after the first line
 * that starts with CR or LF (the empty line ending the header) or at EOF.
 * The status of the reply is not examined.
 */
static void connect1(FILE *ts,FILE *fs,PCStr(site))
{	CStr(hostname,128);
	CStr(line,128);
	int portnum;

	portnum = scan_hostport("https",site,AVStr(hostname));
	fprintf(ts,"CONNECT %s:%d HTTP/1.0\r\n\r\n",hostname,portnum);
	fflush(ts);

	for(;;){
		if( fgets(line,sizeof(line),fs) == NULL )
			break;
		fprintf(stderr,"%s",line);
		if( line[0] == '\r' || line[0] == '\n' )
			break;
	}
}

/*
 * dget_main -- entry point of the "dget" command: fetch the given URLs
 * (or URLs read line by line from stdin when none is given) and report
 * per-URL results and overall timing on stderr.
 *
 * Recognized arguments:
 *   -<N>         repeat every URL N times
 *   -i<sec>      sleep <sec> seconds (fractions allowed) after each request
 *   -c -h -o     no-cache / print headers / print body
 *   -r           recursive: also fetch URLs collected by http1()/url1()
 *   -tar<opts>   feed the response to tar_scan()
 *   -H[host]     Host header value, or suppress it when no value is given
 *   -C<n> -k -e<enc> -so -s -v1.1
 *   PROXY=host:port   (also taken from the PROXY environment variable)
 *   FSV= MYAUTH= SOCKS=   accepted and ignored here
 *   anything else         a URL (at most elnumof(urls) of them)
 *
 * The function does not return to the caller: it ends with exit().
 */
int dget_main(int ac,const char *av[],Connection *Conn)
{	const char *proxy;
	CStr(phost,128);
	int pport;
	int ai,un,ux,nrepeat,nr;
	const char *arg;
	const char *urls[256]; /**/
	CStr(url,URLSZ);
	const char *dp;
	const char *u1;
	int svsock,num,total;
	CStr(date,128);
	double time0,Start,Time0,TTime,Max,Min;
	CStr(type,128);
	FILE *ts,*fs;
	int leng;
	float intvl;
	CStr(proto,256);
	CStr(site,1024);
	CStr(upath,1024);
	CStr(noproxy,URLSZ);
	int withPROXY;

	minit_dget();

	if( ac < 2 ){
		fprintf(stderr,"Usage: %s [PROXY=host:port] [url] [-o]\r\n",
			av[0]);
		exit(0);
	}

	nrepeat = 1;
	proxy = NULL;
	un = 0;
	strcpy(proto,"http");

	for( ai = 1; ai < ac; ai++ ){
		arg = av[ai];

		/* the cast keeps isdigit() defined for bytes >= 0x80 where
		 * plain char is signed */
		if( arg[0] == '-' && isdigit((unsigned char)arg[1]) ){
			nrepeat = atoi(&arg[1]);
		}else
		if( strncmp(arg,"-i",2) == 0 ){
			intvl = 0;
			sscanf(&arg[2],"%f",&intvl);
			Interval = (int)(intvl * 1000);
		}else
		if( strcmp(arg,"-c") == 0 ){
			NoCache = 1;
		}else
		if( strcmp(arg,"-h") == 0 ){
			PutHead = 1;
		}else
		if( strcmp(arg,"-o") == 0 ){
			PutBody = 1;
		}else
		if( strcmp(arg,"-r") == 0 ){
			Recursive = 1;
			URLgot = strid_create(0x10000);
		}else
		if( strncmp(arg,"-tar",4) == 0 ){
			TarOpt = arg + 4;
			TarFiles[0] = "*";
			TarFiles[1] = 0;
		}else
		if( strncmp(arg,"-H",2) == 0 ){
			/* NOTE(review): Host is declared with 256 bytes and
			 * the copy is not length-checked here -- confirm the
			 * project's strcpy is bounds-checked */
			if( arg[2] == 0 )
				noHost = 1;
			else	strcpy(Host,arg+2);
		}else
		if( strncmp(arg,"-C",2) == 0 ){
			SendCookie = atoi(arg+2);
		}else
		if( strcmp(arg,"-k") == 0 ){
			KeepAlive = 1;
		}else
		if( strncmp(arg,"-e",2) == 0 ){
			wordScan(arg+2,AccEncode);
		}else
		if( strcmp(arg,"-so") == 0 ){
			SendOnly = 1;
		}else
		if( strcmp(arg,"-s") == 0 ){
			StatCodeOnly = 1;
		}else
		if( strcmp(arg,"-v1.1") == 0 ){
			Http11 = 1;
			KeepAlive = 1;
		}else
		if( arg[0] == '-' ){
			fprintf(stderr,"Unknown option: %s\n",arg);
		}else
		if( strncmp(arg,"FSV=",4) == 0 ){
		}else
		if( strncmp(arg,"MYAUTH=",7) == 0 ){
		}else
		if( strncmp(arg,"SOCKS=",6) == 0 ){
		}else
		if( strncmp(arg,"PROXY=",6) == 0 ){
			ToProxy = 1;
			proxy = arg + 6;
		}else{
			if( elnumof(urls) <= un ){
				fprintf(stderr,"*** too many URLs (%d)\n",un);
			}else{
				urls[un++] = arg;
			}
		}
	}

	if( proxy == NULL )
		proxy = getenv("PROXY");
	if( proxy != NULL )
		withPROXY = 1;
	else	withPROXY = 0;

	/* a proxy is optional: without one each URL is contacted directly */
	if( proxy != NULL )
	if( Xsscanf(proxy,"%[^:]:%d",AVStr(phost),&pport) != 2 ){
		fprintf(stderr,"Illegal specification PROXY=%s\n",proxy);
		exit(2);
	}
	time0 = Time();
	num = 0;
	total = 0;
	ts = fs = NULL;

	ux = 0;
	svsock = -1;

	TTime = Min = Max = 0;
	for(;;){
		/* pick the next URL: pending crawl results first, then the
		 * command line, then stdin ('#' lines are skipped) */
		if( Recursive && URLgotx < URLgetx ){
			u1 = strid_find(URLgot,0,URLgotx);
			URLgotx++;
			fprintf(stderr,"-- %4d %s\n",URLgotx,u1);
			strcpy(url,u1);
		}else
		if( 0 < un ){
			if( un <= ux )
				break;
			strcpy(url,urls[ux++]);
		}else{
			if( fgets(url,sizeof(url),stdin) == NULL )
				break;
			if( dp = strpbrk(url,"\r\n") )
				truncVStr(dp);
			if( url[0] == '#' )
				continue;
		}
		if( !withPROXY ){
			/* direct access: connect to the URL's own host and
			 * request only the path part */
			decomp_absurl(url,AVStr(proto),AVStr(site),AVStr(upath),sizeof(upath));
			pport = scan_hostport(proto,site,AVStr(phost));
			if( pport == 0 ){
				fprintf(stderr,"? %s\n",url);
				exit(-1);
			}
			/* NOTE(review): site may hold up to 1024 bytes while
			 * Host is declared with 256 -- confirm bounds checking */
			if( Host[0] == 0 )
				strcpy(Host,site);
			sprintf(noproxy,"%s:%d",phost,pport);
			proxy = noproxy;
			sprintf(url,"/%s",upath);
			if( !StatCodeOnly )
				fprintf(stderr,"%s [%s] %s\n",proto,noproxy,url);
		}

		/* fix: with a repeat count of 0 the loop below never runs, so
		 * make sure the per-URL report does not print an
		 * uninitialized buffer */
		type[0] = 0;

		for( nr = 0; nr < nrepeat; nr++ ){
			if( 1 < nrepeat ){
				fprintf(stderr,"# %d / %d\n",nr,nrepeat);
			}
			type[0] = 0;
			/* fix: the ftp + -tar path assigns no length, which left
			 * the interval report reading an indeterminate value */
			leng = 0;
			Start = Time();
			num++;

			if( svsock < 0 ){
				svsock = connect2server(Conn,proto,phost,pport);
			}

			if( svsock < 0 ){
				fprintf(stderr,"cannot connect to PROXY=%s\n",
					proxy);
				if( Interval == 0 )
					break;
				leng = 0;
			}else{
				if( ts == NULL ){
					ts = fdopen(svsock,"w");
					fs = fdopen(svsock,"r");
				}
				if( strncmp(url,"https://",8) == 0 ){
					connect1(ts,fs,url+8);
				}

				if( (proxy == NULL || proxy == noproxy) && strcmp(proto,"ftp") == 0 ){
					FILE *dfp;
					CStr(resp,1024);
					int isdir;
					CStr(auth,128);
					const char *user;
					const char *pass;

					/* "user:pass" from MYAUTH, else anonymous */
					if( get_MYAUTH(Conn,AVStr(auth),"ftp",phost,pport) ){
						user = auth;
						if( pass = strchr(auth,':') ){
							truncVStr(pass); pass++;
						}else	pass = "";
					}else{
						user = "anonymous";
						pass = getADMIN();
					}

					dfp = ftp_fopen0(0,svsock,phost,
						user,pass,url,AVStr(resp),sizeof(resp),&isdir);
					strcpy(type,"application/octetstream");
					/* NOTE(review): neither dfp nor the stream
					 * wrapped around openNull() is closed here --
					 * confirm who owns them */
					if( dfp != NULL ){
						if( TarOpt ){
							tar_scan(dfp,stdout,stderr,TarOpt,"-",TarFiles,NULL);
						}else
						if( PutBody )
							total += leng = copyfile1(dfp,stdout);
						else	total += leng = copyfile1(dfp,fdopen(openNull(1),"w"));
					}else{
						total += leng = 0;
					}
					KeepAlive = 0;
				}else
				if( Recursive && !withPROXY ){
					total += leng = http1(nr,ts,fs,"http",phost,pport,url,AVStr(type));
				}else{
					total += leng = http1(nr,ts,fs,"http","-",0,url,AVStr(type));
				}

				if( !KeepAlive ){
					/* NOTE(review): ts and fs wrap the same
					 * descriptor, so the second fclose() acts
					 * on an already closed descriptor */
					fclose(ts); ts = NULL;
					fclose(fs); fs = NULL;
					svsock = -1;
				}
			}
			Time0 = Time() - Start;
			if( Min == 0 || Time0 < Min )
				Min = Time0;
			if( Max == 0 || Max < Time0 )
				Max = Time0;
			TTime += Time0;

			if( 0 < Interval ){
				StrftimeLocal(AVStr(date),sizeof(date),"%m/%d %H:%M:%S",time(0),0);
				fprintf(stderr,"%s %.3f (%d)\n",date,Time0,
					leng);
				msleep(Interval);
			}
		}
		StrftimeLocal(AVStr(date),sizeof(date),"%H:%M:%S",time(0),0);
		fprintf(stderr,"%s %3d: %s %s\n",date,num,url,type);
		fflush(stderr);
	}
	fprintf(stderr,
"%d GET / %f seconds =  %6.2f GET/second %d bytes (%.3f/%.3f/%.3f)\n",
		num,Time()-time0, num/(Time()-time0),total,
		Min,TTime/nrepeat,Max);
	exit(0);
	return 0;
}

/*
 * url1 -- callback given to scan_url() by http1() for every URL found in
 * an HTML response; `base` is the URL of the page being scanned.
 *
 * The URL is registered in the URLgot table (and URLgetx advanced) when
 *   - it contains no ".." path segment,
 *   - after being made absolute against `base` and stripped of its
 *     "#fragment" it starts with `base`, and
 *   - it is not `base` itself.
 * Anything else is silently ignored.
 */
static void url1(PCStr(url),PCStr(base))
{	CStr(xurl,4096);
	const char *bp;
	const char *dp;
	int id;

	/* "http:relative" (no slash after the scheme) is handled as relative */
	if( strncmp(url,"http:",5) == 0 )
	if( url[5] != '/' )
		url += 5;
	if( strncmp(url,"./",2) == 0 )
		url += 2;

	/* Reject any ".." path segment.  Every occurrence is examined: the
	 * first ".." may be part of an ordinary name (as in "a..b/../x")
	 * while a later one is a real parent-directory reference. */
	for( dp = strstr(url,".."); dp != NULL; dp = strstr(dp+1,"..") ){
		if( dp[2] == 0 || dp[2] == '/' )
		if( dp == url || dp[-1] == '/' )
			return;
	}

	/* a URL without ':' is taken relative to the directory of base */
	xurl[0] = 0;
	if( strchr(url,':') == 0 ){
		strcpy(xurl,base);
		if( bp = strrchr(xurl,'/') )
			((char*)bp)[1] = 0;
	}

	strcat(xurl,url);
	if( dp = strchr(xurl,'#') )
		truncVStr(dp);

	if( strncmp(xurl,base,strlen(base)) != 0 )
		return;
	if( strcmp(xurl,base) == 0 )
		return;

	/* strid() yields URLgetx only when the URL was not registered yet */
	id = strid(URLgot,xurl,URLgetx);
	if( id == URLgetx ){
		URLgetx++;
		fprintf(stderr,"++ %4d %s\n",URLgetx,xurl);
	}
}

int recvHTTPbodyX(Connection *Conn,int chunked,FILE *in,FILE *out);

/*
 * http1 -- send one GET request for `path` on `ts` and consume the
 * response from `fs`.
 *
 *   nr      repetition index, used only in diagnostics
 *   ts,fs   write / read streams of the server (or proxy) connection
 *   proto,host,port
 *           used to build the base URL for recursive link collection and
 *           to look up MYAUTH credentials
 *   path    request target placed on the GET line
 *   result  receives "[code][content-type][content-length]" or an error tag
 *
 * Returns the number of body bytes received (0 with -so, on an error, or
 * on the paths that do not count bytes).  Clears the global KeepAlive
 * option when the response did not grant keep-alive.  With -s the process
 * exits with the status code instead of returning.
 */
static int http1(int nr,FILE *ts,FILE *fs,PCStr(proto),PCStr(host),int port,PCStr(path),PVStr(result))
{	CStr(request,URLSZ);
	CStr(resp,1024);
	double start;
	int code = 0;		/* stays 0 when the status line has no number */
	CStr(type,256);
	int leng;
	int total = 0;
	int rcc;
	int keepalive = 0;	/* also read at xERR, so it must start defined */
	CStr(hostport,256);
	CStr(base,URLSZ);
	CStr(auth,256);
	CStr(authB64,256);
	const char *dp;
	Connection ConnBuf, *Conn = &ConnBuf;
	int chunked = 0;

	HostPort(AVStr(hostport),proto,host,port);
	sprintf(base,"%s://%s/%s",proto,hostport,path[0]=='/'?path+1:path);

	/* ---- build the request ---- */
	if( Http11 )
		sprintf(request,"GET %s HTTP/1.1\r\n",path);
	else	sprintf(request,"GET %s HTTP/1.0\r\n",path);

	if( !noHost && Host[0] )
		Xsprintf(TVStr(request),"Host: %s\r\n",Host);
	Xsprintf(TVStr(request),"User-Agent: DeleGate/%s (dget)\r\n",
		DELEGATE_ver());

	ConnInit(Conn);
	if( get_MYAUTH(Conn,AVStr(auth),"http",host,port) ){
		str_to64(auth,strlen(auth),AVStr(authB64),sizeof(authB64),1);
		if( dp = strpbrk(authB64,"\r\n") ) truncVStr(dp);
		Xsprintf(TVStr(request),"Authorization: Basic %s\r\n",authB64);
	}
	if( get_MYAUTH(Conn,AVStr(auth),"http-proxy",host,port) ){
		str_to64(auth,strlen(auth),AVStr(authB64),sizeof(authB64),1);
		if( dp = strpbrk(authB64,"\r\n") ) truncVStr(dp);
		Xsprintf(TVStr(request),"Proxy-Authorization: Basic %s\r\n",authB64);
	}

	if( NoCache )
		strcat(request,"Pragma: no-cache\r\n");
	if( KeepAlive ){
		if( ToProxy )
			strcat(request,"Proxy-Connection: Keep-Alive\r\n");
		else
			strcat(request,"Connection: keep-alive\r\n");
	}
	if( AccEncode[0] ){
		Xsprintf(TVStr(request),
			"Accept-Encoding: %s\r\n",AccEncode);
	}
	if( SendCookie ){
		/* filler Cookie of SendCookie characters, cut short so that
		 * the trailing CRLFs still fit into request[] */
		int ci;
		refQStr(dp,request);/**/
		strcat(request,"Cookie:");
		dp = request + strlen(request);
		for( ci = 0; ci < SendCookie; ci++ ){
			if( &request[sizeof(request)-5] <= dp )
				break;
			setVStrPtrInc(dp," 123456789"[ci%10]);
		}
		strcpy(dp,"\r\n");
	}

	if( getenv("HEADSIZE") ){
		/* pad the header with HEADSIZE 'X' characters, bounded the
		 * same way as the Cookie filler */
		refQStr(p,request); /**/
		int size,i;
		size = atoi(getenv("HEADSIZE"));
		strcat(request,"X-Padding: ");
		p = request + strlen(request);
		for( i = 0; i < size; i++ ){
			if( &request[sizeof(request)-5] <= &p[i] )
				break;
			setVStrElem(p,i,'X');
		}
		setVStrEnd(p,i);
		strcat(request,"\r\n");
	}
	strcat(request,"\r\n");
	fputs(request,ts);
	fflush(ts);

	if( SendOnly )
		return 0;

	/* ---- status line ---- */
	start = Time();
	if( fgets(resp,sizeof(resp),fs) == NULL ){
		fprintf(stderr,"[NULL] empty response (%3.2fs)\n",Time()-start);
		sprintf(result,"[NULL]");
		goto xERR;
	}
	if( PutHead )
		fputs(resp,stdout);

	if( strncmp(resp,"HTTP/1.",7) != 0 ){
		fprintf(stderr,"[%d][NON-HTTP/1.0] %s\n",nr,resp);
		sprintf(result,"[%d][NON-HTTP/1.0]",nr);
		goto xERR;
	}
	sscanf(resp,"%*s %d",&code);
	if( StatCodeOnly ){
		printf("%d\n",code);
		exit(code);
	}

	/* ---- header fields, up to the empty line ---- */
	type[0] = 0;
	leng = 0;

	while( fgets(resp,sizeof(resp),fs) != NULL ){
		const char *eol;

		if( PutHead )
			fputs(resp,stdout);
		if( eol = strpbrk(resp,"\r\n") )
			truncVStr(eol);
		if( eol == resp )
			break;

		/* fix: compare the whole field name including the colon
		 * (the lengths used to be 12 and 13) */
		if( strncasecmp("Transfer-Encoding:",resp,18) == 0 ){
			if( strstr(resp,"chunked") )
				chunked = 1;
		}else
		if( strncasecmp("Content-Type:",resp,13) == 0 )
			Xsscanf(resp,"%*s %[^; \t\r\n]",AVStr(type));
		else
		if( strncasecmp("Content-Length:",resp,15) == 0 )
			sscanf(resp,"%*s %d",&leng);
		else
		if( strncasecmp("Proxy-Connection: keep-alive",resp,28) == 0
		 || strncasecmp(      "Connection: keep-alive",resp,22) == 0 )
			keepalive = 1;
	}
	sprintf(result,"[%d][%s][%d]",code,type,leng);

	/* ---- body ---- */
	if( TarOpt ){
		tar_scan(fs,stdout,stderr,TarOpt,"-",TarFiles,NULL);
	}else
	if( keepalive ){
		/* the connection stays open, so EOF cannot mark the end:
		 * read Content-Length bytes, then continue only while more
		 * input is ready */
		int ci,ch;
		if( chunked && PutBody ){
			ci = recvHTTPbodyX(Conn,chunked,fs,stdout);
		}else
		for( ci = 0; ; ci++ ){
			if( ci == leng )
				fflush(stdout);
			if( leng <= ci && fPollIn(fs,1) <= 0 )
				break;
			if( (ch = getc(fs)) == EOF )
				break;
			if( PutBody )
				putc(ch,stdout);
			if( READYCC(fs) <= 0 )
				fflush(stdout);
		}
		fprintf(stderr,">>>> Response: %d/%d\n",ci,leng);
		/* fix: report the received size to the caller; this path
		 * used to return 0 regardless of what was read */
		if( 0 < ci )
			total = ci;
	}else{
		if( Recursive && strcasecmp(type,"text/html") == 0 ){
			/* collect links: each URL found is passed to url1() */
			CStr(line,0x10000);
			const char *lp;
			CStr(xline,0x20000);
			int remlen,blen,isbin;

			remlen = sizeof(line);
			for(;;){
				lp = fgetsByBlock(AVStr(line),sizeof(line),fs,1,0,0,0,
					remlen,&blen,&isbin);
				if( lp == NULL )
					break;
				url_absolute("-.-",proto,host,port,"",line,AVStr(xline),VStrNULL);
				scan_url(xline,(iFUNCP)url1,base,(void*)"");
			}
		}else
		while( (rcc=fread(resp,1,sizeof(resp),fs)) != 0 ){
			total += rcc;
			if( PutBody )
			{
				fwrite(resp,1,rcc,stdout);
				if( Http11 ){
					while( ready_cc(fs) ){
						putc(getc(fs),stdout);
					}
					fflush(stdout);
				}
			}
		}
	}

	if( 100000 < total )
		fprintf(stderr,"total: %d\n",total);

xERR:
	/* no keep-alive granted (or no usable response): stop asking for it,
	 * which also makes the caller close the connection */
	if( !keepalive )
		KeepAlive = 0;

	return total;
}
