diff -r -u webalizer-2.01-10/webalizer.c webalizer-2.01-10-tpu/webalizer.c --- webalizer-2.01-10/webalizer.c 2002-04-17 07:11:31.000000000 +0900 +++ webalizer-2.01-10-tpu/webalizer.c 2003-10-16 13:15:57.000000000 +0900 @@ -39,6 +39,7 @@ #include #include #include +#include <iconv.h> /* ensure getopt */ #ifdef HAVE_GETOPT_H @@ -224,6 +225,8 @@ char *f_cp=f_buf+GZ_BUFSIZE; /* pointer into the buffer */ int f_end; /* count to end of buffer */ +iconv_t cd_from_sjis, cd_from_utf8; /* converters to EUC-JP, shared by the whole run */ + /*********************************************/ /* MAIN - start here */ /*********************************************/ @@ -526,6 +529,9 @@ start_time = times(&mytms); + cd_from_sjis = iconv_open("EUC-JP", "Shift_JIS"); /* (iconv_t)-1 on failure; users skip conversion then */ + cd_from_utf8 = iconv_open("EUC-JP", "UTF-8"); + /*********************************************/ /* MAIN PROCESS LOOP - read through log file */ /*********************************************/ @@ -1345,6 +1351,9 @@ if (dns_db) close_cache(); #endif + iconv_close(cd_from_sjis); + iconv_close(cd_from_utf8); + /* Whew, all done! 
Exit with completion status (0) */ exit(0); } @@ -1804,6 +1813,34 @@ char srch[80]=""; unsigned char *cp1, *cp2, *cps; int sp_flg=0; + iconv_t cd=(iconv_t)-1; /* converter to apply; -1 = leave string as is */ + char tmpbuf2[BUFSIZE]; /* receives the converted search string */ + size_t inlen,outlen; + unsigned char *cp3; + int hex_flg=0; /* 0: no \xHH decoding, 1: decoding enabled, 2: an escape was decoded */ + + if((cp1=strstr(ptr,"?ie="))==NULL){ + cp1=strstr(ptr,"&ie="); + } + if(cp1 != NULL){ /* an ie= parameter names the input encoding */ + if(strncasecmp(cp1+1,"ie=Shift_JIS",12) == 0 || + strncasecmp(cp1+1,"ie=shift-jis",12) == 0){ + cd = cd_from_sjis; + } + if(strncasecmp(cp1+1,"ie=utf-8",8) == 0 || + strncasecmp(cp1+1,"ie=utf8",7) == 0){ + cd = cd_from_utf8; + } + } + if((cp1=strstr(ptr,"?lr=lang_ja"))==NULL){ + if((cp1=strstr(ptr,"&lr=lang_ja"))==NULL){ + if((cp1=strstr(ptr,"?hl=ja"))==NULL){ + cp1=strstr(ptr,"&hl=ja"); + } + } + } + if(cp1 != NULL) hex_flg=1; /* lr=lang_ja or hl=ja present: decode \xHH escapes below */ + /* Check if search engine referrer or return */ if ( (cps=isinglist(search_list,log_rec.refer))==NULL) return; @@ -1826,8 +1863,24 @@ if (*cp1=='+') *cp1=' '; /* change + to space */ if (sp_flg && *cp1==' ') { cp1++; continue; } /* compress spaces */ if (*cp1==' ') sp_flg=1; else sp_flg=0; /* (flag spaces here) */ - *cp2++=tolower(*cp1); /* normal character */ - cp1++; + if (hex_flg != 0 && + *cp1=='\\' && *(cp1+1)=='x' && + (*(cp1+2)>='0' && *(cp1+2)<='9' || + *(cp1+2)>='a' && *(cp1+2)<='f') && + (*(cp1+3)>='0' && *(cp1+3)<='9' || + *(cp1+3)>='a' && *(cp1+3)<='f')){ + *cp2++ = + ((*(cp1+2)>='0' && *(cp1+2)<='9') ? + *(cp1+2)-'0' : + *(cp1+2)-'a'+10)*16+ + ((*(cp1+3)>='0' && *(cp1+3)<='9') ? 
+ *(cp1+3)-'0' : + *(cp1+3)-'a'+10); + cp1+=4; /* skip the four characters of \xHH */ + hex_flg=2; /* remember that an escape was decoded */ + continue; + } + *cp2++=*cp1++; /* plain copy; lower-casing now happens after conversion */ } } *cp2=0; cp2=tmpbuf; @@ -1839,9 +1892,29 @@ cp1=cp2+strlen(cp2)-1; while (cp1!=cp2) if (isspace(*cp1)) *cp1--='\0'; else break; + if(hex_flg == 2) cd = cd_from_sjis; /* decoded \xHH bytes are converted as Shift_JIS, overriding any ie= choice */ + + if(cd != (iconv_t)-1){ + iconv(cd, NULL, 0, NULL, 0); /* reset the shared descriptor to its initial state */ + cp3 = cp2; + inlen = strlen(cp2)+1; /* +1: convert the terminating NUL too */ + cp1 = tmpbuf2; + outlen = sizeof(tmpbuf2); + if(iconv(cd, (char **)&cp3, &inlen, + (char **)&cp1, &outlen) != (size_t)-1 && /* iconv() returns size_t; failure is (size_t)-1 */ + inlen == 0){ /* all input consumed, so tmpbuf2 is NUL-terminated */ + cp2=tmpbuf2; + } + } + // fprintf(stderr,"%d:%s:(%s)\n",hex_flg,cp2,log_rec.refer); + /* strip invalid chars */ cp1=cp2; - while (*cp1!=0) { if ((*cp1<32)||(*cp1==127)) *cp1='_'; cp1++; } + while (*cp1!=0) { + if ((*cp1<32)||(*cp1==127)) *cp1='_'; + *cp1=tolower(*cp1); /* normal character */ + cp1++; + } if (put_snode(cp2,(u_long)1,sr_htab)) {