/*** analog 4.13 http://www.analog.cx/ ***/
/*** This program is copyright (c) Stephen R. E. Turner 1995 - 2000 except as
 *** stated otherwise. Distribution, usage and modification of this program is
 *** subject to the conditions of the Licence which you should have received
 *** with it. This program comes with no warranty, expressed or implied.
 ***/

/*** input.c; parsing the logfiles */

#include "anlghea3.h"

/* Shared read buffer state. The parsers below scan from pos towards
   block_end, and routinely write a sentinel character at *block_end so that
   scanning loops terminate without an explicit bounds check. */
extern char *block_start, *block_end, *block_bell, *record_start, *pos;
extern logical termchar[];
extern unsigned int year, month, date, hr, min, code;
extern double bytes;
extern char am;

/* stdin can be consumed only once, by whichever file claims it first */
static logical stdin_used = FALSE;

/* Open the logfile or cache file described by p for reading. filetype is
   used only in messages. If the name matches an entry in the uncompress list
   (and pipes are compiled in) the file is instead read through a pipe from
   the uncompression command and p->ispipe is set. Returns NULL on failure. */
FILE *my_lfopen(Filelist *p, char *filetype) {  /* open for reading */
#ifndef NOPIPES
  extern Strpair *uncompresshead;
  char *cmd;
  Strpair *up;
#endif
  FILE *f;

  if (IS_STDIN(p->name)) {
    if (stdin_used) {
      warn('F', TRUE, "stdin already used; cannot use it as %s", filetype);
      f = NULL;
    }
    else {
      f = stdin;
      stdin_used = TRUE;
      debug('F', "Opening stdin as %s", filetype);
    }
  }
  else {
    if ((f = FOPENR(p->name)) == NULL)
      warn('F', TRUE, "Failed to open %s %s: ignoring it", filetype, p->name);
    else {
      debug('F', "Opening %s as %s", p->name, filetype);
#ifndef NOPIPES
      for (up = uncompresshead; up != NULL && !(p->ispipe); TO_NEXT(up)) {
        if (MATCHES(p->name, up->name)) {
          fclose(f);
          cmd = (char *)xmalloc(strlen(up->data) + strlen(p->name) + 2);
          sprintf(cmd, "%s %s", up->data, p->name);
          /* NB This doesn't work for filenames with spaces in, but putting
             quotes round p->name doesn't help because of the way the
             arguments are passed to the shell. */
          f = POPENR(cmd);
          free((void *)cmd);
          p->ispipe = TRUE;
          if (f == NULL)  /* don't claim success if the pipe wasn't opened */
            warn('F', TRUE, "Failed to open %s %s: ignoring it", filetype,
                 p->name);
          else
            debug('F', " Using %s to uncompress it", up->data);
        }
      }
#endif
    }
  }
  return(f);
}

/* Open an ordinary (never piped) file by name for reading; "stdin" handling
   is as in my_lfopen(). Returns NULL on failure. */
FILE *my_fopen(char *name, char *filetype) {  /* open for reading */
  FILE *f;

  if (IS_STDIN(name)) {
    if (stdin_used) {
      warn('F', TRUE, "stdin already used; cannot use it as %s", filetype);
      f = NULL;
    }
    else {
      f = stdin;
      stdin_used = TRUE;
      debug('F', "Opening stdin as %s", filetype);
    }
  }
  else {
    if ((f = FOPENR(name)) == NULL)
      warn('F', TRUE, "Failed to open %s %s: ignoring it", filetype, name);
    else
      debug('F', "Opening %s as %s", name, filetype);
  }
  return(f);
}

#define LFCLOSE_DATEFMT "%d/%m/%y:%H%n"
/* Close a logfile (allp != NULL) or cache file (allp == NULL) opened with
   my_lfopen(): report its statistics, add them to the running totals
   (totdata, bys, bys7, dman), warn about a high proportion of corrupt lines
   and about time ranges overlapping files earlier in the lists allc / allp,
   then close the stream. Returns the result of fclose() or pclose(). */
int my_lfclose(FILE *f, Filelist *p, Filelist *allc, Filelist *allp,
               char *filetype, Dateman *dman, unsigned long *totdata,
               double *bys, double *bys7, char **monthname,
               unsigned int monthlen) {
  extern FILE *errfile;
  extern char *warn_args;

  unsigned long *data = p->data;
  char *datestr;
  Filelist *ap, *nextap;
  Inputformatlist *fmt;
  choice count[INPUT_NUMBER];
  logical done;
  int i, rc = 0;  /* rc must be defined even if no close branch is taken */

  debug('F', "Closing %s %s", filetype, IS_STDIN(p->name)?"stdin":(p->name));
  debug('S', "Successful requests: %lu", data[LOGDATA_SUCC]);
  debug('S', "Redirected requests: %lu", data[LOGDATA_REDIR]);
  debug('S', "Failed requests: %lu", data[LOGDATA_FAIL]);
  debug('S', "Requests returning informational status code: %lu",
        data[LOGDATA_INFO]);
  debug('S', "Status code not given: %lu", data[LOGDATA_UNKNOWN]);
  if (allp != NULL) {  /* i.e. logfile not cache file */
    debug('S', "Unwanted lines: %lu", data[LOGDATA_UNWANTED]);
    debug('S', "Corrupt lines: %lu", data[LOGDATA_CORRUPT]);
    /* warn if corrupt lines exceed a tenth of all other lines, and 10 */
    if (data[LOGDATA_CORRUPT] > (data[LOGDATA_SUCC] + data[LOGDATA_REDIR] +
                                 data[LOGDATA_FAIL] + data[LOGDATA_INFO] +
                                 data[LOGDATA_UNKNOWN] +
                                 data[LOGDATA_UNWANTED]) / 10 &&
        data[LOGDATA_CORRUPT] > 10) {
      warn('L', TRUE, "Large number of corrupt lines in %s %s: "
           "try different LOGFORMAT", filetype,
           IS_STDIN(p->name)?"stdin":(p->name));
      if (strchr(warn_args, 'L') != NULL)
        report_logformat(errfile, p->format, TRUE);
    }
  }
  for (i = 0; i < LOGDATA_NUMBER; i++)
    totdata[i] += data[i];
  *bys += p->bytes;
  *bys7 += p->bytes7;
  if (p->from <= p->to) {
    dman->firsttime = MIN(dman->firsttime, p->from);
    dman->lasttime = MAX(dman->lasttime, p->to);
    if (p->tz > 0)
      debug('S', "Times in %s offset by +%d minutes", filetype, p->tz);
    else if (p->tz < 0)
      debug('S', "Times in %s offset by %d minutes", filetype, p->tz);
    datestr = (char *)xmalloc((size_t)datefmtlen(LFCLOSE_DATEFMT, monthlen,
                                                 0, NULL, FALSE) + 1);
    debug('S', "Earliest entry in %s: %s", filetype,
          datesprintf(datestr, LFCLOSE_DATEFMT, OUT_NONE, p->from / 1440,
                      (p->from % 1440) / 60, p->from % 60, 0, 0, monthname,
                      NULL, 0, 0, NULL, FALSE));
    debug('S', "Latest entry in %s: %s", filetype,
          datesprintf(datestr, LFCLOSE_DATEFMT, OUT_NONE, p->to / 1440,
                      (p->to % 1440) / 60, p->to % 60, 0, 0, monthname,
                      NULL, 0, 0, NULL, FALSE));
    free((void *)datestr);
    /* Every entry of count[] is read by the overlap check below, so clear
       the whole array on both paths before filling in what is known. */
    for (i = 0; i < INPUT_NUMBER; i++)
      count[i] = 0;
    if (allp == NULL) {
      p->format->count[ITEM_FILE] = 2;  /* good enough for date check */
      count[ITEM_FILE] = 2;
      p->from += 4;  /* avoid false alerts */
    }
    else {
      for (fmt = p->format; fmt != NULL; TO_NEXT(fmt)) {
        for (i = 0; i < INPUT_NUMBER; i++)
          count[i] = MAX(count[i], fmt->count[i]);
      }
    }
    for (ap = (allc == NULL)?allp:allc; ap != p; ap = nextap) {
      if (ap->from < p->to && p->from < ap->to) {
        for (done = FALSE, fmt = ap->format; fmt != NULL && !done;
             TO_NEXT(fmt)) {
          for (i = 0; i < INPUT_NUMBER && !done; i++) {
            if (fmt->count[i] == 2 && count[i] == 2) {
              warn('L', TRUE,
                   "%ss %s and %s overlap: possible double counting",
                   filetype, ap->name, p->name);
              done = TRUE;
            }
          }
        }
      }
      nextap = ap->next;
      if (nextap == NULL)
        nextap = allp;  /* run through allc then through allp */
    }
  }
  else if (data[LOGDATA_SUCC] + data[LOGDATA_REDIR] + data[LOGDATA_FAIL] +
           data[LOGDATA_INFO] + data[LOGDATA_UNKNOWN] > 0)
    debug('S', "No times in %s", filetype);
  if (!(p->ispipe))
    rc = fclose(f);  /* Not much can go wrong with fclose. I hope. */
#ifndef NOPIPES
  else if (feof(f)) {
    if ((rc = pclose(f)) != 0)
      warn('F', TRUE, "Problems uncompressing %s %s", filetype, p->name);
  }
  else
    rc = pclose(f);  /* not reached EOF: pclose will return broken pipe */
#endif
  return(rc);
}

/* Close a file opened with my_fopen(); returns the result of fclose(). */
int my_fclose(FILE *f, char *name, char *filetype) {
  debug('F', "Closing %s %s", filetype, IS_STDIN(name)?"stdin":name);
  return(fclose(f));
}

/* Reset the per-logfile parsing globals and open the logfile. Returns NULL
   if the logfile has no format (nothing to analyse) or cannot be opened. */
FILE *logfile_init(Filelist *logfilep) {
  if (logfilep->format == NULL) {
    debug('F', "Ignoring logfile %s, which contains no items being analysed",
          logfilep->name);
    return(NULL);
  }
  pos = NULL;
  year = 0;
  bytes = 0;
  code = 0;
  return(my_lfopen(logfilep, "logfile"));
}

/* Read up to length bytes from f to start, and set block_end / block_bell
   accordingly. Returns EOF if nothing at all was read, else OK. */
choice getmoredata(FILE *f, char *start, size_t length) {
  block_end = start + fread((void *)start, 1, length, f);
  block_bell = block_end - BLOCK_EPSILON;  /* saves repeating this calc. */
  if (block_end == start)
    return(EOF);
  return(OK);
}

/* Read one line from f into start (NUL-terminated, without the newline),
   first skipping any leading newline characters. used is the number of
   buffer bytes already occupied before start, so that the read never goes
   beyond BLOCKSIZE bytes in total. Returns EOF at end of file, FALSE if the
   read stopped at the buffer limit, TRUE otherwise. */
choice getnextline(FILE *f, char *start, size_t used) {
  /* not very efficient: only for use during initialisation */
  char *s = start;

  if ((*s = (char)getc(f)) == (char)EOF && feof(f))  /* in case char == 255 */
    return(EOF);
  while (*s == '\r' || *s == '\n')
    *s = (char)getc(f);  /* run past any new line */
  if (*s == (char)EOF && feof(f))
    return(EOF);
  for (*(++s) = (char)getc(f);
       *s != '\r' && *s != '\n' && !feof(f) &&
         s < start + BLOCKSIZE - used - 1;
       *(++s) = (char)getc(f))
    ;  /* read in to next new line */
  *s = '\0';
  /* same limit as the loop above, so continuation reads are detected too */
  if (s == start + BLOCKSIZE - used - 1)
    return(FALSE);
  return(TRUE);
}

/* Move the current (incomplete) record to the start of the block, refill the
   rest of the block from f, and adjust pos and record_start to match. */
void shunt_data(FILE *f) {
  memmove((void *)block_start, (void *)record_start,
          (size_t)(block_end - record_start));
  (void)getmoredata(f, block_start + (block_end - record_start),
                    (size_t)(BLOCKSIZE - (block_end - record_start)));
  pos -= record_start - block_start;
  record_start = block_start;
}

/* Convert a three-letter English month abbreviation (any case) to 0..11,
   or ERR if it isn't one. Only m[0..2] are examined. */
int strtomonth(char *m)  /* convert 3 letter month abbrev. to int */
{
  int monthno = ERR;

  switch (m[0]) {
  case 'A':
  case 'a':
    if ((m[1] == 'p' || m[1] == 'P') && (m[2] == 'r' || m[2] == 'R'))
      monthno = 3;
    else if ((m[1] == 'u' || m[1] == 'U') && (m[2] == 'g' || m[2] == 'G'))
      monthno = 7;
    break;
  case 'D':
  case 'd':
    if ((m[1] == 'e' || m[1] == 'E') && (m[2] == 'c' || m[2] == 'C'))
      monthno = 11;
    break;
  case 'F':
  case 'f':
    if ((m[1] == 'e' || m[1] == 'E') && (m[2] == 'b' || m[2] == 'B'))
      monthno = 1;
    break;
  case 'J':
  case 'j':
    if ((m[1] == 'a' || m[1] == 'A') && (m[2] == 'n' || m[2] == 'N'))
      monthno = 0;
    if ((m[1] == 'u' || m[1] == 'U')) {
      if ((m[2] == 'l' || m[2] == 'L'))
        monthno = 6;
      else if ((m[2] == 'n' || m[2] == 'N'))
        monthno = 5;
    }
    break;
  case 'M':
  case 'm':
    if ((m[1] == 'a' || m[1] == 'A')) {
      if ((m[2] == 'r' || m[2] == 'R'))
        monthno = 2;
      else if ((m[2] == 'y' || m[2] == 'Y'))
        monthno = 4;
    }
    break;
  case 'N':
  case 'n':
    if ((m[1] == 'o' || m[1] == 'O') && (m[2] == 'v' || m[2] == 'V'))
      monthno = 10;
    break;
  case 'O':
  case 'o':
    if ((m[1] == 'c' || m[1] == 'C') && (m[2] == 't' || m[2] == 'T'))
      monthno = 9;
    break;
  case 'S':
  case 's':
    if ((m[1] == 'e' || m[1] == 'E') && (m[2] == 'p' || m[2] == 'P'))
      monthno = 8;
    break;
  }
  return(monthno);
}

/* The parse*() functions below share one signature: f is the input stream
   (for refilling the buffer), arg points to where the result is stored, and
   c is the separator character expected after the item, where relevant.
   They read from pos and advance it, returning TRUE on success. */

/* Parse a three-letter month name at pos into *(unsigned int *)arg. */
choice parsemonth(FILE *f, void *arg, char c) {
  unsigned int *m = (unsigned int *)arg;

  *block_end = '\0';
  *m = (unsigned int)strtomonth(pos);
  if (*m == (unsigned int)ERR)
    return(FALSE);
  else {
    pos += 3;
    return(TRUE);
  }
}

/* Parse a one- or two-digit unsigned number; a space is accepted in place
   of the first digit and counts as zero. */
choice parseuint2(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  if (!ISDIGIT(*pos)) {
    if (*pos == ' ')
      *x = 0;
    else
      return(FALSE);
  }
  else
    *x = *pos - '0';
  pos++;
  if (!ISDIGIT(*pos))
    return(TRUE);
  *x *= 10;
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Parse a one- or two-digit numeric month, storing it less one. A value of
   zero is rejected. */
choice parsenmonth(FILE *f, void *arg, char c) {  /* nearly same as uint2 */
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x = *pos - '0';
  pos++;
  if (!ISDIGIT(*pos)) {
    if (*x == 0) {
      pos--;
      return(FALSE);
    }
    (*x)--;  /* to convert to internal month representation */
    return(TRUE);
  }
  *x *= 10;
  *x += *pos - '0';
  if (*x == 0)
    return(FALSE);
  pos++;
  (*x)--;
  return(TRUE);
}

/* Parse an unsigned decimal number into *(unsigned long *)arg, failing
   (with pos unchanged) if it has no digits or would grow too large. */
choice parseulong(FILE *f, void *arg, char c) {
  unsigned long *x = (unsigned long *)arg;
  register char *p = pos;

  *block_end = '\0';
  if (!ISDIGIT(*p))
    return(FALSE);
  *x = 0;
  while (ISDIGIT(*p)) {
    if (*x >= 214748364)  /* max for Unix time, divided by 10 */
      return(FALSE);  /* best to keep pos at beginning of number this time */
    *x *= 10;
    *x += (*p - '0');
    p++;
  }
  pos = p;
  return(TRUE);
}

choice parseproctime(FILE *f, void *arg, char c) {
  /* parse processing time in decimal seconds, converting to milliseconds */
  unsigned long *x = (unsigned long *)arg;
  register char *p = pos;

  *block_end = '\0';
  if (!ISDIGIT(*p))  /* NB Decimals must begin "0." not just "." */
    return(FALSE);
  /* Test the lookahead digits nearest first, so that the && chain stops at
     the terminating '\0' instead of reading beyond it. */
  if (ISDIGIT(*(p + 1)) && ISDIGIT(*(p + 2)) && ISDIGIT(*(p + 3))) {
    *x = 1000000;  /* signifying at least 1000 seconds */
    while (ISDIGIT(*p))
      p++;
    if (*p == '.') {
      p++;
      while (ISDIGIT(*p))
        p++;
    }
    pos = p;
    return(TRUE);
  }
  for (*x = 0; ISDIGIT(*p); p++) {
    *x *= 10;
    *x += (*p - '0');
  }
  *x *= 1000;
  if (*p == '.') {  /* decimal follows; only read first 3 places */
    p++;
    if (ISDIGIT(*p)) {
      *x += 100 * (*(p++) - '0');
      if (ISDIGIT(*p)) {
        *x += 10 * (*(p++) - '0');
        if (ISDIGIT(*p)) {
          *x += (*(p++) - '0');
          while (ISDIGIT(*p))
            p++;
        }
      }
    }
  }
  pos = p;
  return(TRUE);
}

/* Parse exactly two digits as an unsigned number. */
choice parseuint2strict(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x = 10 * (*pos - '0');
  pos++;
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Parse a three-digit status code; a lone '-' is stored as 200. */
choice parsescode(FILE *f, void *arg, char c) {
  /* parseuint3strict() with twiddles */
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  if (*pos < '1' || *pos > '5') {  /* only accept codes 100 - 599, or - */
    if (*pos == '-') {  /* see also MIN_SC, SC_NUMBER in anlghea3.h */
      pos++;
      *x = 200;
      return(TRUE);
    }
    else
      return(FALSE);
  }
  *x = 100 * (*pos - '0');
  pos++;
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x += 10 * (*pos - '0');
  pos++;
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Parse exactly four digits as an unsigned number. */
choice parseuint4strict(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;

  *block_end = '\0';
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x = 1000 * (*pos - '0');
  pos++;
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x += 100 * (*pos - '0');
  pos++;
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x += 10 * (*pos - '0');
  pos++;
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x += *pos - '0';
  pos++;
  return(TRUE);
}

/* Parse an unsigned decimal integer of any length into *(double *)arg;
   a '-' is accepted and stored as 0. */
choice parseudint(FILE *f, void *arg, char c) {
  double *x = (double *)arg;

  *block_end = '\0';
  if (*pos == '-') {
    *x = 0.0;  /* because used for bytes (only) */
    pos++;
    return(TRUE);
  }
  if (!ISDIGIT(*pos))
    return(FALSE);
  *x = *pos - '0';
  pos++;
  while (ISDIGIT(*pos)) {
    *x *= 10;
    *x += *pos - '0';
    pos++;
  }
  return(TRUE);
}

/* Parse a two-digit year: 70 and above become 19xx, the rest 20xx. */
choice parseyear(FILE *f, void *arg, char c) {
  unsigned int *y = (unsigned int *)arg;
  logical rc;

  rc = parseuint2strict(f, arg, c);
  if (*y >= 70)
    *y += 1900;
  else
    *y += 2000;
  return(rc);
}

/* Parse the 'a' or 'p' (any case) of an am/pm marker, stored in lower case.
   Only that one character is consumed. */
choice parseam(FILE *f, void *arg, char c) {
  char *d = (char *)arg;

  *block_end = '\0';
  *d = TOLOWER(*pos);
  if (*d == 'a' || *d == 'p') {
    pos++;
    return(TRUE);
  }
  else
    return(FALSE);
}

/* Parse a textual result code up to separator c and translate the known
   strings to numerical status codes; anything else gives IGNORE_CODE.
   Empty fields are rejected. */
choice parsecode(FILE *f, void *arg, char c) {
  unsigned int *x = (unsigned int *)arg;
  /* parsejunk() may call shunt_data(), which moves the record within the
     buffer, so remember where the field starts relative to record_start
     rather than as a raw pointer. */
  size_t offset = (size_t)(pos - record_start);
  char *d, e;

  *block_end = '\0';
  if (parsejunk(f, NULL, c) == FALSE)
    return(FALSE);
  d = record_start + offset;
  if (pos == d)
    return(FALSE);
  e = *(pos - 1);  /* temporarily terminate the field for the comparisons */
  *(pos - 1) = '\0';
  *x = IGNORE_CODE;
  if (d[0] == 'O' && d[1] == 'K')
    *x = 200;
  else if (STREQ(d, "ERR!"))
    *x = 404;
  else if (STREQ(d, "PRIV"))
    *x = 401;
  else if (STREQ(d, "not modified"))
    *x = 304;
  else if (*(d++) == 'g' && *(d++) == 'e' && *(d++) == 't' &&
           *(d++) == ' ' && *(d++) == 'f') {
    if (STREQ(d, "ile"))
      *x = 200;
    else if (STREQ(d, "ailed"))
      *x = 499;
  }
  *(pos - 1) = e;
  return(TRUE);
}

/* Skip over text up to the separator c (WHITESPACE meaning space or tab)
   without storing it. The separator is temporarily marked in termchar[] and
   planted at *block_end as a sentinel. If the block runs out, the data is
   shunted once and the scan continued. On success pos is left after the
   separator (after the whole run of newline characters for '\n' or '\r',
   but on the separator itself for WHITESPACE). */
choice parsejunk(FILE *f, void *arg, char c) {
  /* NB allows empty strings */
  register char *p = pos;

  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = TRUE;
    termchar[(unsigned char)'\t'] = TRUE;
    *block_end = ' ';
  }
  else {
    termchar[(unsigned char)c] = TRUE;
    *block_end = c;
  }
  while (!termchar[(unsigned char)(*p)])
    p++;
  if (p == block_end) {
    if (record_start == block_start) {  /* can't make any more room */
      if (c == WHITESPACE) {
        termchar[(unsigned char)' '] = FALSE;
        termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      pos = p;
      return(FALSE);
    }
    pos = p;
    shunt_data(f);
    p = pos;
    *block_end = (c == WHITESPACE)?' ':c;
    while (!termchar[(unsigned char)(*p)])
      p++;
    if (p == block_end) {
      if (c == WHITESPACE) {
        termchar[(unsigned char)' '] = FALSE;
        termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      pos = p;
      return(FALSE);
    }
  }
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = FALSE;
    termchar[(unsigned char)'\t'] = FALSE;
  }
  else if (c != '\r' && c != '\n' && c != '\0')
    termchar[(unsigned char)c] = FALSE;
  /* we stopped on some terminating character: check it's the wanted one */
  if (*p != c && !(c == '\n' && *p == '\r') &&
      !(c == WHITESPACE && (*p == ' ' || *p == '\t'))) {
    pos = p;
    return(FALSE);
  }
  if (c == '\n' || c == '\r') {
    while ((*p == '\n' || *p == '\r') && p < block_end)
      p++;
  }
  else if (c != WHITESPACE)
    p++;
  pos = p;
  return(TRUE);
}

/* Require and skip at least one space or tab. */
choice parsespace(FILE *f, void *arg, char c) {
  *block_end = '\0';
  if (pos == block_end && record_start != block_start)
    shunt_data(f);
  if (*pos != '\t' && *pos != ' ')
    return(FALSE);
  while (*pos == '\t' || *pos == ' ')
    pos++;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    while (*pos == '\t' || *pos == ' ')
      pos++;
  }
  return(TRUE);
}

/* Skip any spaces and tabs; always succeeds. */
choice parseoptspace(FILE *f, void *arg, char c) {
  *block_end = '\0';
  if (pos == block_end && record_start != block_start)
    shunt_data(f);
  while (*pos == '\t' || *pos == ' ')
    pos++;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    while (*pos == '\t' || *pos == ' ')
      pos++;
  }
  return(TRUE);
}

/* Require a newline ('\n' or '\r'), and skip it together with any following
   characters marked in termchar[] other than '\0'. */
choice parsenewline(FILE *f, void *arg, char c) {
  /* allow new line to be preceded by white space */
  *block_end = 'a';
  if (*pos != '\n' && *pos != ' ' && *pos != '\r' && *pos != '\t')
    return(FALSE);
  while (*pos == ' ' || *pos == '\t')
    pos++;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = 'a';
    while (*pos == ' ' || *pos == '\t')
      pos++;
  }
  if (*pos != '\n' && *pos != '\r')
    return(FALSE);
  termchar[(unsigned char)'\0'] = FALSE;
  while (termchar[(unsigned char)(*pos)])
    pos++;
  if (pos == block_end && record_start != block_start) {
    shunt_data(f);
    *block_end = 'a';
    while (termchar[(unsigned char)(*pos)])
      pos++;
  }
  termchar[(unsigned char)'\0'] = TRUE;
  return(TRUE);
}

/* Advance pos to the next character marked in termchar[] ('\0' excepted),
   shunting the data once if the end of the block is reached first. */
void parsenonnewline(FILE *f) {
  *block_end = '\n';
  termchar[(unsigned char)'\0'] = FALSE;
  while (!termchar[(unsigned char)(*pos)])
    pos++;
  if (pos == block_end) {
    if (record_start == block_start) {
      termchar[(unsigned char)'\0'] = TRUE;
      return;
    }
    shunt_data(f);
    *block_end = '\n';
    while (!termchar[(unsigned char)(*pos)])
      pos++;
  }
  termchar[(unsigned char)'\0'] = TRUE;
}

/* As parsejunk(), but the text before the separator is copied, with a
   terminating '\0', into space obtained from the Memman passed as arg. */
choice parsestring(FILE *f, void *arg, char c) {
  /* NB allows empty strings */
  register char *p = pos;
  Memman *m = (Memman *)arg;
  size_t length = 0;
  char *string_start;

  string_start = p;
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = TRUE;
    termchar[(unsigned char)'\t'] = TRUE;
    *block_end = ' ';
  }
  else {
    termchar[(unsigned char)c] = TRUE;
    *block_end = c;
  }
  while (!termchar[(unsigned char)(*p)]) {
    p++;
    length++;
  }
  if (p == block_end) {
    if (record_start == block_start) {
      if (c == WHITESPACE) {
        termchar[(unsigned char)' '] = FALSE;
        termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      pos = p;
      return(FALSE);
    }
    string_start -= record_start - block_start;  /* it moves with the data */
    pos = p;
    shunt_data(f);
    p = pos;
    *block_end = (c == WHITESPACE)?' ':c;
    while (!termchar[(unsigned char)(*p)]) {
      p++;
      length++;
    }
    if (p == block_end) {
      if (c == WHITESPACE) {
        termchar[(unsigned char)' '] = FALSE;
        termchar[(unsigned char)'\t'] = FALSE;
      }
      else if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      pos = p;
      return(FALSE);
    }
  }
  if (c == WHITESPACE) {
    termchar[(unsigned char)' '] = FALSE;
    termchar[(unsigned char)'\t'] = FALSE;
  }
  else if (c != '\r' && c != '\n' && c != '\0')
    termchar[(unsigned char)c] = FALSE;
  if (*p != c && !(c == '\n' && *p == '\r') &&
      !(c == WHITESPACE && (*p == ' ' || *p == '\t'))) {
    pos = p;
    return(FALSE);
  }
  memcpy(submalloc(m, length + 1), (void *)string_start, length);
  *((char *)(m->next_pos) - 1) = '\0';  /* = curr_pos + length */
  if (c == '\n' || c == '\r') {
    while ((*p == '\n' || *p == '\r') && p < block_end)
      p++;
  }
  else if (c != WHITESPACE)
    p++;
  pos = p;
  return(TRUE);
}

/* As parsestring(), then change every '+' in the stored string to a space. */
choice parsemsbrow(FILE *f, void *arg, char c) {
  Memman *m = (Memman *)arg;
  char *d;

  if (parsestring(f, arg, c) == FALSE)
    return(FALSE);
  for (d = m->curr_pos; *d != '\0'; d++) {
    if (*d == '+')
      *d = ' ';
  }
  return(TRUE);
}

choice parseref(FILE *f, void *arg, char c) {
  /* For referrer in old referrer logs: as parsestring except also checks ->
     immediately after delimiting character (presumably space: may not be
     \n or \r: also WHITESPACE wouldn't work so is disallowed in
     strtoinfmt()) */
  register char *p = pos;
  Memman *m = (Memman *)arg;
  size_t length = 0;
  char *string_start;

  string_start = p;
  *block_end = '\r';
  termchar[(unsigned char)c] = TRUE;
  while (!termchar[(unsigned char)(*p)] ||
         (*p == c && (*(p + 1) != '-' || *(p + 2) != '>'))) {
    p++;
    length++;
  }
  if (p == block_end) {
    if (record_start == block_start) {
      if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      pos = p;
      return(FALSE);
    }
    string_start -= record_start - block_start;
    pos = p;
    shunt_data(f);
    p = pos;
    *block_end = '\r';
    while (!termchar[(unsigned char)(*p)] ||
           (*p == c && (*(p + 1) != '-' || *(p + 2) != '>'))) {
      p++;
      length++;
    }
    if (p == block_end) {
      if (c != '\r' && c != '\n' && c != '\0')
        termchar[(unsigned char)c] = FALSE;
      pos = p;
      return(FALSE);
    }
  }
  if (c != '\r' && c != '\n' && c != '\0')
    termchar[(unsigned char)c] = FALSE;
  if (*p != c) {
    pos = p;
    return(FALSE);
  }
  memcpy(submalloc(m, length + 1), (void *)string_start, length);
  *((char *)(m->next_pos) - 1) = '\0';
  p++;
  pos = p;
  return(TRUE);
}

/* Succeed, consuming one character, only if the next character is c. */
choice checkchar(FILE *f, void *arg, char c) {
  if (*pos == c) {
    pos++;
    return(TRUE);
  }
  else
    return(FALSE);
}

void resolveconflicts(unsigned int n, char *delims, ...) {
  /* Used in parselogfmt(). Munges a format header line, which is in pos, so
     that there are no "repeated" fields. n is the number of alternatives.
     The first whole-field match of any alternative is kept; the first two
     characters of every later match are overwritten with "XX". */
  va_list ap;
  char *arg, *s, *t;
  logical found;
  unsigned int i;

  va_start(ap, delims);
  for (i = 0, found = FALSE; i < n; i++) {
    arg = va_arg(ap, char *);
    for (s = strstr(pos, arg); s != NULL; s = strstr(s + 1, arg)) {
      t = s + strlen(arg);
      if ((s == pos || strchr(delims, *(s - 1)) != NULL) &&
          (*t == '\0' || strchr(delims, *t) != NULL)) {  /* true match */
        if (found) {  /* found an earlier occurrence */
          *s = 'X';
          *(s + 1) = 'X';
        }
        else  /* no earlier occurrence: leave this one alone */
          found = TRUE;
      }
    }
  }
  va_end(ap);
}

/* Work out the log format from a line of the logfile starting at pos, and
   install it with configlogfmt(). c selects the method: '0'/'1' guess the
   format from the look of the line; '2', '3', '5', '6' and '4' translate a
   WebSTAR, extended (W3, Microsoft, WebSTAR) or Netscape field-list header
   respectively. Returns NEWLOGFMT on success, BADLOGFMT if the format cannot
   be determined, or FALSE if no end of line is found in the buffer. */
choice parselogfmt(FILE *f, void *arg, char c) {
  extern Inputformatlist *logformat;
  extern char *workspace;  /* assume large enough, as usual */

  char *d, *e;
  unsigned int x, y;

  logformat->used = TRUE;  /* so as to start afresh */
  *block_end = '\n';
  d = strpbrk(pos, "\r\n");
  if (d == block_end) {
    shunt_data(f);
    *block_end = '\n';
    d = strpbrk(pos, "\r\n");
    if (d == block_end)
      return(FALSE);
  }
  if (d == NULL)
    return(BADLOGFMT);
  else
    *d = '\0';  /* treat the line as a string; restored to '\n' in case '1' */
  switch (c) {
  case '0':  /* DEFAULT format. These are caught and translated earlier:
                this is just in case we have specified
                DEFAULTLOGFORMAT DEFAULT */
  case '1':  /* AUTO format */
    if (*pos == '!' && *(pos + 1) == '!') {
      debug('F', " Detect that it's in WebSTAR format");
      configlogfmt((void *)&logformat, NULL, "WEBSTAR", NULL, -1);
    }
    else if (chrn(pos, ',') == 15) {
      /* the date is in the third comma-separated field */
      e = strchr(strchr(pos, ',') + 1, ',') + 2;
      x = (*(e++) - '0');
      if (*e != '/')
        x = x * 10 + (*(e++) - '0');
      y = (*(++e) - '0');
      if (*(++e) != '/')
        y = y * 10 + (*e - '0');
      if (x <= 12 && y > 12) {
        debug('F',
              " Detect that it's in Microsoft format (North American dates)");
        configlogfmt((void *)&logformat, NULL, "MICROSOFT-NA", NULL, -1);
      }
      else if (x > 12 && y <= 12) {
        debug('F',
              " Detect that it's in Microsoft format (international dates)");
        configlogfmt((void *)&logformat, NULL, "MICROSOFT-INT", NULL, -1);
      }
      else {
        warn('F', TRUE, "Microsoft logfile with ambiguous dates: "
             "use LOGFORMAT MICROSOFT-NA or LOGFORMAT MICROSOFT-INT");
        *d = '\n';
        return(BADLOGFMT);
      }
    }
    else if (*pos == 'f' && *(pos + 1) == 'o' && *(pos + 2) == 'r' &&
             *(pos + 3) == 'm' && *(pos + 4) == 'a' && *(pos + 5) == 't' &&
             *(pos + 6) == '=') {
      debug('F', " Detect that it's in Netscape format");
      configlogfmt((void *)&logformat, NULL, "NETSCAPE", NULL, -1);
    }
    else if (ISDIGIT(*pos) && ISDIGIT(*(pos + 3)) && *(pos + 5) == '/' &&
             *(pos + 14) == ':' && ISDIGIT(*(pos + 16)) &&
             chrn(pos, '\t') == 12) {
      x = (*pos - '0') * 10 + (*(pos + 1) - '0');
      y = (*(pos + 3) - '0') * 10 + (*(pos + 4) - '0');
      if (x <= 12 && y > 12) {
        debug('F',
              " Detect that it's in WebSite format (North American dates)");
        configlogfmt((void *)&logformat, NULL, "WEBSITE-NA", NULL, -1);
      }
      else if (x > 12 && y <= 12) {
        debug('F',
              " Detect that it's in Website format (international dates)");
        configlogfmt((void *)&logformat, NULL, "WEBSITE-INT", NULL, -1);
      }
      else {
        warn('F', TRUE, "WebSite logfile with ambiguous dates: "
             "use LOGFORMAT WEBSITE-NA or LOGFORMAT WEBSITE-INT");
        *d = '\n';
        return(BADLOGFMT);
      }
    }
    else if (strstr(pos, " -> ") != NULL) {
      debug('F', " Detect that it's in referrer log format");
      configlogfmt((void *)&logformat, NULL, "REFERRER", NULL, -1);
    }
    else if (*pos == '[' && (*(pos + 21) == ']' || *(pos + 27) == ']')) {
      debug('F', " Detect that it's in browser log format");
      configlogfmt((void *)&logformat, NULL, "BROWSER", NULL, -1);
    }
    else if (*pos == '#') {
      /* look at first the two lines to determine vendor, although we expect
         Microsoft string on first line, and WebSTAR on second. */
      for (e = d + 1; *e == '\n' || *e == '\r'; e++)
        ;
      if (headmatch(pos + 1, "Software: Microsoft Internet Information Serv")
          || headmatch(e, "#Software: Microsoft Internet Information Serv")) {
        debug('F',
              " Detect that it's in Microsoft's version of extended format");
        configlogfmt((void *)&logformat, NULL, "MS-EXTENDED", NULL, -1);
      }
      else if (headmatch(e, "#Software: WebSTAR") ||
               headmatch(pos + 1, "Software: WebSTAR")) {
        debug('F', " Detect that it's in extended format (WebSTAR server)");
        configlogfmt((void *)&logformat, NULL, "WEBSTAR-EXTENDED", NULL, -1);
      }
      else {
        debug('F', " Detect that it's in W3 extended format");
        configlogfmt((void *)&logformat, NULL, "EXTENDED", NULL, -1);
      }
    }
    else if ((e = strchr(pos + 6, '[')) != NULL && *(e + 27) == ']' &&
             strchr(pos, '"') == e + 29) {
      x = chrn(e + 23, '"');  /* the number of quotes tells the variant */
      if (x == 2) {
        debug('F', " Detect that it's in common log format");
        configlogfmt((void *)&logformat, NULL, "COMMON", NULL, -1);
      }
      else if (x == 3) {
        debug('F',
              " Detect that it's in Microsoft's broken common log format");
        configlogfmt((void *)&logformat, NULL, "MS-COMMON", NULL, -1);
      }
      else if (x == 6) {
        debug('F', " Detect that it's in NCSA combined format");
        configlogfmt((void *)&logformat, NULL, "COMBINED", NULL, -1);
      }
      else {
        *d = '\n';
        return(BADLOGFMT);
      }
    }
    else {
      *d = '\n';
      return(BADLOGFMT);
    }
    *d = '\n';
    break;
  case '2':  /* WebSTAR format */
    /* pos is already start of format proper */
    strtoupper(pos);
    resolveconflicts(5, " \t", "SC-STATUS", "RESULT_CODE", "STATUS",
                     "CS-STATUS", "RESULT");
    resolveconflicts(3, " \t", "CS-URI-STEM", "CS-URI", "URL");
    resolveconflicts(2, " \t", "BYTES", "BYTES_SENT");
    resolveconflicts(4, " \t", "HOSTNAME", "C-DNS", "CS-HOST", "CS-DNS");
    resolveconflicts(2, " \t", "C-IP", "CS-IP");
    resolveconflicts(2, " \t", "REFERER", "CS(REFERER)");
    resolveconflicts(2, " \t", "AGENT", "CS(USER-AGENT)");
    resolveconflicts(4, " \t", "HOSTFIELD", "CS(HOST)", "HOST", "CS-SIP");
    resolveconflicts(2, " \t", "SEARCH_ARGS", "CS-URI-QUERY");
    workspace[0] = '\0';
    pos = strtok(pos, " \t");
    while (pos != NULL) {
      if (!IS_EMPTY_STRING(workspace))
        strcat(workspace, "\t");
      if (STREQ(pos, "COMMON_LOG_FORMAT"))
        strcpy(workspace, "COMMON");
      else if (STREQ(pos, "DATE"))
        strcat(workspace, "%m/%d/%y");
      else if (STREQ(pos, "TIME"))
        strcat(workspace, "%W%h:%n:%j");
      else if (STREQ(pos, "RESULT"))
        strcat(workspace, "%C");
      else if (STREQ(pos, "SC-STATUS") || STREQ(pos, "CS-STATUS") ||
               STREQ(pos, "RESULT_CODE") || STREQ(pos, "STATUS"))
        strcat(workspace, "%c");
      else if (STREQ(pos, "URL") || STREQ(pos, "CS-URI-STEM") ||
               STREQ(pos, "CS-URI"))
        strcat(workspace, "%r");
      else if (STREQ(pos, "BYTES") || STREQ(pos, "BYTES_SENT"))
        strcat(workspace, "%b");
      else if (STREQ(pos, "HOSTNAME") || STREQ(pos, "CS-HOST") ||
               STREQ(pos, "C-DNS") || STREQ(pos, "CS-DNS"))
        strcat(workspace, "%S");
      else if (STREQ(pos, "CS-IP") || STREQ(pos, "C-IP"))
        strcat(workspace, "%s");
      else if (STREQ(pos, "REFERER"))
        strcat(workspace, "%f");
      else if (STREQ(pos, "CS(REFERER)"))
        strcat(workspace, "\"%f\"");
      else if (STREQ(pos, "AGENT"))
        strcat(workspace, "%B");
      else if (STREQ(pos, "CS(USER-AGENT)"))
        strcat(workspace, "\"%B\"");
      else if (STREQ(pos, "CS(HOST)") || STREQ(pos, "HOSTFIELD"))
        strcat(workspace, "\"%v\"");
      else if (STREQ(pos, "HOST") || STREQ(pos, "CS-SIP"))
        strcat(workspace, "%v");
      else if (STREQ(pos, "SEARCH_ARGS") || STREQ(pos, "CS-URI-QUERY"))
        strcat(workspace, "%q");
      else if (STREQ(pos, "USER"))
        strcat(workspace, "%u");
      else if (STREQ(pos, "TRANSFER_TIME"))
        strcat(workspace, "%t");
      else
        strcat(workspace, "%j");
      pos = strtok((char *)NULL, " \t");
    }
    configlogfmt((void *)&logformat, NULL, "WEBSTAR", NULL, -1);
    configlogfmt((void *)&logformat, NULL, workspace, NULL, -3);
    pos = d + 1;  /* start at next line */
    (void)parsenewline(f, NULL, '\0');
    break;
  case '3':  /* W3 extended format */
  case '5':  /* Microsoft's attempt at same */
  case '6':  /* WebSTAR's (much closer :-) attempt at same */
    strtoupper(pos);
    resolveconflicts(2, " \t", "BYTES", "SC-BYTES");
    resolveconflicts(2, " \t", "C-IP", "CS-IP");
    if (c == '6') {  /* WebSTAR */
      resolveconflicts(3, " \t", "C-DNS", "CS-DNS", "CS-HOST");
      resolveconflicts(6, " \t", "CS(HOST)", "S-DNS", "S-IP", "CS-SIP",
                       "S-SITENAME", "S-COMPUTERNAME");
    }
    else {
      resolveconflicts(2, " \t", "C-DNS", "CS-DNS");
      resolveconflicts(7, " \t", "CS(HOST)", "CS-HOST", "S-DNS", "S-IP",
                       "S-SITENAME", "S-COMPUTERNAME", "CS-SIP");
    }
    resolveconflicts(2, " \t", "CS-URI-STEM", "CS-URI");
    resolveconflicts(3, " \t", "CS-USERNAME", "CS(FROM)", "CS(COOKIE)");
    workspace[0] = '\0';
    pos = strtok(pos, " \t");
    while (pos != NULL) {
      if (!IS_EMPTY_STRING(workspace))
        strcat(workspace, "%w");
      if (STREQ(pos, "DATE"))
        strcat(workspace, "%Y-%m-%d");
      else if (STREQ(pos, "TIME"))
        strcat(workspace, "%h:%n:%j");
      else if (STREQ(pos, "BYTES") || STREQ(pos, "SC-BYTES"))
        strcat(workspace, "%b");
      else if (STREQ(pos, "SC-STATUS"))
        strcat(workspace, "%c");
      else if (STREQ(pos, "C-DNS") || STREQ(pos, "CS-DNS"))
        strcat(workspace, "%S");
      else if (STREQ(pos, "CS-IP") || STREQ(pos, "C-IP"))
        strcat(workspace, "%s");
      else if (STREQ(pos, "CS-URI-STEM") || STREQ(pos, "CS-URI"))
        strcat(workspace, "%r");
      else if (STREQ(pos, "CS(REFERER)")) {
        if (c == '5')  /* Microsoft */
          strcat(workspace, "%f");
        else
          strcat(workspace, "\"%f\"");
      }
      else if (STREQ(pos, "CS(USER-AGENT)")) {
        if (c == '5')  /* Microsoft */
          strcat(workspace, "%A");
        else
          strcat(workspace, "\"%B\"");
      }
      else if (STREQ(pos, "CS-HOST")) {
        if (c == '6')  /* WebSTAR */
          strcat(workspace, "%S");
        else
          strcat(workspace, "%v");
      }
      else if (STREQ(pos, "CS(HOST)"))
        strcat(workspace, "\"%v\"");
      else if (STREQ(pos, "S-IP") || STREQ(pos, "S-DNS") ||
               STREQ(pos, "CS-SIP") || STREQ(pos, "S-SITENAME") ||
               STREQ(pos, "S-COMPUTERNAME"))
        strcat(workspace, "%v");
      else if (STREQ(pos, "CS-URI-QUERY"))
        strcat(workspace, "%q");
      else if (STREQ(pos, "CS(FROM)"))
        strcat(workspace, "\"%u\"");
      else if (STREQ(pos, "CS-USERNAME") || STREQ(pos, "CS(COOKIE)"))
        strcat(workspace, "%u");
      else if (STREQ(pos, "TIME-TAKEN")) {
        if (c == '5')  /* Microsoft */
          strcat(workspace, "%T");
        else
          strcat(workspace, "%t");
      }
      else
        strcat(workspace, "%j");
      pos = strtok((char *)NULL, " \t");
    }
    if (c == '3')
      configlogfmt((void *)&logformat, NULL, "EXTENDED", NULL, -1);
    else if (c == '5')
      configlogfmt((void *)&logformat, NULL, "MS-EXTENDED", NULL, -1);
    else  /* c == '6' */
      configlogfmt((void *)&logformat, NULL, "WEBSTAR-EXTENDED", NULL, -1);
    configlogfmt((void *)&logformat, NULL, workspace, NULL, -3);
    pos = d + 1;
    (void)parsenewline(f, NULL, '\0');
    break;
  case '4':  /* Netscape format */
    resolveconflicts(2, "%", "Req->vars.auth-user", "Req->vars.pauth-user");
    resolveconflicts(2, "%", "Req->reqpb.clf-request",
                     "Req->reqpb.proxy-request");
    resolveconflicts(3, "%", "Req->srvhdrs.content-length",
                     "Req->headers.content-length", "Req->vars.p2c-cl");
    resolveconflicts(2, "%", "Req->vars.xfer-time",
                     "Req->vars.xfer-time-total");
    workspace[0] = '\0';
    while (*pos != '\0') {
      if (*pos != '%') {  /* literal character: copy it across */
        e = strchr(workspace, '\0');
        *e = *(pos++);
        *(e + 1) = '\0';
      }
      else {  /* a %name% field */
        if ((e = strchr(++pos, '%')) == NULL)
          return(BADLOGFMT);
        else
          *e = '\0';
        if (STREQ(pos, "Ses->client.ip"))
          strcat(workspace, "%S");
        else if (STREQ(pos, "Req->vars.auth-user") ||
                 STREQ(pos, "Req->vars.pauth-user"))
          strcat(workspace, "%u");
        else if (STREQ(pos, "SYSDATE"))
          strcat(workspace, "%d/%M/%Y:%h:%n:%j");
        else if (STREQ(pos, "Req->reqpb.clf-request") ||
                 STREQ(pos, "Req->reqpb.proxy-request"))
          strcat(workspace, "%j%w%r%wHTTP%j");
        else if (STREQ(pos, "Req->srvhdrs.clf-status"))
          strcat(workspace, "%c");
        else if (STREQ(pos, "Req->srvhdrs.content-length") ||
                 STREQ(pos, "Req->headers.content-length") ||
                 STREQ(pos, "Req->vars.p2c-cl"))
          strcat(workspace, "%b");
        else if (STREQ(pos, "Req->headers.referer"))
          strcat(workspace, "%f");
        else if (STREQ(pos, "Req->headers.user-agent"))
          strcat(workspace, "%B");
        else if (STREQ(pos, "Req->headers.host"))
          strcat(workspace, "%v");
        else if (STREQ(pos, "Req->vars.xfer-time") ||
                 STREQ(pos, "Req->vars.xfer-time-total"))
          strcat(workspace, "%t");
        else
          strcat(workspace, "%j");
        pos = e + 1;
      }
    }
    configlogfmt((void *)&logformat, NULL, "NETSCAPE", NULL, -1);
    configlogfmt((void *)&logformat, NULL, workspace, NULL, -3);
    pos = d + 1;
    (void)parsenewline(f, NULL, '\0');
    break;
  }
  return(NEWLOGFMT);
}

/* Try to parse one record, starting at record_start, by applying each input
   function of format in turn. Returns TRUE if all succeed, EOF at the end
   of the data, or else the failing function's return code, in which case
   any strings already stored for this record are discarded. */
choice parsenextrecord(FILE *f, Inputformat *format) {
  Inputformat *ipf, *ipf2;
  logical rc;

  if (pos == NULL) {  /* first call for this file */
    if (getmoredata(f, block_start, BLOCKSIZE) == EOF)
      return(EOF);
    pos = block_start;
    record_start = pos;
  }
  pos = record_start;
  for (ipf = format; ipf->inpfns != NULL; TO_NEXT(ipf)) {
    if (pos > block_bell) {  /* getting near the end of the block */
      if (record_start != block_start)
        shunt_data(f);
      if (pos == block_end && block_end - block_start < BLOCKSIZE)
        return(EOF);
    }
    rc = ipf->inpfns->fn(f, ipf->inpfns->opt, ipf->sep);
    if (rc != TRUE) {
      for (ipf2 = format; ipf2 != ipf; TO_NEXT(ipf2)) {
        /* parsemsbrow stores its string through parsestring, so it needs
           the same treatment */
        if (ipf2->inpfns->fn == &parsestring ||
            ipf2->inpfns->fn == &parsemsbrow ||
            ipf2->inpfns->fn == &parseref)
          ((Memman *)(ipf2->inpfns->opt))->next_pos =
            ((Memman *)(ipf2->inpfns->opt))->curr_pos;
        /* reset strings; NB ipf returned !TRUE so didn't allocate */
      }
      return(rc);
    }
  }
  return(TRUE);
}

/* Split the configuration line s, in place, into a command and up to two
   arguments. An argument may be delimited by '...', "..." or (...), and an
   unquoted '#' starts a comment. Returns -1 for a blank or comment line,
   otherwise the number of arguments found (3 meaning more than two). Only
   the pointers corresponding to items found are assigned. */
int parseconfline(char *s, char **cmd, char **arg1, char **arg2) {
  char *c, d;

  for (c = s; *c == ' ' || *c == '\t'; c++)
    ;  /* run past white space */
  if (*c == '\0' || *c == '#')
    return(-1);
  *cmd = c;
  while (*c != ' ' && *c != '\t' && *c != '\0' && *c != '#')
    c++;
  if (*c == '\0' || *c == '#') {
    *c = '\0';
    return(0);
  }
  *c = '\0';
  c++;
  for ( ; *c == ' ' || *c == '\t'; c++)
    ;  /* run past white space again */
  if (*c == '\0' || *c == '#')
    return(0);
  *arg1 = c;
  if (*c == '\'' || *c == '"' || *c == '(') {
    d = (*c == '(')?')':(*c);  /* terminating character for next string */
    *arg1 = (++c);
    while (*c != d && *c != '\0')
      c++;
  }
  else
    while (*c != ' ' && *c != '\t' && *c != '\0' && *c != '#')
      c++;
  if (*c == '\0' || *c == '#') {
    *c = '\0';
    return(1);
  }
  *c = '\0';
  c++;
  for ( ; *c == ' ' || *c == '\t'; c++)
    ;
  if (*c == '\0' || *c == '#')
    return(1);
  *arg2 = c;
  if (*c == '\'' || *c == '"' || *c == '(') {
    d = (*c == '(')?')':(*c);
    *arg2 = (++c);
    while (*c != d && *c != '\0')
      c++;
  }
  else
    while (*c != ' ' && *c != '\t' && *c != '\0' && *c != '#')
      c++;
  if (*c == '\0' || *c == '#') {
    *c = '\0';
    return(2);
  }
  *c = '\0';
  c++;
  for ( ; *c == ' ' || *c == '\t'; c++)
    ;
  if (*c == '\0' || *c == '#')
    return(2);
  return(3);
}

/* Read the next non-blank, non-comment configuration line from f into the
   block buffer, joining lines which end in a backslash, and split it with
   parseconfline(). Returns parseconfline()'s code, or EOF. */
int nextconfline(FILE *f, char **cmd, char **arg1, char **arg2) {
  /* if 255 increased, so must u[] be in confline() */
  size_t l;
  int rc;

  *cmd = NULL;
  *arg1 = NULL;
  *arg2 = NULL;
  while (TRUE) {
    if (getnextline(f, block_start, 0) == EOF)
      return(EOF);
    while ((l = strlen(block_start)) > 0 &&
           *(block_start + l - 1) == '\\') {
      /* continuation line: read it in over the backslash */
      if (getnextline(f, block_start + l - 1, l - 1) == EOF)
        return(EOF);
    }
    if (l >= 255) {
      *(block_start + 70) = '\0';
      warn('C', TRUE, "Ignoring long configuration line starting\n%s",
           block_start);
    }
    else if ((rc = parseconfline(block_start, cmd, arg1, arg2)) != -1)
      return(rc);
    /* o/wise line was blank or a comment: go round again */
  }
}

/* Read the next line of the language file name which doesn't begin "##".
   If want is true, return the line (in the block buffer), and it is an
   error for the file to end first. If want is false, return NULL at end of
   file, and it is an error for another line to be present. */
char *nextlngstr(FILE *f, char *name, logical want) {
  while (TRUE) {
    if (getnextline(f, block_start, 0) == EOF) {  /* EOF reached */
      if (want)
        error("language file %s too short", name);
      else
        return((char *)NULL);
    }
    if (strlen(block_start) >= 255)
      error("language file %s contains excessively long lines", name);
    if (block_start[0] != '#' || block_start[1] != '#') {  /* found a line */
      if (want)
        return(block_start);
      else
        error("language file %s too long", name);
    }
    /* otherwise just found a comment; go round again */
  }
}

/* Read one line of the DNS file, which should consist of a time code, a
   name and an alias separated by single spaces. Returns TRUE with *timec,
   *name and *alias set (the strings pointing into the block buffer), FALSE
   for a bad line, or EOF. */
choice nextdnsline(FILE *f, timecode_t *timec, char **name, char **alias) {
  char *timestr = NULL;

  *name = NULL;
  *alias = NULL;
  if (getnextline(f, block_start, 0) == EOF)
    return(EOF);
  if (strlen(block_start) >= 255) {
    *(block_start + 70) = '\0';
    warn('C', TRUE, "Ignoring long line in DNS file starting\n%s",
         block_start);
    return(FALSE);
  }
  if ((timestr = strtok(block_start, " ")) == NULL || !ISDIGIT(*timestr) ||
      (*name = strtok((char *)NULL, " ")) == NULL ||
      (*alias = strtok((char *)NULL, " ")) == NULL ||
      strtok((char *)NULL, " ") != NULL ||
      (*timec = strtoul(timestr, (char **)NULL, 10)) == 0) {
    warn('C', TRUE, "Ignoring corrupt line in DNS file looking like\n%s",
         block_start);
    return(FALSE);
  }
  return(TRUE);
}

/* Read the domains file. Each line other than a comment should be
   "domain level description", with level a single digit from 1 to 9; it is
   turned into a DOMLEVEL and a DOMOUTPUTALIAS configuration command. */
void process_domainsfile(FILE *f, Options *op) {
  /* size of v is bounded because u in confline is */
  char *s, *t, *c, *u, v[256];

  while (TRUE) {
    if (getnextline(f, block_start, 0) == EOF)
      return;
    if (*block_start == '#')
      ;  /* ignore comment line */
    else if (strlen(block_start) >= 250) {
      *(block_start + 70) = '\0';
      warn('C', TRUE, "Ignoring long line in domains file starting\n%s",
           block_start);
    }
    else {
      for (s = block_start; *s == ' ' || *s == '\t'; s++)
        ;  /* s = start of domain */
      for (t = s; *t != ' ' && *t != '\t' && *t != '\0'; t++)
        ;  /* t = end of domain */
      if (*t == '\0')
        warn('C', TRUE, "Ignoring incomplete line in domains file\n%s",
             block_start);
      else {
        for (c = t + 1; *c == ' ' || *c == '\t'; c++)
          ;  /* c = level digit */
        if (*c < '1' || *c > '9' || (*(c + 1) != ' ' && *(c + 1) != '\t'))
          warn('C', TRUE, "Ignoring corrupt line in domains file\n%s",
               block_start);
        else {
          *t = '\0';
          *(c + 1) = '\0';
          for (t = c + 2; *t == ' ' || *t == '\t'; t++)
            ;  /* t = start of description */
          /* Strip trailing white space from the description. The end must
             be sought from t: the line now has '\0's earlier on, after the
             domain and the level. */
          for (u = strchr(t, '\0') - 1; (*u == ' ' || *u == '\t') && u > t;
               u--)
            ;
          if (u > t)
            *(u + 1) = '\0';
          confline(op, "DOMLEVEL", s, c, -1);
          sprintf(v, ".%s (%s)", s, t);
          confline(op, "DOMOUTPUTALIAS", s, v, -1);
        }
      }
    }
  }
}