/***             analog 4.13             http://www.analog.cx/             ***/
/*** This program is copyright (c) Stephen R. E. Turner 1995 - 2000 except as
 *** stated otherwise. Distribution, usage and modification of this program is
 *** subject to the conditions of the Licence which you should have received
 *** with it. This program comes with no warranty, expressed or implied.   ***/

/*** process.c; process some data ***/

#include "anlghea3.h"

/* process_data(): account for one already-parsed log line.
 *
 * The parsed fields are not passed in; they are read from the extern globals
 * declared at the top of the function (year, month, date, hr, min, code,
 * unixtime, proctime, am, bytes) and from the string arenas mm[], mmq, mms.
 *
 * The function works in two phases:
 *   1. Decide whether the line is wanted (status code, date, then each item
 *      in turn). On the way it looks up or creates the hash entry for each
 *      item. An unwanted line bumps logfilep->data[LOGDATA_UNWANTED] (or
 *      LOGDATA_UNKNOWN for IGNORE_CODE) and returns; a malformed one is
 *      reported through corrupt_line() and returns.
 *   2. Score the line: per-logfile counters, the array reports (status code,
 *      size, processing time), the date reports, the trees and derived
 *      reports for items that were not hashed, and finally the hash entries.
 *
 * Parameters, as used by the code below:
 *   logfilep   logfile record; its data[], from, to, bytes and bytes7 fields
 *              are updated, and tz, prefix, prefixlen, pvpos are read.
 *   lf         log file stream, only handed on to corrupt_line().
 *   hash       per-item hash tables, passed to hashfind().
 *   arraydata  array reports, indexed by REP_xxx - FIRST_ARRAYREP.
 *   count      per-field usage flags. 0 is treated as "field absent"; the
 *              value 2 enables scoring for that field (see strtoinfmt()).
 *   code2type  maps the status code to SUCCESS / FAILURE / REDIRECT / INFO /
 *              UNWANTED.
 *   wanthead   per-item include lists, passed to included() and hashfind().
 *   ispagehead include list passed to pageq() and hashfind().
 *   aliashead  per-item alias lists, passed to hashfind() or do_alias().
 *   argshead, refargshead  passed to prealias() for ITEM_FILE and
 *              ITEM_REFERRER respectively.
 *   dman       date manager: passed to wantdate()/wantunixtime()/datehash();
 *              last7from and last7to delimit the "last 7" window.
 *   tree, derv tree and derived reports; alltrees / alldervs list the report
 *              codes to visit, each list terminated by REP_NUMBER.
 *   lowmem     per-item memory-saving level (0, 1, 2, or above 2); selects
 *              one of the lookup strategies in the item loop.
 *   case_insensitive, usercase_insensitive, convfloor, dirsuffix,
 *   dirsufflength, granularity
 *              passed straight through to prealias(), hashfind()/do_alias(),
 *              makederived() and datehash() as seen below.
 */
void process_data(Filelist *logfilep, FILE *lf, Hashtable **hash,
                  Arraydata **arraydata, choice *count, choice *code2type,
                  Include **wanthead, Include *ispagehead, Alias **aliashead,
                  Include *argshead, Include *refargshead, Dateman *dman,
                  Tree **tree, Derv **derv, choice *alltrees, choice *alldervs,
                  choice *lowmem, logical case_insensitive,
                  logical usercase_insensitive, unsigned char convfloor,
                  char *dirsuffix, unsigned int dirsufflength,
                  unsigned int granularity) {
  extern unsigned int year, month, date, hr, min, code;
  extern unsigned long unixtime, proctime;
  extern char am;
  extern double bytes;
  extern Memman mm[], mmq, mms, *amemman;
  extern choice *rep2type;
  extern Hashentry *unwanted_entry, *blank_entry;
  extern Hashindex *dummy_item;

  /* gp[i] remembers the result of the previous hashfind() for item i, so
     that a line repeating the same name can skip the lookup (lowmem 0). */
  static Hashindex *gp[ITEM_NUMBER];

  Hashentry *item[ITEM_NUMBER];
  logical wanttree[ITEM_NUMBER];
  logical isitpage, last7;
  choice ispage = UNSET;
  choice wanted = TRUE, rc, outcome;
  timecode_t timecode = FIRST_TIME;
  char *name, *namestart, *nameend;
  size_t len;
  choice i, j;

  /*** check whether this line is wanted ***/

  if (count[INP_CODE] != 0) {
    if (code == IGNORE_CODE) {
      for (j = 0; j < ITEM_NUMBER; j++) {  /* reset strings */
        if (count[j] != 0)
          mm[j].next_pos = mm[j].curr_pos;
      }
      mmq.next_pos = mmq.curr_pos;
      mms.next_pos = mms.curr_pos;
      logfilep->data[LOGDATA_UNKNOWN]++;
      return;
    }
    else if (code2type[code] == UNWANTED)
      wanted = FALSE;
  }

  if (wanted && count[INP_DATE] > 0) {
    if (count[INP_UNIXTIME])
      wanted = wantunixtime(&timecode, dman, unixtime, logfilep->tz);
    else {
      if (count[INP_AM]) {
        /* convert a 12-hour clock reading to a 24-hour one */
        if (hr > 12) {
          corrupt_line(lf, logfilep, "Hour greater than 12", -1);
          return;
        }
        else if (hr == 12)
          hr = 0;
        if (am == 'p')
          hr += 12;
      }
      wanted = wantdate(&timecode, dman, hr, min, date, month, year,
                        logfilep->tz);
    }
    if (wanted == ERR) {  /* corrupt date */
      corrupt_line(lf, logfilep, "Corrupt date or time", -1);
      return;
    }
  }   /* end count[INP_DATE] > 0 */

  for (i = 0; i < ITEM_NUMBER; i++) {
    wanttree[i] = FALSE;
    if (!wanted) {
      for (j = i; j < ITEM_NUMBER; j++) {  /* reset not-yet-hashed strings */
        if (count[j] != 0)                 /* NB i is now (unwanted i) + 1 */
          mm[j].next_pos = mm[j].curr_pos;
      }
      mmq.next_pos = mmq.curr_pos;
      mms.next_pos = mms.curr_pos;
      logfilep->data[LOGDATA_UNWANTED]++;
      return;
    }
    if (i == ITEM_HOST)
      prealiasS(&(mm[ITEM_HOST]), &mms);
    name = (char *)(mm[i].curr_pos);
    if (count[i] == 0 || IS_EMPTY_STRING(name) ||
        (name[0] == '-' && name[1] == '\0')) {
      /* absent, empty or "-": treat the item as blank */
      item[i] = blank_entry;
      /* or unwanted_; but we get wanted right anyway */
      wanted = (wanthead[i] == NULL || included("", FALSE, wanthead[i]));
      /* wanthead[i] == NULL is tested again in included() but it often saves
         a call to that function, because blankness is common. */
    }
    else {
      if (i == ITEM_FILE || i == ITEM_REFERRER) {
        if ((j = prealias(&(mm[i]), &(mm[ITEM_VHOST]), item[ITEM_VHOST], &mmq,
                          (logical)((i == ITEM_FILE)?case_insensitive:FALSE),
                          (i == ITEM_FILE)?(logfilep->prefix):NULL,
                          logfilep->prefixlen, logfilep->pvpos,
                          (i == ITEM_FILE)?argshead:refargshead)) < 0) {
          if (j == -1)
            corrupt_line(lf, logfilep,
                         "%v in file prefix but no VHOST in line", -1);
          else
            corrupt_line(lf, logfilep, "Filename too long", -1);
          return;
        }
      }
      if (lowmem[i] == 0) {
        /* aliasing is done inside hashfind(); reuse last result if possible */
        if (gp[i] == NULL || !STREQ(name, gp[i]->name)) {
          gp[i] = hashfind(&mm[i], &(hash[i]), wanthead[i], UNSET, ispagehead,
                           aliashead[i], dirsuffix, dirsufflength,
                           usercase_insensitive, 0, i, FALSE);
        }     /* if name the same as last time, don't need */
        else  /* to hashfind again, or save the name */
          mm[i].next_pos = mm[i].curr_pos;
        item[i] = (Hashentry *)(gp[i]->other);
        wanted = (choice)(ENTRY_WANTED(item[i]));
      }
      else if (lowmem[i] == 1) {
        /* alias first, then look up the result with no alias list */
        if ((rc = do_alias(name, amemman, aliashead[i], dirsuffix,
                           dirsufflength, usercase_insensitive, 0, i)) ==
            FALSE) {
          item[i] = hashfind(&mm[i], &(hash[i]), wanthead[i], UNSET,
                             ispagehead, NULL, dirsuffix, dirsufflength,
                             usercase_insensitive, 0, i, TRUE)->own;
        }
        else if (rc == TRUE) {
          mm[i].next_pos = mm[i].curr_pos;   /* don't save string */
          item[i] = hashfind(amemman, &(hash[i]), wanthead[i], UNSET,
                             ispagehead, NULL, dirsuffix, dirsufflength,
                             usercase_insensitive, 0, i, TRUE)->own;
        }
        else { /* rc == ERR */
          mm[i].next_pos = mm[i].curr_pos;
          if (included("", FALSE, wanthead[i]))
            item[i] = blank_entry;
          else
            item[i] = unwanted_entry;
        }
        wanted = (choice)(ENTRY_WANTED(item[i]));
      }
      else { /* lowmem[i] >= 2 */
        if ((rc = do_alias(name, amemman, aliashead[i], dirsuffix,
                           dirsufflength, usercase_insensitive, 0, i)) ==
            TRUE) {
          /* replace the raw string in mm[i] by the aliased one */
          mm[i].next_pos = mm[i].curr_pos;  /* don't save old string */
          len = strlen((char *)(amemman->curr_pos));
          memcpy(submalloc(&(mm[i]), len + 1), amemman->curr_pos, len + 1);
          name = (char *)(mm[i].curr_pos);  /* which might have changed */
          amemman->next_pos = amemman->curr_pos;
        }
        if (rc == ERR) {
          if (included("", FALSE, wanthead[i])) {
            item[i] = blank_entry;
            /* NOTE(review): this clears ispage for whichever item i is being
               processed, not only ITEM_FILE -- confirm that is intended. */
            ispage = FALSE;
          }
          else
            wanted = FALSE;
          mm[i].next_pos = mm[i].curr_pos;
        }
        else {
          isitpage = pageq(name, ispagehead, i);
          if (i == ITEM_FILE)
            ispage = (choice)isitpage;
          if (included(name, isitpage, wanthead[i])) {
            if (lowmem[i] == 2) {
              item[i] = hashfind(&(mm[i]), &(hash[i]), wanthead[i], isitpage,
                                 ispagehead, NULL, dirsuffix, dirsufflength,
                                 usercase_insensitive, 0, i, TRUE)->own;
            }
            else {
              /* not hashed at all: only the trees / derived reports below
                 see this item, via wanttree[i] */
              item[i] = blank_entry;
              wanttree[i] = TRUE;
              mm[i].next_pos = mm[i].curr_pos;
            }
          }
          else {
            wanted = FALSE;
            mm[i].next_pos = mm[i].curr_pos;
          }
        }
      }  /* end lowmem[i] >= 2 */
    }
  }  /* end for i */

  /* the loop only notices !wanted at the start of the next iteration, so the
     last item has to be checked here */
  if (!wanted) {
    logfilep->data[LOGDATA_UNWANTED]++;
    return;
  }

  /*** now add it to the hash tables ***/

  /* add to logfile from and to if wanted, whatever status code */
  if (timecode != FIRST_TIME)
    logfilep->from = MIN(logfilep->from, timecode);
  logfilep->to = MAX(logfilep->to, timecode);
  last7 = (timecode > dman->last7from && timecode <= dman->last7to);

  if (ispage == UNSET)  /* NB blank_entry has ispage FALSE */
    ispage = (choice)(item[ITEM_FILE]->ispage);
  if (count[INP_BYTES] == 0)
    bytes = 0;
  if (count[INP_CODE] == 0) {
    /* no status code in the format: every line counts as a success */
    outcome = SUCCESS;
    if (count[ITEM_FILE] == 2) {
      logfilep->data[LOGDATA_SUCC]++;
      logfilep->data[LOGDATA_SUCC7] += (unsigned long)last7;
      logfilep->data[LOGDATA_PAGES] += (unsigned long)ispage;
      logfilep->data[LOGDATA_PAGES7] +=
        (unsigned long)((logical)ispage && last7);
    }
    else {
      logfilep->data[LOGDATA_UNKNOWN]++;
      logfilep->data[LOGDATA_UNKNOWN7] += (unsigned long)last7;
    }
  }
  else switch (outcome = code2type[code]) {
  case SUCCESS:
    logfilep->data[LOGDATA_SUCC]++;
    logfilep->data[LOGDATA_SUCC7] += (unsigned long)last7;
    logfilep->data[LOGDATA_PAGES] += (unsigned long)ispage;
    logfilep->data[LOGDATA_PAGES7] +=
      (unsigned long)((logical)ispage && last7);
    break;
  case FAILURE:
    logfilep->data[LOGDATA_FAIL]++;
    logfilep->data[LOGDATA_FAIL7] += (unsigned long)last7;
    break;
  case REDIRECT:
    logfilep->data[LOGDATA_REDIR]++;
    logfilep->data[LOGDATA_REDIR7] += (unsigned long)last7;
    break;
  case INFO:
    logfilep->data[LOGDATA_INFO]++;
    logfilep->data[LOGDATA_INFO7] += (unsigned long)last7;
    break;
  default:  /* UNWANTED lines have already returned above; count nothing */
    break;
  }
  /* NB any change in what to count when will require corresponding change to
     end of strtoinfmt() and to fmt munching in correct() */

  if (count[INP_CODE] == 2)
    arrayscore(arraydata[REP_CODE - FIRST_ARRAYREP], code, 1, 0, 0., timecode);

  if (outcome != INFO) {
    if (outcome == SUCCESS) {
      if (count[INP_DATE] == 2)  /* only if file present: see strtoinfmt() */
        datehash(timecode, dman, 1, (unsigned long)ispage, bytes, granularity);
      if (count[INP_BYTES] == 2) {
        arrayscore(arraydata[REP_SIZE - FIRST_ARRAYREP], bytes, 1,
                   (unsigned long)ispage, bytes, timecode);
        logfilep->bytes += bytes;
        if (last7)
          logfilep->bytes7 += bytes;
      }
      if (count[INP_PROCTIME] == 2)
        arrayscore(arraydata[REP_PROCTIME - FIRST_ARRAYREP], proctime, 1,
                   (unsigned long)ispage, bytes, timecode);
      for (i = 0; alltrees[i] != REP_NUMBER; i++) {
        if (wanttree[rep2type[alltrees[i]]]) {
          /* NB these trees only count successes */
          dummy_item->name = mm[rep2type[alltrees[i]]].curr_pos;
          /* mm.curr_pos is marked for deletion, but still intact at present */
          dummy_item->own->data[REQUESTS] = 1;
          dummy_item->own->data[PAGES] = (unsigned long)ispage;
          dummy_item->own->data[SUCCDATE] = timecode;
          dummy_item->own->bytes = bytes;
          namestart = NULL;
          tree[G(alltrees[i])]->cutfn(&namestart, &nameend, dummy_item->name,
                                      FALSE);
          (void)treefind(namestart, nameend, &(tree[G(alltrees[i])]->tree),
                         dummy_item, tree[G(alltrees[i])]->cutfn, FALSE, TRUE,
                         FALSE, tree[G(alltrees[i])]->space);
        }
      }
      for (i = 0; alldervs[i] != REP_NUMBER; i++) {
        /* same for derv's. Here we just call makederived() though. */
        if (wanttree[rep2type[alldervs[i]]]) {
          dummy_item->name = mm[rep2type[alldervs[i]]].curr_pos;
          dummy_item->own->data[REQUESTS] = 1;
          dummy_item->own->data[PAGES] = (unsigned long)ispage;
          dummy_item->own->data[SUCCDATE] = timecode;
          dummy_item->own->bytes = bytes;
          dummy_item->next = NULL;
          makederived(derv[alldervs[i] - FIRST_DERVREP], dummy_item, NULL,
                      convfloor, alldervs[i]);
        }
      }
    }    /* end if outcome == SUCCESS */
    for (i = 0; i < ITEM_NUMBER; i++) {
      if (count[i] == 2 && !ENTRY_BLANK(item[i]))
        hashscore(item[i], outcome, last7, (logical)ispage, timecode, bytes);
    }
  }    /* end if outcome != INFO */
}

/* corrupt_line(): record the current log record as corrupt and skip it.
 *
 * The record (cut to at most 500 characters) is shown through debug() under
 * category 'C', and logfilep->data[LOGDATA_CORRUPT] is incremented.
 *
 *   lf        log file stream, handed to parsenonnewline()/parsenewline().
 *   logfilep  logfile record whose corrupt-line counter is bumped.
 *   message   explanation, shown in parentheses when n < 0.
 *   n         if >= 0, and 'C' occurs in debug_args, a line "C: " followed by
 *             n spaces and a '*' is written to errfile instead of the
 *             message, so that the '*' sits n columns in.
 */
void corrupt_line(FILE *lf, Filelist *logfilep, char *message, ptrdiff_t n) {
  extern FILE *errfile;
  extern char *debug_args, *record_start, *pos;

  pos = record_start;
  parsenonnewline(lf);   /* skip to after next newline */
  *pos = '\0';           /* terminate the record temporarily for printing */
  if (pos - record_start > 500)   /* debug might only handle 509 chars */
    *(record_start + 500) = '\0';
  debug('C', "%s", record_start);
  if (n < 0)
    debug('C', " (%s)", message);
  else if (strchr(debug_args, 'C')) {
    fprintf(errfile, "C: ");
    /* Emit exactly n spaces. The previous code stepped n down by 10 while
       printing a single space, which put the marker in the wrong column. */
    for ( ; n > 0; n--)
      fputc(' ', errfile);
    fprintf(errfile, "*\n");
  }
  logfilep->data[LOGDATA_CORRUPT]++;
  *pos = '\n';           /* put a newline back where the terminator was */
  (void)parsenewline(lf, NULL, '\0');
}

/* arrayscore(): add one observation to an array report.
 *
 * Walks array[] from the start and updates the first bucket whose threshold
 * is at least amount. A bucket with a threshold below -0.5 catches everything
 * left over, and the array must end with such a bucket or the walk never
 * stops.
 *
 * The chosen bucket has reqs, pages and bytes added to its totals, and its
 * lastdate raised to timecode if that is later.
 */
void arrayscore(Arraydata *array, double amount, unsigned long reqs,
                unsigned long pages, double bytes, timecode_t timecode) {
  choice i;

  for (i = 0; ; i++) {  /* last threshold must be -1 to ensure termination */
    if (amount <= array[i].threshold || array[i].threshold < -0.5) {
      array[i].reqs += reqs;
      array[i].pages += pages;
      array[i].bytes += bytes;
      array[i].lastdate = MAX(array[i].lastdate, timecode);
      return;
    }
  }
}