#include "openaxiom-c-macros.h"
#include "debug.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <regex.h>
#include <locale.h>
#include "cfuns.h"
/* Size of the buffers that hold one line read from the database file
   (and the file name parsed out of a header line). */
#define MAX_HTDB_LINE 1024
/* Size of the buffer that holds the entry-type token of a database line. */
#define MAX_ENTRY_TYPE 30
/* Size of the buffers that hold an entry (page) name. */
#define MAX_ENTRY_NAME 1024
/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define MAX_COMP_REGEX 1024
/* Location of one page inside its source file, as recorded in the
   database file. */
typedef struct PgInfo {
/* Page name, copied from the second field of a "\page" database line. */
char name[MAX_ENTRY_NAME];
/* start: offset of the page in its file (used as the fseek target).
   size:  number of bytes to read for the page; temporarily -1 while the
          database is being parsed and the end of the page is not yet known. */
long start, size;
} PgInfo ;
/* argv[0]; used as the prefix of every diagnostic. */
char *progName;
/* argv[1]; the regular expression to search for. */
char *pattern;
/* argv[2]; path of the database file to scan. */
char *htdbFName;
/* When nonzero, a file whose modification time differs from the time
   recorded in the database is rejected.  Never set in the visible code. */
int gverifydates=0;
/* Compiled form of `pattern`; filled in by main() and used by searchPage(). */
regex_t reg_pattern;
/*
 * Report on stderr that the database file named by htdbFName is
 * unusable, then terminate the program with exit status 1.
 * Never returns.
 */
static void
badDB()
{
    FILE *const err = stderr;

    fprintf(err, "%s: bad database file %s\n", progName, htdbFName);
    exit(1);
}
/*
 * Strip TeX markup from the NUL-terminated string `s`, in place.
 *
 *  - A backslash is replaced by one space and the run of letters that
 *    immediately follows it is dropped.
 *  - A '%' is replaced by one space and everything up to (but not
 *    including) the next newline is dropped.
 *  - '{', '}' and '#' are each replaced by one space.
 *  - Every other character is copied unchanged.
 *
 * The result is never longer than the input, so rewriting in place is safe.
 */
static void
untexbuf(char* s)
{
    char *d = s;    /* write cursor; always <= the read cursor s */

    while (*s) {
        switch (*s) {
        case '\\':
            *d++ = ' ';
            s++;
            /* <ctype.h> functions require a value representable as
               unsigned char; a plain (possibly negative) char is undefined. */
            while (isalpha((unsigned char)*s))
                s++;
            /* NOTE(review): "\%" is not kept as a literal percent sign; the
               '%' is picked up by the next iteration as a comment start,
               exactly as before.  Confirm that this is intended. */
            break;
        case '%':
            *d++ = ' ';
            s++;
            while (*s && *s != '\n')
                s++;
            break;
        case '{':
        case '}':
        case '#':
            *d++ = ' ';
            s++;
            break;
        default:
            *d++ = *s++;
            break;
        }
    }
    *d = 0;
}
/*
 * Split the page text in `buf` into its title and its body.
 *
 * The second character of `buf` selects which top-level {...} group
 * holds the title: 'p' selects the 2nd group, 'b' the 3rd.  The closing
 * brace of that group is overwritten with NUL, so that on return
 *   *ptitle points at the NUL-terminated title text, and
 *   *pbody  points at the text following the title group.
 * Both pointers refer into `buf`.
 *
 * If the selector character is not recognised, or the title group is
 * missing or never closed, a diagnostic is printed and the program
 * exits with status 1; the outputs are therefore always set on return.
 */
static void
splitpage(char* buf, char** ptitle, char** pbody)
{
    int n = 0;          /* number of top-level groups opened so far */
    int depth = 0;      /* current brace nesting depth */
    int tno;            /* ordinal of the group that holds the title */
    char* s;
    char* title = NULL;
    char* body = NULL;

    /* Do not look past the terminator of an empty buffer. */
    switch (buf[0] ? buf[1] : '\0') {
    case 'p':
        tno = 2;
        break;
    case 'b':
        tno = 3;
        break;
    default:
        fprintf(stderr, "%s: Invalid page format: %s\n", progName, buf);
        exit(1);
    }

    for (s = buf; *s; s++) {
        if (*s == '{') {
            if (++depth == 1 && ++n == tno)
                title = s + 1;
        }
        else if (*s == '}') {
            if (depth-- == 1 && n == tno) {
                *s = 0;
                body = s + 1;
                break;
            }
        }
    }

    /* Without this check the caller would use uninitialised pointers. */
    if (title == NULL || body == NULL) {
        fprintf(stderr, "%s: Invalid page format: %s\n", progName, buf);
        exit(1);
    }
    *ptitle = title;
    *pbody = body;
}
/*
 * Print the first `n` characters of `s` to stdout, wrapped in braces.
 *
 * Backquotes and newlines among those `n` characters are first turned
 * into spaces; that change is made in `s` itself and is permanent.
 * The character at s[n] is temporarily replaced by NUL while printing
 * and restored afterwards.
 */
static void
squirt(char* s, int n)
{
    const char saved = s[n];
    int i;

    for (i = 0; i < n; ++i) {
        const char ch = s[i];
        if (ch == '`' || ch == '\n')
            s[i] = ' ';
    }

    if (saved != 0)
        s[n] = 0;
    printf("{%.*s}", n, s);
    s[n] = saved;
}
static void
searchPage(char* pgname, char* pgtitle, char* pgbody)
{
char *bodyrest;
regmatch_t match_pos;
int nhits = 0;
if (!regexec(®_pattern, pgtitle, 1, &match_pos, 0))
nhits++;
bodyrest = pgbody;
while (!regexec(®_pattern, bodyrest, 1, &match_pos, 0)) {
nhits++;
bodyrest += match_pos.rm_eo;
}
if (nhits) {
printf("\\newsearchresultentry{%d}{%s}",nhits, pgtitle);
squirt(pgname, strlen(pgname));
printf("\n");
}
}
static void
handlePage(FILE* infile, PgInfo* pg)
{
static char *pgBuf = 0;
static int pgBufSize = 0;
char *title, *body;
if (pg->size > pgBufSize - 1) {
if (pgBuf)
free(pgBuf);
pgBufSize = pg->size + 20000;
pgBuf = (char *)malloc(pgBufSize);
if (!pgBuf) {
fprintf(stderr,"%s: Out of memory\n", progName);
exit(1);
}
}
fseek(infile, pg->start, 0);
fread(pgBuf, pg->size, 1, infile);
pgBuf[pg->size] = 0;
splitpage(pgBuf, &title, &body);
untexbuf(body);
#ifdef DEBUG
printf("-------------- %s -------------\n%s", pg->name, pgBuf);
printf("============== %s =============\n", title);
printf("%s", body);
#endif
searchPage(pg->name, title, body);
}
/*
 * Open the source file `fname` and run handlePage() on each of the
 * `pgc` page descriptors in `pgv`, in order.
 *
 * If the file cannot be opened, a diagnostic is printed and the program
 * exits with status 1.
 */
static void
handleFilePages(const char* fname, int pgc, PgInfo* pgv)
{
    FILE *in = fopen(fname, "r");
    int idx = 0;

    if (!in) {
        fprintf(stderr, "%s: Cannot read file %s\n", progName, fname);
        exit(1);
    }

    while (idx < pgc) {
        handlePage(in, &pgv[idx]);
        ++idx;
    }

    fclose(in);
}
/*
 * Process one file section of the database stream `htdbFile`.
 *
 * A section is a header line (starting with a tab) holding a file name
 * and a modification time, followed by entry lines of the form
 *     <type> <name> <offset> ...
 * up to the next line that starts with a tab, or end of file.
 *
 * The section is read twice: once to count the "\page" entries so the
 * page table can be sized, and once to fill the table.  The size of a
 * page is the distance from its offset to the offset of the next entry
 * of any type, or to the end of the file for the last page.  The pages
 * are then searched via handleFilePages().  On return the stream is
 * positioned at the start of the next section header.
 *
 * The page table is function-static and reused across calls; it is
 * never released.
 *
 * Errors: an unreadable or unparsable header, a failing seek, or a page
 * count that differs between the two passes is reported via badDB();
 * an inaccessible or (with gverifydates) out-of-date file and an
 * allocation failure print a diagnostic and exit with status 1.
 */
static void
handleFile(FILE* htdbFile)
{
    static PgInfo *pgInfoV = 0;
    static int pgInfoC = 0;
    char htdbLine[MAX_HTDB_LINE];
    char htfname[MAX_HTDB_LINE];
    char entname[MAX_ENTRY_NAME], enttype[MAX_ENTRY_TYPE];
    struct stat htstat;
    long httime = 0;     /* scanned as long: time_t need not match %ld */
    long htsize;
    long fstart, fend;
    long entoffset;
    int i, npages;

    /* Header line: file name and recorded modification time.  The field
       width must stay in sync with MAX_HTDB_LINE - 1. */
    if (fgets(htdbLine, MAX_HTDB_LINE, htdbFile) == NULL)
        badDB();
    if (sscanf(htdbLine, " %1023s %ld", htfname, &httime) < 1)
        badDB();

    if (stat(htfname, &htstat) == -1) {
        fprintf(stderr, "%s: Cannot access %s\n", progName, htfname);
        exit(1);
    }
    if (gverifydates && (htstat.st_mtime != (time_t)httime)) {
        fprintf(stderr, "%s: Out of date file %s\n", progName, htfname);
        exit(1);
    }
    htsize = (long)htstat.st_size;

    /* Pass 1: count the page entries and remember where the section ends. */
    npages = 0;
    fstart = ftell(htdbFile);
    fend = fstart;
    while (fgets(htdbLine, MAX_HTDB_LINE, htdbFile) != NULL) {
        if (htdbLine[0] == '\t')
            break;
        if (!strncmp(htdbLine, "\\page", 5))
            npages++;
        fend = ftell(htdbFile);
    }

    if (npages > pgInfoC) {
        free(pgInfoV);
        pgInfoV = (PgInfo *)malloc((size_t)npages * sizeof(PgInfo));
        if (!pgInfoV) {
            fprintf(stderr, "%s: out of memory\n", progName);
            exit(1);
        }
        pgInfoC = npages;
    }

    /* Pass 2: fill the page table. */
    if (fseek(htdbFile, fstart, SEEK_SET) != 0)
        badDB();
    i = 0;
    while (fgets(htdbLine, MAX_HTDB_LINE, htdbFile) != NULL) {
        if (htdbLine[0] == '\t')
            break;
        /* Field widths must stay in sync with MAX_ENTRY_TYPE - 1 and
           MAX_ENTRY_NAME - 1.  Lines that do not supply type, name and
           offset are skipped rather than acted on with stale values. */
        if (sscanf(htdbLine, "%29s %1023s %ld",
                   enttype, entname, &entoffset) < 3)
            continue;
        /* Any entry closes the page that precedes it. */
        if (i > 0 && pgInfoV[i - 1].size == -1)
            pgInfoV[i - 1].size = entoffset - pgInfoV[i - 1].start;
        if (!strcmp(enttype, "\\page")) {
            /* Pass 1 matches a line prefix while this pass matches a
               token, so the counts can disagree; never write past the
               table. */
            if (i >= npages)
                badDB();
            snprintf(pgInfoV[i].name, sizeof pgInfoV[i].name, "%s", entname);
            pgInfoV[i].start = entoffset;
            pgInfoV[i].size = -1;
            i++;
        }
    }
    /* The last page runs to the end of the file. */
    if (i > 0 && pgInfoV[i - 1].size == -1)
        pgInfoV[i - 1].size = htsize - pgInfoV[i - 1].start;
    if (i != npages)
        badDB();

    /* Leave the stream at the next section header for the caller. */
    if (fseek(htdbFile, fend, SEEK_SET) != 0)
        badDB();
    handleFilePages(htfname, npages, pgInfoV);
}
static void
handleHtdb()
{
FILE *htdbFile;
int c;
htdbFile = fopen(htdbFName, "r");
if (htdbFile == NULL)
badDB();
while ((c = getc(htdbFile)) != EOF) {
if (c != '\t')
badDB();
ungetc(c, htdbFile);
handleFile(htdbFile);
}
fclose(htdbFile);
}
/*
 * Record the command-line arguments in the globals progName, pattern
 * and htdbFName.
 *
 * Exactly two arguments (pattern and database file) are required;
 * otherwise a usage message is printed and the program exits with
 * status 1.
 */
static void
cmdline(int argc, char** argv)
{
    progName = argv[0];

    if (argc == 3) {
        pattern = argv[1];
        htdbFName = argv[2];
        return;
    }

    fprintf(stderr, "Usage: %s pattern htdb-file\n", progName);
    exit(1);
}
int
main(int argc, char** argv)
{
using namespace OpenAxiom;
oa_setenv("LC_ALL", "C");
setlocale(LC_ALL, "");
cmdline(argc, argv);
regcomp(®_pattern, pattern, REG_NEWLINE);
handleHtdb();
return(0);
}