/* * xmlSeed.c: Generate the XML seed corpus for fuzzing. * * See Copyright for the status of this software. */ #include #include #include #include #include #ifdef _WIN32 #include #else #include #endif #include #include #include #include #include #include "fuzz.h" #define PATH_SIZE 500 #define SEED_BUF_SIZE 16384 #define EXPR_SIZE 4500 #define FLAG_READER (1 << 0) #define FLAG_LINT (1 << 1) typedef int (*fileFunc)(const char *base, FILE *out); typedef int (*mainFunc)(const char *arg); static struct { FILE *out; xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */ xmlExternalEntityLoader oldLoader; fileFunc processFile; const char *fuzzer; int counter; char cwd[PATH_SIZE]; int flags; } globalData; #if defined(HAVE_SCHEMA_FUZZER) || \ defined(HAVE_XML_FUZZER) /* * A custom entity loader that writes all external DTDs or entities to a * single file in the format expected by xmlFuzzEntityLoader. */ static xmlParserInputPtr fuzzEntityRecorder(const char *URL, const char *ID, xmlParserCtxtPtr ctxt) { xmlParserInputPtr in; static const int chunkSize = 16384; int len; in = xmlNoNetExternalEntityLoader(URL, ID, ctxt); if (in == NULL) return(NULL); if (globalData.entities == NULL) { globalData.entities = xmlHashCreate(4); } else if (xmlHashLookup(globalData.entities, (const xmlChar *) URL) != NULL) { return(in); } do { len = xmlParserInputBufferGrow(in->buf, chunkSize); if (len < 0) { fprintf(stderr, "Error reading %s\n", URL); xmlFreeInputStream(in); return(NULL); } } while (len > 0); xmlFuzzWriteString(globalData.out, URL); xmlFuzzWriteString(globalData.out, (char *) xmlBufContent(in->buf->buffer)); xmlFreeInputStream(in); xmlHashAddEntry(globalData.entities, (const xmlChar *) URL, globalData.entities); return(xmlNoNetExternalEntityLoader(URL, ID, ctxt)); } static void fuzzRecorderInit(FILE *out) { globalData.out = out; globalData.entities = xmlHashCreate(8); globalData.oldLoader = xmlGetExternalEntityLoader(); xmlSetExternalEntityLoader(fuzzEntityRecorder); } static void fuzzRecorderCleanup(void) { xmlSetExternalEntityLoader(globalData.oldLoader); xmlHashFree(globalData.entities, NULL); globalData.out = NULL; globalData.entities = NULL; globalData.oldLoader = NULL; } #endif #ifdef HAVE_XML_FUZZER static int processXml(const char *docFile, FILE *out) { int opts = XML_PARSE_NOENT | XML_PARSE_DTDLOAD; xmlDocPtr doc; if (globalData.flags & FLAG_LINT) { /* Switches */ xmlFuzzWriteInt(out, 0, 4); xmlFuzzWriteInt(out, 0, 4); /* maxmem */ xmlFuzzWriteInt(out, 0, 4); /* max-ampl */ xmlFuzzWriteInt(out, 0, 1); /* pretty */ xmlFuzzWriteInt(out, 0, 1); /* encode */ xmlFuzzWriteString(out, ""); /* pattern */ xmlFuzzWriteString(out, ""); /* xpath */ xmlFuzzWriteString(out, ""); } else { /* Parser options. */ xmlFuzzWriteInt(out, opts, 4); /* Max allocations. */ xmlFuzzWriteInt(out, 0, 4); if (globalData.flags & FLAG_READER) { /* Initial reader program with a couple of OP_READs */ xmlFuzzWriteString(out, "\x01\x01\x01\x01\x01\x01\x01\x01"); } } fuzzRecorderInit(out); doc = xmlReadFile(docFile, NULL, opts); #ifdef LIBXML_XINCLUDE_ENABLED xmlXIncludeProcessFlags(doc, opts); #endif xmlFreeDoc(doc); fuzzRecorderCleanup(); return(0); } #endif #ifdef HAVE_HTML_FUZZER static int processHtml(const char *docFile, FILE *out) { char buf[SEED_BUF_SIZE]; FILE *file; size_t size; /* Parser options. */ xmlFuzzWriteInt(out, 0, 4); /* Max allocations. */ xmlFuzzWriteInt(out, 0, 4); /* Copy file */ file = fopen(docFile, "rb"); if (file == NULL) { fprintf(stderr, "couldn't open %s\n", docFile); return(0); } do { size = fread(buf, 1, SEED_BUF_SIZE, file); if (size > 0) fwrite(buf, 1, size, out); } while (size == SEED_BUF_SIZE); fclose(file); return(0); } #endif #ifdef HAVE_SCHEMA_FUZZER static int processSchema(const char *docFile, FILE *out) { xmlSchemaPtr schema; xmlSchemaParserCtxtPtr pctxt; /* Max allocations. */ xmlFuzzWriteInt(out, 0, 4); fuzzRecorderInit(out); pctxt = xmlSchemaNewParserCtxt(docFile); xmlSchemaSetParserErrors(pctxt, xmlFuzzErrorFunc, xmlFuzzErrorFunc, NULL); schema = xmlSchemaParse(pctxt); xmlSchemaFreeParserCtxt(pctxt); xmlSchemaFree(schema); fuzzRecorderCleanup(); return(0); } #endif #if defined(HAVE_HTML_FUZZER) || \ defined(HAVE_SCHEMA_FUZZER) || \ defined(HAVE_XML_FUZZER) static int processPattern(const char *pattern) { glob_t globbuf; int ret = 0; int res; size_t i; res = glob(pattern, 0, NULL, &globbuf); if (res == GLOB_NOMATCH) return(0); if (res != 0) { fprintf(stderr, "couldn't match pattern %s\n", pattern); return(-1); } for (i = 0; i < globbuf.gl_pathc; i++) { struct stat statbuf; char outPath[PATH_SIZE]; char *dirBuf = NULL; char *baseBuf = NULL; const char *path, *dir, *base; FILE *out = NULL; int dirChanged = 0; size_t size; path = globbuf.gl_pathv[i]; if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) continue; dirBuf = (char *) xmlCharStrdup(path); baseBuf = (char *) xmlCharStrdup(path); if ((dirBuf == NULL) || (baseBuf == NULL)) { fprintf(stderr, "memory allocation failed\n"); ret = -1; goto error; } dir = dirname(dirBuf); base = basename(baseBuf); size = snprintf(outPath, sizeof(outPath), "seed/%s/%s", globalData.fuzzer, base); if (size >= PATH_SIZE) { fprintf(stderr, "creating path failed\n"); ret = -1; goto error; } out = fopen(outPath, "wb"); if (out == NULL) { fprintf(stderr, "couldn't open %s for writing\n", outPath); ret = -1; goto error; } if (chdir(dir) != 0) { fprintf(stderr, "couldn't chdir to %s\n", dir); ret = -1; goto error; } dirChanged = 1; if (globalData.processFile(base, out) != 0) ret = -1; error: if (out != NULL) fclose(out); xmlFree(dirBuf); xmlFree(baseBuf); if ((dirChanged) && (chdir(globalData.cwd) != 0)) { fprintf(stderr, "couldn't chdir to %s\n", globalData.cwd); ret = -1; break; } } globfree(&globbuf); return(ret); } #endif #ifdef HAVE_XPATH_FUZZER static int processXPath(const char *testDir, const char *prefix, const char *name, const char *data, const char *subdir, int xptr) { char pattern[PATH_SIZE]; glob_t globbuf; size_t i, size; int ret = 0, res; size = snprintf(pattern, sizeof(pattern), "%s/%s/%s*", testDir, subdir, prefix); if (size >= PATH_SIZE) return(-1); res = glob(pattern, 0, NULL, &globbuf); if (res == GLOB_NOMATCH) return(0); if (res != 0) { fprintf(stderr, "couldn't match pattern %s\n", pattern); return(-1); } for (i = 0; i < globbuf.gl_pathc; i++) { char *path = globbuf.gl_pathv[i]; struct stat statbuf; FILE *in; char expr[EXPR_SIZE]; if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) continue; in = fopen(path, "rb"); if (in == NULL) { ret = -1; continue; } while (fgets(expr, EXPR_SIZE, in) != NULL) { char outPath[PATH_SIZE]; FILE *out; int j; for (j = 0; expr[j] != 0; j++) if (expr[j] == '\r' || expr[j] == '\n') break; expr[j] = 0; size = snprintf(outPath, sizeof(outPath), "seed/xpath/%s-%d", name, globalData.counter); if (size >= PATH_SIZE) { ret = -1; continue; } out = fopen(outPath, "wb"); if (out == NULL) { ret = -1; continue; } /* Max allocations. */ xmlFuzzWriteInt(out, 0, 4); if (xptr) { xmlFuzzWriteString(out, expr); } else { char xptrExpr[EXPR_SIZE+100]; /* Wrap XPath expressions as XPointer */ snprintf(xptrExpr, sizeof(xptrExpr), "xpointer(%s)", expr); xmlFuzzWriteString(out, xptrExpr); } xmlFuzzWriteString(out, data); fclose(out); globalData.counter++; } fclose(in); } globfree(&globbuf); return(ret); } static int processXPathDir(const char *testDir) { char pattern[PATH_SIZE]; glob_t globbuf; size_t i, size; int ret = 0; globalData.counter = 1; if (processXPath(testDir, "", "expr", "", "expr", 0) != 0) ret = -1; size = snprintf(pattern, sizeof(pattern), "%s/docs/*", testDir); if (size >= PATH_SIZE) return(1); if (glob(pattern, 0, NULL, &globbuf) != 0) return(1); for (i = 0; i < globbuf.gl_pathc; i++) { char *path = globbuf.gl_pathv[i]; char *data; const char *docFile; data = xmlSlurpFile(path, NULL); if (data == NULL) { ret = -1; continue; } docFile = basename(path); globalData.counter = 1; if (processXPath(testDir, docFile, docFile, data, "tests", 0) != 0) ret = -1; if (processXPath(testDir, docFile, docFile, data, "xptr", 1) != 0) ret = -1; if (processXPath(testDir, docFile, docFile, data, "xptr-xp1", 1) != 0) ret = -1; xmlFree(data); } globfree(&globbuf); return(ret); } #endif int main(int argc, const char **argv) { mainFunc processArg = NULL; const char *fuzzer; int ret = 0; int i; if (argc < 3) { fprintf(stderr, "usage: seed [FUZZER] [PATTERN...]\n"); return(1); } xmlSetGenericErrorFunc(NULL, xmlFuzzErrorFunc); fuzzer = argv[1]; if (strcmp(fuzzer, "html") == 0) { #ifdef HAVE_HTML_FUZZER processArg = processPattern; globalData.processFile = processHtml; #endif } else if (strcmp(fuzzer, "lint") == 0) { #ifdef HAVE_LINT_FUZZER processArg = processPattern; globalData.flags |= FLAG_LINT; globalData.processFile = processXml; #endif } else if (strcmp(fuzzer, "reader") == 0) { #ifdef HAVE_READER_FUZZER processArg = processPattern; globalData.flags |= FLAG_READER; globalData.processFile = processXml; #endif } else if (strcmp(fuzzer, "schema") == 0) { #ifdef HAVE_SCHEMA_FUZZER processArg = processPattern; globalData.processFile = processSchema; #endif } else if (strcmp(fuzzer, "valid") == 0) { #ifdef HAVE_VALID_FUZZER processArg = processPattern; globalData.processFile = processXml; #endif } else if (strcmp(fuzzer, "xinclude") == 0) { #ifdef HAVE_XINCLUDE_FUZZER processArg = processPattern; globalData.processFile = processXml; #endif } else if (strcmp(fuzzer, "xml") == 0) { #ifdef HAVE_XML_FUZZER processArg = processPattern; globalData.processFile = processXml; #endif } else if (strcmp(fuzzer, "xpath") == 0) { #ifdef HAVE_XPATH_FUZZER processArg = processXPathDir; #endif } else { fprintf(stderr, "unknown fuzzer %s\n", fuzzer); return(1); } globalData.fuzzer = fuzzer; if (getcwd(globalData.cwd, PATH_SIZE) == NULL) { fprintf(stderr, "couldn't get current directory\n"); return(1); } if (processArg != NULL) for (i = 2; i < argc; i++) processArg(argv[i]); return(ret); }