#!/usr/bin/env python3
"""This file generates the lookup table from mime-db/db.json.

You only need to run this when updating mime-db.

Two files are produced:

* ``src/raw_data`` -- every subtype immediately followed by its extension,
  concatenated with no separators.
* ``src/lookup`` -- generated source text that groups ``Entry(offset,
  subtype_len, extension_len)`` records by top-level MIME type, where
  ``offset`` points at the subtype inside ``src/raw_data``.
"""

import collections
import io
import json

DB_PATH = "mime-db/db.json"
RAW_DATA_PATH = "src/raw_data"
LOOKUP_PATH = "src/lookup"

LOOKUP_HEADER = (
    "// This file is generated by build.py\n"
    "// Do not edit manually\n"
    "&[\n"
)


def assert_boring_ascii(text):
    """Check that it's safe to do code generation with this string.

    Raises:
        AssertionError: if ``text`` is not lowercase, printable ASCII, or if
            it contains a quote, a backslash or a slash. The message names
            the offending string and the rule it broke.
    """
    # If there's unicode we'll get incorrect offsets
    # If mime-db ever starts containing unicode (unlikely!), process strings
    # with .encode("utf8") first
    checks = (
        (text.isascii(), "is not pure ASCII"),
        (text.lower() == text, "is not lowercase"),
        (text.isprintable(), "contains non-printable characters"),
        # Quotes and backslashes would change the meaning of the quoted
        # literal the string is interpolated into.
        ('"' not in text, "contains a double quote"),
        ("'" not in text, "contains a single quote"),
        ("\\" not in text, "contains a backslash"),
        ("/" not in text, "contains a slash"),
    )
    for ok, reason in checks:
        if not ok:
            # Raised explicitly instead of using `assert`: assert statements
            # are removed under `python -O`, which would silently disable
            # the safety checks guarding the generated code.
            raise AssertionError(f"unsafe string {text!r}: {reason}")


def group_by_type(db):
    """Map each top-level type to ``{subtype: first extension}``.

    Entries without a non-empty ``"extensions"`` list are skipped. Keys are
    visited in sorted order so the generated output is deterministic.
    """
    by_type = collections.defaultdict(dict)
    for mime, info in sorted(db.items()):
        if extensions := info.get("extensions"):
            type_, subtype = mime.split("/")
            by_type[type_][subtype] = extensions[0]
    return by_type


def build_tables(db):
    """Return ``(raw_data, lookup_text)`` generated from a parsed mime-db.

    Args:
        db: mapping of ``"type/subtype"`` to an info dict, as loaded from
            ``mime-db/db.json``.

    Raises:
        AssertionError: if any type, subtype or extension fails
            ``assert_boring_ascii``, or if an extension contains a dot.
    """
    raw_data = io.StringIO()
    lookup_text = io.StringIO()
    lookup_text.write(LOOKUP_HEADER)

    # Tracked by hand rather than read from raw_data.tell(): tell() on a text
    # stream is documented as an opaque number. Every string is verified to
    # be ASCII, so character counts equal byte counts.
    offset = 0
    for type_, extensions in group_by_type(db).items():
        assert_boring_ascii(type_)
        lookup_text.write(f'("{type_}", &[\n')
        for subtype, extension in extensions.items():
            assert_boring_ascii(subtype)
            assert_boring_ascii(extension)
            if "." in extension:
                raise AssertionError(
                    f"unsafe extension {extension!r}: contains a dot"
                )
            lookup_text.write(
                f"// {type_}/{subtype}: {extension}\n"
                f"Entry({offset}, {len(subtype)}, {len(extension)}),\n"
            )
            raw_data.write(subtype)
            raw_data.write(extension)
            offset += len(subtype) + len(extension)
        lookup_text.write("]),\n")
    lookup_text.write("]\n")

    return raw_data.getvalue(), lookup_text.getvalue()


def main():
    """Read mime-db/db.json and write src/raw_data and src/lookup."""
    # JSON is UTF-8; don't depend on the platform's locale encoding.
    with open(DB_PATH, encoding="utf-8") as f:
        db = json.load(f)

    # Generate everything first so a failed check leaves the old files alone.
    raw_data, lookup_text = build_tables(db)

    # The content is guaranteed ASCII; saying so makes any violation of the
    # offsets-are-bytes invariant fail loudly instead of shifting offsets.
    with open(RAW_DATA_PATH, "w", encoding="ascii") as f:
        f.write(raw_data)

    with open(LOOKUP_PATH, "w", encoding="ascii") as f:
        f.write(lookup_text)


if __name__ == "__main__":
    main()