#!/usr/bin/env python3 # -*- coding: utf-8 -*- import re import argparse import sys import platform import itertools import multiprocessing from cmark import CMark def hash_collisions(): REFMAP_SIZE = 16 COUNT = 50000 def badhash(ref): h = 0 for c in ref: a = (h << 6) & 0xFFFFFFFF b = (h << 16) & 0xFFFFFFFF h = ord(c) + a + b - h h = h & 0xFFFFFFFF return (h % REFMAP_SIZE) == 0 keys = ("x%d" % i for i in itertools.count()) collisions = itertools.islice((k for k in keys if badhash(k)), COUNT) bad_key = next(collisions) document = ''.join("[%s]: /url\n\n[%s]\n\n" % (key, bad_key) for key in collisions) return document, re.compile("(

\[%s\]

\n){%d}" % (bad_key, COUNT-1)) allowed_failures = {"many references": True} # list of pairs consisting of input and a regex that must match the output. pathological = { # note - some pythons have limit of 65535 for {num-matches} in re. "nested strong emph": (("*a **a " * 65000) + "b" + (" a** a*" * 65000), re.compile("(a a ){65000}b( a a){65000}")), "many emph closers with no openers": (("a_ " * 65000), re.compile("(a[_] ){64999}a_")), "many emph openers with no closers": (("_a " * 65000), re.compile("(_a ){64999}_a")), "many link closers with no openers": (("a]" * 65000), re.compile("(a\]){65000}")), "many link openers with no closers": (("[a" * 65000), re.compile("(\[a){65000}")), "mismatched openers and closers": (("*a_ " * 50000), re.compile("([*]a[_] ){49999}[*]a_")), "openers and closers multiple of 3": (("a**b" + ("c* " * 50000)), re.compile("a[*][*]b(c[*] ){49999}c[*]")), "link openers and emph closers": (("[ a_" * 50000), re.compile("(\[ a_){50000}")), "pattern [ (]( repeated": (("[ (](" * 80000), re.compile("(\[ \(\]\(){80000}")), "hard link/emph case": ("**x [a*b**c*](d)", re.compile("\\*\\*x ab\\*\\*c")), "nested brackets": (("[" * 50000) + "a" + ("]" * 50000), re.compile("\[{50000}a\]{50000}")), "nested block quotes": ((("> " * 50000) + "a"), re.compile("(
\n){50000}")), "deeply nested lists": ("".join(map(lambda x: (" " * x + "* a\n"), range(0,1000))), re.compile("\n){999}")), "U+0000 in input": ("abc\u0000de\u0000", re.compile("abc\ufffd?de\ufffd?")), "backticks": ("".join(map(lambda x: ("e" + "`" * x), range(1,5000))), re.compile("^

[e`]*

\n$")), "unclosed links A": ("[a](