"""Tests for the ``robotstxt0`` SQLite loadable extension.

Importing this module loads the extension from ``EXT_PATH`` into an in-memory
database and reads the Google robots.txt sample that sits next to this file
under ``examples/``; every test runs against that shared connection.
"""
import sqlite3
import unittest
from pathlib import Path

EXT_PATH = "./dist/debug/robotstxt0"

GOOGLE_ROBOTSTXT = (
    Path(__file__).parent / "examples" / "google.com.robots.txt"
).read_text(encoding="utf-8")


def connect(ext):
    """Open an in-memory database with the extension at *ext* loaded.

    The scalar functions and table modules present before loading are
    snapshotted into ``base_functions`` / ``base_modules``.  After loading,
    the names that were added are stored (sorted by name) in the temp tables
    ``loaded_functions`` and ``loaded_modules`` so tests can assert on exactly
    what the extension registered.

    Returns:
        A ``sqlite3.Connection`` whose ``row_factory`` is ``sqlite3.Row``.
    """
    conn = sqlite3.connect(":memory:")
    conn.execute(
        "create table base_functions as select name from pragma_function_list"
    )
    conn.execute(
        "create table base_modules as select name from pragma_module_list"
    )

    conn.enable_load_extension(True)
    try:
        conn.load_extension(ext)
    finally:
        # Extension loading is only needed for the call above; switch it
        # back off so the connection does not keep the capability around.
        conn.enable_load_extension(False)

    conn.execute(
        "create temp table loaded_functions as "
        "select name from pragma_function_list "
        "where name not in (select name from base_functions) order by name"
    )
    conn.execute(
        "create temp table loaded_modules as "
        "select name from pragma_module_list "
        "where name not in (select name from base_modules) order by name"
    )

    conn.row_factory = sqlite3.Row
    return conn


db = connect(EXT_PATH)


def explain_query_plan(sql):
    """Return the ``detail`` column of the first ``explain query plan`` row for *sql*."""
    return db.execute("explain query plan " + sql).fetchone()["detail"]


def execute_all(sql, args=None):
    """Run *sql* with bound *args* and return every row as a plain ``dict``."""
    if args is None:
        args = []
    return [dict(row) for row in db.execute(sql, args).fetchall()]


# Names the extension is expected to register, in sorted order (the
# ``loaded_*`` tables are created with ``order by name``).
FUNCTIONS = [
    "robotstxt_debug",
    "robotstxt_matches",
    "robotstxt_version",
]

MODULES = [
    "robotstxt_rules",
    "robotstxt_user_agents",
]


def spread_args(args):
    """Return a comma-separated run of ``?`` placeholders, one per item in *args*."""
    return ",".join(["?"] * len(args))


def _expected_rules(user_agent, rows):
    """Expand ``(source, rule_type, path)`` tuples into ``robotstxt_rules`` row dicts."""
    return [
        {
            "user_agent": user_agent,
            "source": source,
            "rule_type": rule_type,
            "path": path,
        }
        for source, rule_type, path in rows
    ]


# Expected output of ``robotstxt_rules(GOOGLE_ROBOTSTXT)``.
# fmt: off
GOOGLE_RULES = (
    _expected_rules("*", [
        (2, "disallow", "/search"),
        (3, "allow", "/search/about"),
        (4, "allow", "/search/static"),
        (5, "allow", "/search/howsearchworks"),
        (6, "disallow", "/sdch"),
        (7, "disallow", "/groups"),
        (8, "disallow", "/index.html?"),
        (9, "disallow", "/?"),
        (10, "allow", "/?hl="),
        (11, "disallow", "/?hl=*&"),
        (12, "allow", "/?hl=*&gws_rd=ssl$"),
        (13, "disallow", "/?hl=*&*&gws_rd=ssl"),
        (14, "allow", "/?gws_rd=ssl$"),
        (15, "allow", "/?pt1=true$"),
        (16, "disallow", "/imgres"),
        (17, "disallow", "/u/"),
        (18, "disallow", "/preferences"),
        (19, "disallow", "/setprefs"),
        (20, "disallow", "/default"),
        (21, "disallow", "/m?"),
        (22, "disallow", "/m/"),
        (23, "allow", "/m/finance"),
        (24, "disallow", "/wml?"),
        (25, "disallow", "/wml/?"),
        (26, "disallow", "/wml/search?"),
        (27, "disallow", "/xhtml?"),
        (28, "disallow", "/xhtml/?"),
        (29, "disallow", "/xhtml/search?"),
        (30, "disallow", "/xml?"),
        (31, "disallow", "/imode?"),
        (32, "disallow", "/imode/?"),
        (33, "disallow", "/imode/search?"),
        (34, "disallow", "/jsky?"),
        (35, "disallow", "/jsky/?"),
        (36, "disallow", "/jsky/search?"),
        (37, "disallow", "/pda?"),
        (38, "disallow", "/pda/?"),
        (39, "disallow", "/pda/search?"),
        (40, "disallow", "/sprint_xhtml"),
        (41, "disallow", "/sprint_wml"),
        (42, "disallow", "/pqa"),
        (43, "disallow", "/palm"),
        (44, "disallow", "/gwt/"),
        (45, "disallow", "/purchases"),
        (46, "disallow", "/local?"),
        (47, "disallow", "/local_url"),
        (48, "disallow", "/shihui?"),
        (49, "disallow", "/shihui/"),
        (50, "disallow", "/products?"),
        (51, "disallow", "/product_"),
        (52, "disallow", "/products_"),
        (53, "disallow", "/products;"),
        (54, "disallow", "/print"),
        (55, "disallow", "/books/"),
        (56, "disallow", "/bkshp?*q=*"),
        (57, "disallow", "/books?*q=*"),
        (58, "disallow", "/books?*output=*"),
        (59, "disallow", "/books?*pg=*"),
        (60, "disallow", "/books?*jtp=*"),
        (61, "disallow", "/books?*jscmd=*"),
        (62, "disallow", "/books?*buy=*"),
        (63, "disallow", "/books?*zoom=*"),
        (64, "allow", "/books?*q=related:*"),
        (65, "allow", "/books?*q=editions:*"),
        (66, "allow", "/books?*q=subject:*"),
        (67, "allow", "/books/about"),
        (68, "allow", "/booksrightsholders"),
        (69, "allow", "/books?*zoom=1*"),
        (70, "allow", "/books?*zoom=5*"),
        (71, "allow", "/books/content?*zoom=1*"),
        (72, "allow", "/books/content?*zoom=5*"),
        (73, "disallow", "/ebooks/"),
        (74, "disallow", "/ebooks?*q=*"),
        (75, "disallow", "/ebooks?*output=*"),
        (76, "disallow", "/ebooks?*pg=*"),
        (77, "disallow", "/ebooks?*jscmd=*"),
        (78, "disallow", "/ebooks?*buy=*"),
        (79, "disallow", "/ebooks?*zoom=*"),
        (80, "allow", "/ebooks?*q=related:*"),
        (81, "allow", "/ebooks?*q=editions:*"),
        (82, "allow", "/ebooks?*q=subject:*"),
        (83, "allow", "/ebooks?*zoom=1*"),
        (84, "allow", "/ebooks?*zoom=5*"),
        (85, "disallow", "/patents?"),
        (86, "disallow", "/patents/download/"),
        (87, "disallow", "/patents/pdf/"),
        (88, "disallow", "/patents/related/"),
        (89, "disallow", "/scholar"),
        (90, "disallow", "/citations?"),
        (91, "allow", "/citations?user="),
        (92, "disallow", "/citations?*cstart="),
        (93, "allow", "/citations?view_op=new_profile"),
        (94, "allow", "/citations?view_op=top_venues"),
        (95, "allow", "/scholar_share"),
        (96, "disallow", "/s?"),
        (97, "disallow", "/maps?"),
        (98, "allow", "/maps?*output=classic*"),
        (99, "allow", "/maps?*file="),
        (100, "disallow", "/mapstt?"),
        (101, "disallow", "/mapslt?"),
        (102, "disallow", "/mapabcpoi?"),
        (103, "disallow", "/maphp?"),
        (104, "disallow", "/mapprint?"),
        (105, "disallow", "/maps/"),
        (106, "allow", "/maps/search/"),
        (107, "allow", "/maps/dir/"),
        (108, "allow", "/maps/d/"),
        (109, "allow", "/maps/reserve"),
        (110, "allow", "/maps/about"),
        (111, "allow", "/maps/match"),
        (112, "disallow", "/maps/api/js/"),
        (113, "allow", "/maps/api/js"),
        (114, "disallow", "/mld?"),
        (115, "disallow", "/staticmap?"),
        (116, "disallow", "/help/maps/streetview/partners/welcome/"),
        (117, "disallow", "/help/maps/indoormaps/partners/"),
        (118, "disallow", "/lochp?"),
        (119, "disallow", "/center"),
        (120, "disallow", "/ie?"),
        (121, "disallow", "/blogsearch/"),
        (122, "disallow", "/blogsearch_feeds"),
        (123, "disallow", "/advanced_blog_search"),
        (124, "disallow", "/uds/"),
        (125, "disallow", "/chart?"),
        (126, "disallow", "/transit?"),
        (127, "allow", "/calendar$"),
        (128, "allow", "/calendar/about/"),
        (129, "disallow", "/calendar/"),
        (130, "disallow", "/cl2/feeds/"),
        (131, "disallow", "/cl2/ical/"),
        (132, "disallow", "/coop/directory"),
        (133, "disallow", "/coop/manage"),
        (134, "disallow", "/trends?"),
        (135, "disallow", "/trends/music?"),
        (136, "disallow", "/trends/hottrends?"),
        (137, "disallow", "/trends/viz?"),
        (138, "disallow", "/trends/embed.js?"),
        (139, "disallow", "/trends/fetchComponent?"),
        (140, "disallow", "/trends/beta"),
        (141, "disallow", "/trends/topics"),
        (142, "disallow", "/musica"),
        (143, "disallow", "/musicad"),
        (144, "disallow", "/musicas"),
        (145, "disallow", "/musicl"),
        (146, "disallow", "/musics"),
        (147, "disallow", "/musicsearch"),
        (148, "disallow", "/musicsp"),
        (149, "disallow", "/musiclp"),
        (150, "disallow", "/urchin_test/"),
        (151, "disallow", "/movies?"),
        (152, "disallow", "/wapsearch?"),
        (153, "allow", "/safebrowsing/diagnostic"),
        (154, "allow", "/safebrowsing/report_badware/"),
        (155, "allow", "/safebrowsing/report_error/"),
        (156, "allow", "/safebrowsing/report_phish/"),
        (157, "disallow", "/reviews/search?"),
        (158, "disallow", "/orkut/albums"),
        (159, "disallow", "/cbk"),
        (160, "disallow", "/recharge/dashboard/car"),
        (161, "disallow", "/recharge/dashboard/static/"),
        (162, "disallow", "/profiles/me"),
        (163, "allow", "/profiles"),
        (164, "disallow", "/s2/profiles/me"),
        (165, "allow", "/s2/profiles"),
        (166, "allow", "/s2/oz"),
        (167, "allow", "/s2/photos"),
        (168, "allow", "/s2/search/social"),
        (169, "allow", "/s2/static"),
        (170, "disallow", "/s2"),
        (171, "disallow", "/transconsole/portal/"),
        (172, "disallow", "/gcc/"),
        (173, "disallow", "/aclk"),
        (174, "disallow", "/cse?"),
        (175, "disallow", "/cse/home"),
        (176, "disallow", "/cse/panel"),
        (177, "disallow", "/cse/manage"),
        (178, "disallow", "/tbproxy/"),
        (179, "disallow", "/imesync/"),
        (180, "disallow", "/shenghuo/search?"),
        (181, "disallow", "/support/forum/search?"),
        (182, "disallow", "/reviews/polls/"),
        (183, "disallow", "/hosted/images/"),
        (184, "disallow", "/ppob/?"),
        (185, "disallow", "/ppob?"),
        (186, "disallow", "/accounts/ClientLogin"),
        (187, "disallow", "/accounts/ClientAuth"),
        (188, "disallow", "/accounts/o8"),
        (189, "allow", "/accounts/o8/id"),
        (190, "disallow", "/topicsearch?q="),
        (191, "disallow", "/xfx7/"),
        (192, "disallow", "/squared/api"),
        (193, "disallow", "/squared/search"),
        (194, "disallow", "/squared/table"),
        (195, "disallow", "/qnasearch?"),
        (196, "disallow", "/app/updates"),
        (197, "disallow", "/sidewiki/entry/"),
        (198, "disallow", "/quality_form?"),
        (199, "disallow", "/labs/popgadget/search"),
        (200, "disallow", "/buzz/post"),
        (201, "disallow", "/compressiontest/"),
        (202, "disallow", "/analytics/feeds/"),
        (203, "disallow", "/analytics/partners/comments/"),
        (204, "disallow", "/analytics/portal/"),
        (205, "disallow", "/analytics/uploads/"),
        (206, "allow", "/alerts/manage"),
        (207, "allow", "/alerts/remove"),
        (208, "disallow", "/alerts/"),
        (209, "allow", "/alerts/$"),
        (210, "disallow", "/ads/search?"),
        (211, "disallow", "/ads/plan/action_plan?"),
        (212, "disallow", "/ads/plan/api/"),
        (213, "disallow", "/ads/hotels/partners"),
        (214, "disallow", "/phone/compare/?"),
        (215, "disallow", "/travel/clk"),
        (216, "disallow", "/travel/flights/s/"),
        (217, "disallow", "/hotelfinder/rpc"),
        (218, "disallow", "/hotels/rpc"),
        (219, "disallow", "/commercesearch/services/"),
        (220, "disallow", "/evaluation/"),
        (221, "disallow", "/chrome/browser/mobile/tour"),
        (222, "disallow", "/compare/*/apply*"),
        (223, "disallow", "/forms/perks/"),
        (224, "disallow", "/shopping/suppliers/search"),
        (225, "disallow", "/ct/"),
        (226, "disallow", "/edu/cs4hs/"),
        (227, "disallow", "/trustedstores/s/"),
        (228, "disallow", "/trustedstores/tm2"),
        (229, "disallow", "/trustedstores/verify"),
        (230, "disallow", "/adwords/proposal"),
        (231, "disallow", "/shopping?*"),
        (232, "disallow", "/shopping/product/"),
        (233, "disallow", "/shopping/seller"),
        (234, "disallow", "/shopping/ratings/account/metrics"),
        (235, "disallow", "/shopping/ratings/merchant/immersivedetails"),
        (236, "disallow", "/shopping/reviewer"),
        (237, "disallow", "/about/careers/applications/"),
        (238, "disallow", "/about/careers/applications-a/"),
        (239, "disallow", "/landing/signout.html"),
        (240, "disallow", "/webmasters/sitemaps/ping?"),
        (241, "disallow", "/ping?"),
        (242, "disallow", "/gallery/"),
        (243, "disallow", "/landing/now/ontap/"),
        (244, "allow", "/searchhistory/"),
        (245, "allow", "/maps/reserve"),
        (246, "allow", "/maps/reserve/partners"),
        (247, "disallow", "/maps/reserve/api/"),
        (248, "disallow", "/maps/reserve/search"),
        (249, "disallow", "/maps/reserve/bookings"),
        (250, "disallow", "/maps/reserve/settings"),
        (251, "disallow", "/maps/reserve/manage"),
        (252, "disallow", "/maps/reserve/payment"),
        (253, "disallow", "/maps/reserve/receipt"),
        (254, "disallow", "/maps/reserve/sellersignup"),
        (255, "disallow", "/maps/reserve/payments"),
        (256, "disallow", "/maps/reserve/feedback"),
        (257, "disallow", "/maps/reserve/terms"),
        (258, "disallow", "/maps/reserve/m/"),
        (259, "disallow", "/maps/reserve/b/"),
        (260, "disallow", "/maps/reserve/partner-dashboard"),
        (261, "disallow", "/about/views/"),
        (262, "disallow", "/intl/*/about/views/"),
        (263, "disallow", "/local/cars"),
        (264, "disallow", "/local/cars/"),
        (265, "disallow", "/local/dealership/"),
        (266, "disallow", "/local/dining/"),
        (267, "disallow", "/local/place/products/"),
        (268, "disallow", "/local/place/reviews/"),
        (269, "disallow", "/local/place/rap/"),
        (270, "disallow", "/local/tab/"),
        (271, "disallow", "/localservices/*"),
        (272, "allow", "/finance"),
        (273, "allow", "/js/"),
        (274, "disallow", "/nonprofits/account/"),
        (275, "disallow", "/fbx"),
        (276, "disallow", "/uviewer"),
        (277, "disallow", "/landing/cmsnext-root/"),
    ])
    + _expected_rules("AdsBot-Google", [
        (281, "disallow", "/maps/api/js/"),
        (282, "allow", "/maps/api/js"),
        (283, "disallow", "/maps/api/place/js/"),
        (284, "disallow", "/maps/api/staticmap"),
        (285, "disallow", "/maps/api/streetview"),
    ])
    + _expected_rules("Twitterbot", [
        (289, "allow", "/imgres"),
        (290, "allow", "/search"),
        (291, "disallow", "/groups"),
        (292, "disallow", "/hosted/images/"),
        (293, "disallow", "/m/"),
    ])
    + _expected_rules("facebookexternalhit", [
        (296, "allow", "/imgres"),
        (297, "allow", "/search"),
        (298, "disallow", "/groups"),
        (299, "disallow", "/hosted/images/"),
        (300, "disallow", "/m/"),
    ])
)
# fmt: on

# Small robots.txt with an empty ``Disallow:`` value.  The expected ``source``
# values in the test (5 and 7) are line numbers, so the line layout matters.
# NOTE(review): this layout (leading empty line, blank line after
# ``User-agent: *`` and after ``Disallow:``) was reconstructed from those
# expected numbers -- confirm against the extension's behaviour.
GRAPESHOT_ROBOTSTXT = "\n".join(
    [
        "",  # line 1
        "User-agent: *",  # line 2
        "",  # line 3
        "User-agent: grapeshot",  # line 4
        "Disallow:",  # line 5
        "",  # line 6
        "Allow: /editorial/wp-admin/admin-ajax.php",  # line 7
        "",
    ]
)


class TestRobotstxt(unittest.TestCase):
    """Checks what the extension registers and what each function/module returns."""

    def test_funcs(self):
        """The extension registers exactly the scalar functions in ``FUNCTIONS``."""
        rows = db.execute("select name from loaded_functions").fetchall()
        self.assertEqual([row[0] for row in rows], FUNCTIONS)

    def test_modules(self):
        """The extension registers exactly the table modules in ``MODULES``."""
        rows = db.execute("select name from loaded_modules").fetchall()
        self.assertEqual([row[0] for row in rows], MODULES)

    def test_robotstxt_version(self):
        """``robotstxt_version()`` returns a string starting with ``v``."""
        version = db.execute("select robotstxt_version()").fetchone()[0]
        self.assertTrue(version.startswith("v"), version)

    def test_robotstxt_debug(self):
        """``robotstxt_debug()`` returns two lines of text."""
        debug = db.execute("select robotstxt_debug()").fetchone()[0]
        self.assertEqual(len(debug.splitlines()), 2)

    def test_robotstxt_matches(self):
        """``robotstxt_matches`` yields 1 / 0 for the Twitterbot paths checked below."""

        def robotstxt_matches(*args):
            return db.execute(
                "select robotstxt_matches(?, ?, ?)", args
            ).fetchone()[0]

        self.assertEqual(
            robotstxt_matches(GOOGLE_ROBOTSTXT, "Twitterbot", "/search"), 1
        )
        self.assertEqual(
            robotstxt_matches(GOOGLE_ROBOTSTXT, "Twitterbot", "/groups"), 0
        )

    def test_robotstxt_user_agents(self):
        """``robotstxt_user_agents`` lists each user agent with its ``source`` value."""

        def robotstxt_user_agents(*args):
            return execute_all("select * from robotstxt_user_agents(?)", args)

        self.assertEqual(
            robotstxt_user_agents(GOOGLE_ROBOTSTXT),
            [
                {"name": "*", "rules": None, "source": 1},
                {"name": "AdsBot-Google", "rules": None, "source": 280},
                {"name": "Twitterbot", "rules": None, "source": 288},
                {"name": "facebookexternalhit", "rules": None, "source": 295},
            ],
        )

    def test_robotstxt_rules(self):
        """``robotstxt_rules`` returns one row per allow/disallow rule."""

        def robotstxt_rules(*args):
            return execute_all("select * from robotstxt_rules(?)", args)

        self.assertEqual(robotstxt_rules(GOOGLE_ROBOTSTXT), GOOGLE_RULES)

        self.assertEqual(
            robotstxt_rules(GRAPESHOT_ROBOTSTXT),
            [
                {
                    "path": "",
                    "rule_type": "disallow",
                    "source": 5,
                    "user_agent": "grapeshot",
                },
                {
                    "path": "/editorial/wp-admin/admin-ajax.php",
                    "rule_type": "allow",
                    "source": 7,
                    "user_agent": "grapeshot",
                },
            ],
        )


class TestCoverage(unittest.TestCase):
    """Guards against registering a function or module without a matching test."""

    def test_coverage(self):
        """Every name in ``FUNCTIONS`` and ``MODULES`` has a ``test_<name>`` method."""
        prefix = "test_"
        # Strip only the leading prefix: ``str.replace`` would also remove a
        # ``test_`` occurring inside the function name itself.
        funcs_with_tests = {
            method[len(prefix):]
            for method in dir(TestRobotstxt)
            if method.startswith(prefix)
        }

        for func in FUNCTIONS:
            self.assertIn(
                func,
                funcs_with_tests,
                f"{func} does not have corresponding test in {funcs_with_tests}",
            )

        for module in MODULES:
            self.assertIn(
                module,
                funcs_with_tests,
                f"{module} does not have corresponding test in {funcs_with_tests}",
            )


if __name__ == "__main__":
    unittest.main()