#!/usr/bin/python3
# ISC License (ISC)
#
# Copyright (c) 2016, Austin Hellyer
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
# RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
# CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
#
# What is ISO 3166-3?
#
# | ISO 3166-3 is part of the ISO 3166 standard published by the International
# | Organization for Standardization (ISO), and defines codes for country names
# | which have been deleted from ISO 3166-1 since its first publication in 1974.
# |
# | - [Wikipedia](http://en.wikipedia.org/wiki/ISO_3166-3)
#
# Originally by zeyla on GitHub.

# USAGE:
# Requires python3. Go to the following page, sort by the numeric range, copy
# the contents of the table minus the header cells, then run 'make update':
# https://en.wikipedia.org/wiki/ISO_3166-3#Current_codes
#
# NOTE(review): the script below downloads and parses the page itself, so the
# manual copy step described above looks obsolete -- confirm and trim.

import os
import re
import subprocess
import sys
import urllib.request

from bs4 import BeautifulSoup

PAGE_URL = 'https://en.wikipedia.org/wiki/ISO_3166-3'

# Markers in codes.rs that delimit the generated section.
BEGIN_MARKER = '// Begin'
END_MARKER = '// End\n'

# Strip parenthesised and bracketed asides (each with its leading whitespace
# character) from the "new country names" cell. Compiled once, outside the
# row loop, and written as raw strings so the backslashes reach the regex
# engine without relying on invalid string escapes.
PAREN_ASIDE = re.compile(r'\s\(.+?\)')
BRACKET_ASIDE = re.compile(r'\s\[.+?\]')


def rust_escape(value):
    """Escape backslashes and double quotes for a Rust string literal."""
    return value.replace('\\', '\\\\').replace('"', '\\"')


# Get the contents of the URL.
with urllib.request.urlopen(PAGE_URL) as response:
    html = response.read()

soup = BeautifulSoup(html, 'html.parser')

table = soup.find('table', class_='sortable')
if table is None:
    # Fail with a clear message instead of an AttributeError on None.
    sys.exit('Could not find the sortable table on {}'.format(PAGE_URL))

rows = table.find_all('tr', style='vertical-align:top;')

entries = []

# Cycle through each row and append to the text its entry.
for row in rows:
    # 0: Former country name
    # 1: Former codes (alpha2, alpha3, num)
    # 2: Period of validity
    # 3: New country names and codes
    cells = row.find_all('td')

    # 0: ISO 3166-3 code
    headers = row.find_all('th')

    # 0: alpha2
    # 1: alpha3
    # 2: num
    # Anything from the first '[' onwards is dropped before splitting.
    codes_data = cells[1].get_text().split('[')[0].split(', ')

    # 0: from
    # 1: to
    # The two years are separated by an en dash, not an ASCII hyphen.
    valid_data = cells[2].get_text().split('[')[0].split('–')

    alpha2 = codes_data[0]
    alpha3 = codes_data[1]
    num = codes_data[2][0:3]

    code = headers[0].get_text().split('\n')[0].split(' [')[0]

    description_reg = BRACKET_ASIDE.sub(
        '', PAREN_ASIDE.sub('', cells[3].get_text()))

    # The first line is joined to the rest with a space; the remaining lines
    # are joined to each other with '; '.
    description_parts = description_reg.split('\n', 1)
    if len(description_parts) > 1:
        description_end = description_parts[1].split('\n')
        description = description_parts[0] + ' ' + '; '.join(description_end)
    else:
        description = description_reg

    name = cells[0].get_text().split(' !')[0].split(' [')[0]

    # Only the leading four characters (the year) of each bound are kept.
    valid_from = valid_data[0][0:4]
    valid_to = valid_data[1][0:4]

    # NOTE(review): the literals below carry a single leading space in this
    # copy of the script; confirm the intended indentation against the
    # existing entries in src/codes.rs.
    entries.append(''.join([
        ' codes.push(FormerCountryCode {\n',
        ' code: "{}",\n'.format(rust_escape(code)),
        ' codes_former: FormerCountryCodeCodes {\n',
        ' alpha2: "{}",\n'.format(rust_escape(alpha2)),
        ' alpha3: "{}",\n'.format(rust_escape(alpha3)),
        ' num: "{}",\n'.format(rust_escape(num)),
        ' },\n',
        ' description: "{}",\n'.format(rust_escape(description)),
        ' name: "{}",\n'.format(rust_escape(name)),
        ' validity: [{}, {}],\n'.format(valid_from, valid_to),
        ' });\n',
    ]))

text = ''.join(entries)

# Read the codes.rs file and split it to find the code after 'Begin' and before
# 'End'
codes_path = os.path.join(os.path.dirname(__file__), '../src/codes.rs')

# Rust source files are UTF-8, so do not depend on the locale's default.
with open(codes_path, 'r', encoding='utf-8') as f:
    codes_file = f.read()

# Validate both markers *before* opening the file for writing: mode 'w'
# truncates immediately, so failing afterwards would leave codes.rs empty.
if BEGIN_MARKER not in codes_file or END_MARKER not in codes_file:
    sys.exit("{} is missing the '// Begin' or '// End' marker; "
             "file left unchanged.".format(codes_path))

# Everything before the last 'Begin' marker ...
codes_head = codes_file.rsplit(BEGIN_MARKER, 1)[0]
# ... and everything after the last 'End' marker is preserved.
codes_tail = codes_file.rsplit(END_MARKER, 1)[1]

updated = codes_head + '// Begin\n' + text + ' // End\n' + codes_tail

with open(codes_path, 'w', encoding='utf-8') as f:
    f.write(updated)

print('Updated.')