# bs.yaml
# -------
# Bosnian language specification
#
# Phrase dictionaries (canonical / abbreviated forms) for address components,
# together with the probabilities used to pick between them. Anchors (&name)
# define a phrase once; aliases (*name) reuse it further down.
#
# Every group of mutually exclusive weights in this file (alternatives lists,
# canonical/abbreviated/sample, numeric/ordinal/standalone, ...) is written so
# that it sums to 1.0 -- presumably the consumer samples them as a
# distribution; keep that invariant when editing.
---
components:
  # Per-component chance of emitting nothing vs. an alphanumeric value.
  level:
    null_probability: 0.9
    alphanumeric_probability: 0.1
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    null_probability: 0.7
    alphanumeric_probability: 0.3
  # Components that may be joined into a single house_number token.
  # Each combination's `probability` stands on its own (they are not
  # alternatives of one another and do not sum to 1).
  combinations:
    - components:
        - house_number
        - staircase
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.1
    # For unit types like 2/34
    - components:
        - house_number
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005

numbers:
  # Phrase used when an address has no house number.
  no_number:
    default:
      canonical: bez broja
      abbreviated: bb
      sample: true
      canonical_probability: 0.3
      abbreviated_probability: 0.4
      sample_probability: 0.3
  default: &broj
    canonical: broj
    abbreviated: br
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.6
    sample_probability: 0.1
    numeric:
      direction: left
    numeric_affix:
      affix: "br."
      whitespace_probability: 0.6
      direction: left
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  alphanumeric_phrase_probability: 0.05
  no_number_probability: 0.05

and:
  default: &i
    canonical: i
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2

cross_streets:
  i: *i
  at: &na
    canonical: na
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  corner: &ugao
    canonical: ugao
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  corner_of: &uglu
    canonical: uglu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu: &na_uglu
    canonical: na uglu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  intersection:
    default: *i
    probability: 0.65
    alternatives:
      - alternative: *na
        probability: 0.1
      - alternative: *uglu
        probability: 0.1
      - alternative: *na_uglu
        probability: 0.1
      - alternative: *ugao
        probability: 0.05
  izmedu: &izmedu
    canonical: između
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    # NOTE(review): nesting inferred from key order -- confirm whether
    # parentheses_probability belongs to this phrase or to cross_streets.
    parentheses_probability: 0.5
  between:
    default: *izmedu

levels:
  sprat: &sprat
    canonical: sprat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  kat: &kat
    canonical: kat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  prizemlje: &prizemlje
    canonical: prizemlje
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  parter: &parter
    canonical: parter
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  mezanino: &mezanin
    canonical: mezanin
    half_floors: true
    canonical_probability: 0.8
    sample_probability: 0.2
    sample: true
    # e.g. mezanin 2
    numeric:
      direction: left
    # e.g. 2. mezanin
    ordinal:
      direction: right
    # numeric + ordinal + standalone sum to 1.0, as they do for podrum below.
    numeric_probability: 0.1
    ordinal_probability: 0.2
    standalone_probability: 0.7
  podrum: &podrum
    canonical: podrum
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. podrum 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. podrum
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005
  # Phrases substituted for specific floor numbers / ranges.
  aliases:
    "<-1":
      default: *podrum
    "-1":
      default: *podrum
    # Special token for half-floors
    half_floors:
      default: *mezanin
    "0":
      default: *prizemlje
      probability: 0.5
      alternatives:
        - alternative: *parter
          probability: 0.4
        - alternative: *kat
          probability: 0.05
        - alternative: *sprat
          probability: 0.05
  # NOTE(review): placed at the `levels` level based on key order -- confirm.
  numbering_starts_at: 0
  alphanumeric:
    default: *kat
    probability: 0.5
    alternatives:
      - alternative: *sprat
        probability: 0.5
    numeric_probability: 0.69  # With this probability, pick an integer
    roman_numeral_probability: 0.3  # Pick a Roman numeral for the actual value
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2

categories:
  near:
    default:
      canonical: u blizini
  nearby:
    default:
      canonical: u blizini
    probability: 0.6
    alternatives:
      - alternative:
          canonical: u blizini ovdje
        probability: 0.3
      - alternative:
          canonical: ovde
        probability: 0.1
  near_me:
    default:
      canonical: u blizini mene
  # Don't worry about agreement
  in:
    default:
      canonical: u
  # Probabilities of each phrase
  near_probability: 0.35
  nearby_probability: 0.2
  near_me_probability: 0.1
  in_probability: 0.35

directions:
  right: &desno
    canonical: desno
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  left: &lijevo
    canonical: lijevo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  alternatives:
    - alternative: *desno
      probability: 0.5
    - alternative: *lijevo
      probability: 0.5

cardinal_directions:
  east: &istok
    canonical: istok
    abbreviated: i
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: i
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &zapad
    canonical: zapad
    abbreviated: z
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: z
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &sjever
    canonical: sjever
    abbreviated: s
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  south: &jug
    canonical: jug
    abbreviated: j
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: j
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  # Four equally likely directions; the weights must sum to 1.0 like every
  # other alternatives list in this file.
  alternatives:
    - alternative: *sjever
      probability: 0.25
    - alternative: *istok
      probability: 0.25
    - alternative: *jug
      probability: 0.25
    - alternative: *zapad
      probability: 0.25

entrances:
  ulaz: &ulaz
    canonical: ulaz
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Ulaz 1, Ulaz A, etc.
  alphanumeric: &entrance_alphanumeric
    default: *ulaz
    numeric_probability: 0.1  # e.g. Ulaz 1
    alpha_probability: 0.85  # e.g. Ulaz A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

staircases:
  stubiste: &stubiste
    canonical: stubište
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  alphanumeric: &staircase_alphanumeric
    default: *stubiste
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # NOTE(review): nesting of `directional` is inferred from key order
    # (it directly follows the staircase alphanumeric settings) -- confirm
    # against the consumer before relying on it.
    directional:
      direction: right
      direction_probability: 0.85
      modifier:
        alternatives:
          - alternative: *desno
            probability: 0.2
          - alternative: *lijevo
            probability: 0.2
          - alternative: *sjever
            probability: 0.15
          - alternative: *jug
            probability: 0.15
          - alternative: *istok
            probability: 0.15
          - alternative: *zapad
            probability: 0.15

po_boxes:
  postanski_pretinac: &postanski_pretinac
    canonical: poštanski pretinac
    abbreviated: p.p
    sample: true
    canonical_probability: 0.2
    abbreviated_probability: 0.4
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  alphanumeric:
    default: *postanski_pretinac
    numeric_probability: 0.9  # pp 123
    alpha_probability: 0.05  # p.p A
    numeric_plus_alpha_probability: 0.04  # pp 123G
    alpha_plus_numeric_probability: 0.01  # pp A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Distribution over the number of digits in a generated box number.
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05

units:
  stan: &stan
    canonical: stan
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  apartman: &apartman
    canonical: apartman
    abbreviated: ap
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  soba: &soba
    canonical: soba
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  ured: &ured
    canonical: ured
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  alphanumeric: &unit_alphanumeric
    default: *stan
    probability: 0.6
    alternatives:
      - alternative: *apartman
        probability: 0.3
      - alternative: *soba
        probability: 0.1
    numeric_probability: 0.9  # e.g. stan. 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. stan A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # If there are 10 floors, create unit numbers like #301 or #1032
    use_floor_probability: 0.05
  # Unit phrase choices that override the default per zone type.
  zones:
    commercial: &commercial_unit_types
      default: *soba
      probability: 0.6
      alternatives:
        - alternative: *ured
          probability: 0.4
      numeric_probability: 0.95  # e.g. soba 1
      numeric_plus_alpha_probability: 0.01  # e.g. soba 1A
      alpha_plus_numeric_probability: 0.01  # e.g. soba A1
      alpha_probability: 0.03  # e.g. soba A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      default: *soba
      numeric_probability: 0.95  # e.g. soba 1
      numeric_plus_alpha_probability: 0.01  # e.g. soba 1A
      alpha_plus_numeric_probability: 0.01  # e.g. soba A1
      alpha_probability: 0.03  # e.g. soba A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1