# sr.yaml
# -------
# Serbian language specification
#
# Phrases are given in Cyrillic first; each has a Latin-script counterpart
# whose key/anchor carries the `_latin` suffix.
---
alphabet: абвгдђежзијклљмнњопрстћуфхцчџш
alphanumeric_probability: 0.7

components:
  level:
    null_probability: 0.8
    alphanumeric_probability: 0.2
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    null_probability: 0.7
    alphanumeric_probability: 0.3
  # Ways of folding sub-building components into the house_number label,
  # e.g. 12/3 or 12-3.
  combinations:
    - components:
        - house_number
        - staircase
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.1
    # For unit types like 2/34
    - components:
        - house_number
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005

# NOTE(review): unlike every other default/alternatives group in this file,
# no `probability` is given here for the default or for the Latin
# alternative. Confirm the consumer's fallback weighting is what is intended.
numbers:
  default: &broj
    canonical: број
    abbreviated: бр
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.6
    sample_probability: 0.1
    numeric:
      direction: left
    numeric_affix:
      affix: "бр."
      direction: left
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  alternatives:
    - alternative: &broj_latin
        canonical: broj
        abbreviated: br
        sample: true
        canonical_probability: 0.3
        abbreviated_probability: 0.6
        sample_probability: 0.1
        numeric:
          direction: left
        numeric_affix:
          affix: "br."
          direction: left
        numeric_probability: 0.4
        numeric_affix_probability: 0.6

and:
  default: &i
    canonical: и
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  probability: 0.9
  alternatives:
    - alternative: &i_latin
        canonical: i
        sample: true
        canonical_probability: 0.8
        sample_probability: 0.2
      probability: 0.1

cross_streets:
  i: *i
  i_latin: *i_latin
  at: &na
    canonical: на
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_latin: &na_latin
    canonical: na
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  corner: &ugao
    canonical: угао
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  ugao_latin: &ugao_latin
    canonical: ugao
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu: &na_uglu
    canonical: на углу
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu_latin: &na_uglu_latin
    canonical: na uglu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  # Phrases joining two street names; the weights below total 1.0.
  intersection:
    default: *i
    probability: 0.65
    alternatives:
      - alternative: *i_latin
        probability: 0.05
      - alternative: *na
        probability: 0.075
      - alternative: *na_latin
        probability: 0.025
      - alternative: *ugao
        probability: 0.1
      - alternative: *ugao_latin
        probability: 0.05
      - alternative: *na_uglu
        probability: 0.025
      - alternative: *na_uglu_latin
        probability: 0.025
  izmedu: &izmedu
    canonical: између
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5
  izmedu_latin: &izmedu_latin
    canonical: između
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5
  between:
    default: *izmedu
    probability: 0.9
    alternatives:
      - alternative: *izmedu_latin
        probability: 0.1

levels:
  sprat: &sprat
    canonical: спрат
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  sprat_latin: &sprat_latin
    canonical: sprat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  kat: &kat
    canonical: кат
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  kat_latin: &kat_latin
    canonical: kat
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  etaza: &etaza
    canonical: етажа
    abbreviated: ет
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  etaza_latin: &etaza_latin
    canonical: etaža
    abbreviated: et
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.3
        roman_numeral_probability: 0.7
      add_number_phrase: true
      add_number_phrase_probability: 0.1
    numeric_probability: 0.4
    ordinal_probability: 0.6
  prizemlje: &prizemlje
    canonical: приземље
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  prizemlje_latin: &prizemlje_latin
    canonical: prizemlje
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  parter: &parter
    canonical: партер
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  parter_latin: &parter_latin
    canonical: parter
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  podrum: &podrum
    canonical: подрум
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. подрум 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. подрум
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    # standalone + numeric + ordinal = 0.99 + 0.005 + 0.005 = 1.0
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005
  podrum_latin: &podrum_latin
    canonical: podrum
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    # e.g. podrum 1
    numeric:
      direction: left
      direction_probability: 0.8
    # e.g. 1. podrum
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.7
        roman_numeral_probability: 0.3
    # standalone + numeric + ordinal = 0.99 + 0.005 + 0.005 = 1.0
    standalone_probability: 0.99
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    ordinal_probability: 0.005
  # Phrases selected for specific floor numbers (keys are quoted so they stay
  # strings rather than being read as integers).
  aliases:
    "<-1":
      default: *podrum
      probability: 0.8
      alternatives:
        - alternative: *podrum_latin
          probability: 0.2
    "-1":
      default: *podrum
      probability: 0.8
      alternatives:
        - alternative: *podrum_latin
          probability: 0.2
    "0":
      default: *prizemlje
      probability: 0.45
      alternatives:
        - alternative: *prizemlje_latin
          probability: 0.05
        - alternative: *parter
          probability: 0.35
        - alternative: *parter_latin
          probability: 0.05
        - alternative: *sprat
          probability: 0.04
        - alternative: *sprat_latin
          probability: 0.01
        - alternative: *kat
          probability: 0.04
        - alternative: *kat_latin
          probability: 0.01
  numbering_starts_at: 0
  alphanumeric:
    default: *sprat
    probability: 0.65
    alternatives:
      - alternative: *sprat_latin
        probability: 0.1
      - alternative: *kat
        probability: 0.15
      - alternative: *kat_latin
        probability: 0.05
      - alternative: *etaza
        probability: 0.04
      - alternative: *etaza_latin
        probability: 0.01
    numeric_probability: 0.69  # With this probability, pick an integer
    roman_numeral_probability: 0.3  # Pick a Roman numeral for the actual value
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2

directions:
  right: &desno
    canonical: десно
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  desno_latin: &desno_latin
    canonical: desno
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  left: &levo
    canonical: лево
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  levo_latin: &levo_latin
    canonical: levo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  alternatives:
    - alternative: *desno
      probability: 0.45
    - alternative: *desno_latin
      probability: 0.05
    - alternative: *levo
      probability: 0.45
    - alternative: *levo_latin
      probability: 0.05

cardinal_directions:
  east: &istok
    canonical: исток
    abbreviated: и
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: и
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  istok_latin: &istok_latin
    canonical: istok
    abbreviated: i
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: i
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &zapad
    canonical: запад
    abbreviated: з
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: з
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  zapad_latin: &zapad_latin
    canonical: zapad
    abbreviated: z
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: z
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &sever
    canonical: север
    abbreviated: с
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: с
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  sever_latin: &sever_latin
    canonical: sever
    abbreviated: s
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  south: &jug
    canonical: југ
    abbreviated: ј
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: ј
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  jug_latin: &jug_latin
    canonical: jug
    abbreviated: j
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: j
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  alternatives:
    - alternative: *sever
      probability: 0.23
    - alternative: *sever_latin
      probability: 0.02
    - alternative: *istok
      probability: 0.23
    - alternative: *istok_latin
      probability: 0.02
    - alternative: *jug
      probability: 0.23
    - alternative: *jug_latin
      probability: 0.02
    - alternative: *zapad
      probability: 0.23
    - alternative: *zapad_latin
      probability: 0.02

entrances:
  ulaz: &ulaz
    canonical: улаз
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  ulaz_latin: &ulaz_latin
    canonical: ulaz
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  # Ulaz 1, Ulaz A, etc.
  alphanumeric: &entrance_alphanumeric
    default: *ulaz
    probability: 0.8
    alternatives:
      - alternative: *ulaz_latin
        probability: 0.2
    numeric_probability: 0.1  # e.g. Ulaz 1
    alpha_probability: 0.85  # e.g. Ulaz A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

staircases:
  stepeniste: &stepeniste
    canonical: степениште
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  stepeniste_latin: &stepeniste_latin
    canonical: stepenište
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  alphanumeric: &staircase_alphanumeric
    default: *stepeniste
    probability: 0.8
    alternatives:
      - alternative: *stepeniste_latin
        probability: 0.2
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # NOTE(review): the source this was restored from had lost its
    # indentation; `directional` is assumed to belong to `alphanumeric`
    # rather than being its sibling - confirm against the consumer.
    directional:
      direction: right
      direction_probability: 0.85
      modifier:
        alternatives:
          - alternative: *desno
            probability: 0.19
          - alternative: *desno_latin
            probability: 0.01
          - alternative: *levo
            probability: 0.19
          - alternative: *levo_latin
            probability: 0.01
          - alternative: *sever
            probability: 0.14
          - alternative: *sever_latin
            probability: 0.01
          - alternative: *jug
            probability: 0.14
          - alternative: *jug_latin
            probability: 0.01
          - alternative: *istok
            probability: 0.14
          - alternative: *istok_latin
            probability: 0.01
          - alternative: *zapad
            probability: 0.14
          - alternative: *zapad_latin
            probability: 0.01

po_boxes:
  postanski_fah: &postanski_fah
    canonical: поштански фах
    abbreviated: пф
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2  # poštanski fah br. 1234
  postanski_fah_latin: &postanski_fah_latin
    canonical: poštanski fah
    abbreviated: pf
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2  # poštanski fah br. 1234
  postanski_pretinac: &postanski_pretinac
    canonical: поштански претинац
    sample: true
    # 0.6 + 0.4 = 1.0, matching the Latin counterpart below
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pretinac_latin: &postanski_pretinac_latin
    canonical: poštanski pretinac
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pregradak: &postanski_pregradak
    canonical: поштански преградак
    sample: true
    # 0.6 + 0.4 = 1.0, matching the Latin counterpart below
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  postanski_pregradak_latin: &postanski_pregradak_latin
    canonical: poštanski pregradak
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  alphanumeric:
    default: *postanski_fah
    probability: 0.7
    alternatives:
      - alternative: *postanski_fah_latin
        probability: 0.05
      - alternative: *postanski_pretinac
        probability: 0.1
      - alternative: *postanski_pretinac_latin
        probability: 0.05
      - alternative: *postanski_pregradak
        probability: 0.075
      - alternative: *postanski_pregradak_latin
        probability: 0.025
    numeric_probability: 0.9  # pf 123
    alpha_probability: 0.05  # pf A
    numeric_plus_alpha_probability: 0.04  # pf 123G
    alpha_plus_numeric_probability: 0.01  # pf A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Distribution over the number of digits in a box number (totals 1.0).
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05

units:
  stan: &stan
    canonical: стан
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  stan_latin: &stan_latin
    canonical: stan
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  apartman: &apartman
    canonical: апартман
    abbreviated: ап
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  apartman_latin: &apartman_latin
    canonical: apartman
    abbreviated: ap
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.2
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  soba: &soba
    canonical: соба
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  soba_latin: &soba_latin
    canonical: soba
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  kancelarija: &kancelarija
    canonical: канцеларија
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  kancelarija_latin: &kancelarija_latin
    canonical: kancelarija
    sample: true
    canonical_probability: 0.6
    sample_probability: 0.4
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.1
  alphanumeric: &unit_alphanumeric
    default: *stan
    probability: 0.5
    alternatives:
      - alternative: *stan_latin
        probability: 0.1
      - alternative: *apartman
        probability: 0.2
      - alternative: *apartman_latin
        probability: 0.05
      - alternative: *soba
        probability: 0.1
      - alternative: *soba_latin
        probability: 0.05
    numeric_probability: 0.9  # e.g. stan 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1A
    alpha_plus_numeric_probability: 0.03  # e.g. A1
    alpha_probability: 0.04  # e.g. stan A
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
    # If there are 10 floors, create unit numbers like #301 or #1032
    use_floor_probability: 0.01
  # Per-zone overrides of the unit phrase distribution.
  zones:
    commercial: &commercial_unit_types
      default: *soba
      probability: 0.55
      alternatives:
        - alternative: *soba_latin
          probability: 0.05
        - alternative: *kancelarija
          probability: 0.35
        - alternative: *kancelarija_latin
          probability: 0.05
      numeric_probability: 0.95  # e.g. soba 1
      numeric_plus_alpha_probability: 0.01  # e.g. soba 1A
      alpha_plus_numeric_probability: 0.01  # e.g. soba A1
      alpha_probability: 0.03  # e.g. soba A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      default: *soba
      probability: 0.9
      alternatives:
        - alternative: *soba_latin
          probability: 0.1
      numeric_probability: 0.95  # e.g. soba 1
      numeric_plus_alpha_probability: 0.01  # e.g. soba 1A
      alpha_plus_numeric_probability: 0.01  # e.g. soba A1
      alpha_probability: 0.03  # e.g. soba A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1