---
# ru.yaml
# -------
# Russian language specification
#
# Most phrases are defined twice: once in Cyrillic and once in a Latin
# transliteration (the *_latin anchors). Within every default/alternatives
# group in this file the probabilities sum to 1.0.
#
# NOTE(review): this file was recovered from a copy whose line breaks had
# been collapsed, so the nesting below is reconstructed. The placement of
# the following keys could not be determined from the text alone and should
# be confirmed against the code that reads this config:
#   - components.combinations
#   - cross_streets.mezhdu*.parentheses_probability
#   - units.alphanumeric.alpha
#   - staircases.directional
#   - po_boxes.digits
#   - add_number_phrase* inside each PO box phrase's `numeric` mapping

alphabet: абвгдежзийклмнопрстуфхцчшщъыьэюя
alphabet_probability: 0.7

components:
  level:
    null_probability: 0.95
    alphanumeric_probability: 0.04
    standalone_probability: 0.01
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    null_probability: 0.6
    alphanumeric_probability: 0.4

  # Components that may be joined into a single house_number token,
  # e.g. 12/3/45
  combinations:
    - components:
        - house_number
        - staircase
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005
    - components:
        - house_number
        - level
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.01
    # For unit types like 2/34
    - components:
        - house_number
        - unit
      label: house_number
      separators:
        - separator: "/"
          probability: 0.95
        - separator: "-"
          probability: 0.05
      probability: 0.005

numbers:
  default: &nomer
    canonical: номер
    abbreviated: №
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
  probability: 0.95
  alternatives:
    - alternative: &nomer_latin
        canonical: nomer
        # Quoted so it is never read as a boolean
        abbreviated: "no"
        sample: true
        canonical_probability: 0.4
        abbreviated_probability: 0.4
        sample_probability: 0.2
        numeric:
          direction: left
      probability: 0.05

house_numbers:
  dom: &dom
    canonical: дом
    abbreviated: д
    sample: true
    canonical_probability: 0.6
    abbreviated_probability: 0.3
    sample_probability: 0.1
    numeric:
      direction: left
  dom_latin: &dom_latin
    canonical: dom
    abbreviated: d
    sample: true
    canonical_probability: 0.6
    abbreviated_probability: 0.3
    sample_probability: 0.1
    numeric:
      direction: left

  alphanumeric:
    default: *dom
    probability: 0.95
    alternatives:
      - alternative: *dom_latin
        probability: 0.05

  # без номера (б/н) addresses, i.e. "without number"
  no_number:
    default:
      canonical: без номера
      abbreviated: б/н
      sample: true
      canonical_probability: 0.1
      abbreviated_probability: 0.7
      sample_probability: 0.2

  # Very common in Russian to write dom/d
  alphanumeric_phrase_probability: 0.6
  # With this probability, use без номера if no house_number is specified
  no_number_probability: 0.1

and:
  default: &i
    canonical: и
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  probability: 0.9
  alternatives:
    - alternative: &i_latin
        canonical: i
        sample: true
        canonical_probability: 0.8
        sample_probability: 0.2
      probability: 0.1

cross_streets:
  i: *i
  i_latin: *i_latin
  corner: &ugol
    canonical: угол
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  ugol_latin: &ugol_latin
    canonical: ugol
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  uglu: &uglu
    canonical: углу
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  uglu_latin: &uglu_latin
    canonical: uglu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu: &na_uglu
    canonical: на углу
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  na_uglu_latin: &na_uglu_latin
    canonical: na uglu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2

  intersection:
    default: *i
    probability: 0.65
    alternatives:
      - alternative: *i_latin
        probability: 0.05
      - alternative: *ugol
        probability: 0.075
      - alternative: *ugol_latin
        probability: 0.075
      - alternative: *uglu
        probability: 0.05
      - alternative: *uglu_latin
        probability: 0.05
      - alternative: *na_uglu
        probability: 0.025
      - alternative: *na_uglu_latin
        probability: 0.025

  mezhdu: &mezhdu
    canonical: между
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5
  mezhdu_latin: &mezhdu_latin
    canonical: mezhdu
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    parentheses_probability: 0.5

  between:
    default: *mezhdu
    probability: 0.9
    alternatives:
      - alternative: *mezhdu_latin
        probability: 0.1

levels:
  etazh: &etazh
    canonical: этаж
    abbreviated: эт
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.3
    sample_probability: 0.3
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.8
        roman_numeral_probability: 0.1
        spellout_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.5
        roman_numeral_probability: 0.3
        spellout_probability: 0.2
    numeric_probability: 0.4
    ordinal_probability: 0.6
  etazh_latin: &etazh_latin
    canonical: etazh
    abbreviated: et
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.3
    sample_probability: 0.3
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.8
        roman_numeral_probability: 0.1
        spellout_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.5
        roman_numeral_probability: 0.3
        spellout_probability: 0.2
    numeric_probability: 0.4
    ordinal_probability: 0.6
  uroven: &uroven
    canonical: уровень
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.8
        roman_numeral_probability: 0.1
        spellout_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.5
        roman_numeral_probability: 0.3
        spellout_probability: 0.2
    numeric_probability: 0.4
    ordinal_probability: 0.6
  uroven_latin: &uroven_latin
    # The trailing apostrophe transliterates the soft sign (ь)
    canonical: "uroven'"
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
    numeric:
      direction: left
      direction_probability: 0.9
      digits:
        ascii_probability: 0.8
        roman_numeral_probability: 0.1
        spellout_probability: 0.1
    ordinal:
      direction: right
      digits:
        ascii_probability: 0.5
        roman_numeral_probability: 0.3
        spellout_probability: 0.2
    numeric_probability: 0.4
    ordinal_probability: 0.6
  pervyy_etazh: &pervyy_etazh
    canonical: первый этаж
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  pervyy_etazh_latin: &pervyy_etazh_latin
    canonical: pervyy etazh
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  nizhniy_etazh: &nizhniy_etazh
    canonical: нижний этаж
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  nizhniy_etazh_latin: &nizhniy_etazh_latin
    canonical: nizhniy etazh
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
  tsokolnyy_etazh: &tsokolnyy_etazh
    canonical: цокольный этаж
    abbreviated: цок эт
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
  tsokolnyy_etazh_latin: &tsokolnyy_etazh_latin
    canonical: "tsokol'nyy etazh"
    abbreviated: tsok et
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
  podval: &podval
    canonical: подвал
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
    numeric_affix:
      affix: п
      direction: left
    ordinal:
      direction: right
    number_abs_value: true
    number_min_abs_value: 2
    # Basement 2 == Sub-basement 1
    number_subtract_abs_value: 1
    standalone_probability: 0.985
    numeric_probability: 0.005
    numeric_affix_probability: 0.005
    ordinal_probability: 0.005
  podval_latin: &podval_latin
    canonical: podval
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      direction_probability: 0.9
    numeric_affix:
      affix: p
      direction: left
    ordinal:
      direction: right
    number_abs_value: true
    number_min_abs_value: 2
    # Basement 2 == Sub-basement 1
    number_subtract_abs_value: 1
    standalone_probability: 0.985
    numeric_probability: 0.005
    numeric_affix_probability: 0.005
    ordinal_probability: 0.005

  # Phrases used for specific floor numbers (keys are quoted so they stay
  # strings rather than integers)
  aliases:
    "<-1":
      default: *podval
      probability: 0.9
      alternatives:
        - alternative: *podval_latin
          probability: 0.1
    "-1": &ground_floor
      default: *tsokolnyy_etazh
      probability: 0.89
      alternatives:
        - alternative: *tsokolnyy_etazh_latin
          probability: 0.01
        - alternative: *etazh
          probability: 0.09
        - alternative: *etazh_latin
          probability: 0.01
    "0":
      default: *pervyy_etazh
      probability: 0.6
      alternatives:
        - alternative: *pervyy_etazh_latin
          probability: 0.05
        - alternative: *nizhniy_etazh
          probability: 0.2
        - alternative: *nizhniy_etazh_latin
          probability: 0.05
        - alternative: *tsokolnyy_etazh
          probability: 0.075
        - alternative: *tsokolnyy_etazh_latin
          probability: 0.025

  numbering_starts_at: 0

  alphanumeric:
    default: *etazh
    probability: 0.8
    alternatives:
      - alternative: *etazh_latin
        probability: 0.1
      - alternative: *uroven
        probability: 0.09
      - alternative: *uroven_latin
        probability: 0.01
    numeric_probability: 0.79  # With this probability, pick an integer
    roman_numeral_probability: 0.2
    alpha_probability: 0.0098  # With this probability, pick a letter e.g. A
    numeric_plus_alpha_probability: 0.0001  # e.g. 2A
    alpha_plus_numeric_probability: 0.0001  # e.g. A2

blocks:
  alphanumeric:
    default: &blok
      canonical: блок
      abbreviated: бл
      sample: true
      canonical_probability: 0.4
      abbreviated_probability: 0.4
      sample_probability: 0.2
      numeric:
        direction: left
    probability: 0.95
    alternatives:
      - alternative: &blok_latin
          canonical: blok
          abbreviated: bl
          sample: true
          canonical_probability: 0.4
          abbreviated_probability: 0.4
          sample_probability: 0.2
          numeric:
            direction: left
        probability: 0.05

categories:
  near:
    default:
      canonical: вблизи
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
    probability: 0.69
    alternatives:
      - alternative:
          canonical: vblizi
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: близ
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.04
      - alternative:
          canonical: bliz
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: около
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.04
      - alternative:
          canonical: под
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.04
      - alternative:
          canonical: pod
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: okolo
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: у
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.04
      - alternative:
          canonical: u
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: возле
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.04
      - alternative:
          canonical: vozle
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: рядом с
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.04
      - alternative:
          canonical: ryadom s
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01

  nearby:
    default:
      canonical: поблизости
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
    probability: 0.64
    alternatives:
      - alternative:
          canonical: poblizosti
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: рядом здесь
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.19
      - alternative:
          canonical: "ryadom zdes'"
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: здесь
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.09
      - alternative:
          canonical: "zdes'"
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01
      - alternative:
          canonical: рядом
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.04
      - alternative:
          canonical: ryadom
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01

  near_me:
    default:
      # The preposition takes the form "со" before "мной"
      canonical: рядом со мной
      sample: true
      canonical_probability: 0.8
      sample_probability: 0.2
    probability: 0.99
    alternatives:
      - alternative:
          canonical: ryadom so mnoy
          sample: true
          canonical_probability: 0.8
          sample_probability: 0.2
        probability: 0.01

  in:
    default:
      canonical: в
    probability: 0.99
    alternatives:
      - alternative:
          canonical: v
        probability: 0.01

  # Probabilities of each phrase
  near_probability: 0.35
  nearby_probability: 0.2
  near_me_probability: 0.1
  in_probability: 0.35

directions:
  pravo: &pravo
    canonical: право
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  pravo_latin: &pravo_latin
    canonical: pravo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  levo: &levo
    canonical: лево
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  levo_latin: &levo_latin
    canonical: levo
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right

  alternatives:
    - alternative: *pravo
      probability: 0.49
    - alternative: *pravo_latin
      probability: 0.01
    - alternative: *levo
      probability: 0.49
    - alternative: *levo_latin
      probability: 0.01

cardinal_directions:
  vostok: &vostok
    canonical: восток
    abbreviated: в
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: в
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  vostok_latin: &vostok_latin
    canonical: vostok
    abbreviated: v
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: v
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  zapad: &zapad
    canonical: запад
    abbreviated: з
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: з
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  zapad_latin: &zapad_latin
    canonical: zapad
    abbreviated: z
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: z
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  sever: &sever
    canonical: север
    abbreviated: с
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: с
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  sever_latin: &sever_latin
    canonical: sever
    abbreviated: s
    canonical_probability: 0.95
    abbreviated_probability: 0.05
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  yug: &yug
    canonical: юг
    abbreviated: ю
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: ю
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  yug_latin: &yug_latin
    canonical: yug
    # Quoted: a bare y is a boolean under the YAML 1.1 spec
    abbreviated: "y"
    sample: true
    canonical_probability: 0.75
    abbreviated_probability: 0.1
    sample_probability: 0.15
    numeric:
      direction: right
    numeric_affix:
      affix: "y"
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5

  alternatives:
    - alternative: *sever
      probability: 0.24
    - alternative: *sever_latin
      probability: 0.01
    - alternative: *vostok
      probability: 0.24
    - alternative: *vostok_latin
      probability: 0.01
    - alternative: *yug
      probability: 0.24
    - alternative: *yug_latin
      probability: 0.01
    - alternative: *zapad
      probability: 0.24
    - alternative: *zapad_latin
      probability: 0.01

entrances:
  vkhod: &vkhod
    canonical: вход
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  vkhod_latin: &vkhod_latin
    canonical: vkhod
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left

  # вход 1, вход A, etc.
  alphanumeric:
    default: *vkhod
    probability: 0.99
    alternatives:
      - alternative: *vkhod_latin
        probability: 0.01
    numeric_probability: 0.1  # e.g. вход 1
    alpha_probability: 0.85  # e.g. вход A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

staircases:
  lestnitsa: &lestnitsa
    canonical: лестница
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
  lestnitsa_latin: &lestnitsa_latin
    canonical: lestnitsa
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left

  alphanumeric: &staircase_alphanumeric
    default: *lestnitsa
    probability: 0.99
    alternatives:
      - alternative: *lestnitsa_latin
        probability: 0.01
    numeric_probability: 0.75
    alpha_probability: 0.2
    numeric_plus_alpha_probability: 0.025
    alpha_plus_numeric_probability: 0.025
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

  # Staircase qualified by a side or compass direction
  directional:
    direction: right
    direction_probability: 0.85
    modifier:
      alternatives:
        - alternative: *pravo
          probability: 0.19
        - alternative: *pravo_latin
          probability: 0.01
        - alternative: *levo
          probability: 0.19
        - alternative: *levo_latin
          probability: 0.01
        - alternative: *sever
          probability: 0.14
        - alternative: *sever_latin
          probability: 0.01
        - alternative: *yug
          probability: 0.14
        - alternative: *yug_latin
          probability: 0.01
        - alternative: *vostok
          probability: 0.14
        - alternative: *vostok_latin
          probability: 0.01
        - alternative: *zapad
          probability: 0.14
        - alternative: *zapad_latin
          probability: 0.01

po_boxes:
  abonementnyy_pochtovyy_yashchik: &abonementnyy_pochtovyy_yashchik
    canonical: абонементный почтовый ящик
    abbreviated: а/я
    sample: true
    canonical_probability: 0.2
    abbreviated_probability: 0.7
    sample_probability: 0.1
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  abonementnyy_pochtovyy_yashchik_latin: &abonementnyy_pochtovyy_yashchik_latin
    canonical: abonementnyy pochtovyy yashchik
    abbreviated: a/ya
    sample: true
    canonical_probability: 0.2
    abbreviated_probability: 0.7
    sample_probability: 0.1
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  pochtovyy_yashchik: &pochtovyy_yashchik
    # Short form, matching the key and its Latin twin "pochtovyy yashchik";
    # the long "абонементный ..." form is the separate entry above.
    canonical: почтовый ящик
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  pochtovyy_yashchik_latin: &pochtovyy_yashchik_latin
    canonical: pochtovyy yashchik
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  pochtovyy_abonentskiy_yashchik: &pochtovyy_abonentskiy_yashchik
    canonical: почтовый абонентский ящик
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2
  pochtovyy_abonentskiy_yashchik_latin: &pochtovyy_abonentskiy_yashchik_latin
    canonical: pochtovyy abonentskiy yashchik
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.2

  alphanumeric:
    default: *abonementnyy_pochtovyy_yashchik
    probability: 0.79
    alternatives:
      - alternative: *abonementnyy_pochtovyy_yashchik_latin
        probability: 0.01
      - alternative: *pochtovyy_yashchik
        probability: 0.14
      - alternative: *pochtovyy_yashchik_latin
        probability: 0.01
      - alternative: *pochtovyy_abonentskiy_yashchik
        probability: 0.04
      - alternative: *pochtovyy_abonentskiy_yashchik_latin
        probability: 0.01
    numeric_probability: 0.9  # а/я 123
    alpha_probability: 0.05  # а/я А
    numeric_plus_alpha_probability: 0.04  # а/я 123А
    alpha_plus_numeric_probability: 0.01  # а/я А123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

  # Distribution over the number of digits in a box number
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05

units:
  kvartira: &kvartira
    canonical: квартира
    abbreviated: кв
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.6
    sample_probability: 0.1
    numeric:
      direction: left
  kvartira_latin: &kvartira_latin
    canonical: kvartira
    abbreviated: kv
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.6
    sample_probability: 0.1
    numeric:
      direction: left
  kabinet: &kabinet
    canonical: кабинет
    abbreviated: каб
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  kabinet_latin: &kabinet_latin
    canonical: kabinet
    abbreviated: kab
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  litera: &litera
    canonical: литера
    abbreviated: лит
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  litera_latin: &litera_latin
    canonical: litera
    abbreviated: lit
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  ofis: &ofis
    canonical: офис
    abbreviated: оф
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.5
    sample_probability: 0.1
    numeric:
      direction: left
  ofis_latin: &ofis_latin
    canonical: ofis
    abbreviated: of
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.5
    sample_probability: 0.1
    numeric:
      direction: left
  pomeshhenie: &pomeshhenie
    canonical: помещение
    abbreviated: пом
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  pomeshhenie_latin: &pomeshhenie_latin
    canonical: pomeshhenie
    abbreviated: pom
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left

  alphanumeric: &unit_alphanumeric
    default: *kvartira
    probability: 0.89
    alternatives:
      - alternative: *kvartira_latin
        probability: 0.01
      - alternative: *pomeshhenie
        probability: 0.09
      - alternative: *pomeshhenie_latin
        probability: 0.01
    numeric_probability: 0.9  # e.g. кв 1
    numeric_plus_alpha_probability: 0.03  # e.g. 1А
    alpha_plus_numeric_probability: 0.03  # e.g. А1
    alpha_probability: 0.04  # e.g. кв А
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

    # If there are 10 floors, create unit numbers like #301 or #1032
    use_floor_probability: 0.1

    # Phrase set that additionally offers литера (lit); presumably used when
    # the unit is a bare letter — TODO confirm against the config consumer.
    alpha:
      default: *kvartira
      probability: 0.79
      alternatives:
        - alternative: *kvartira_latin
          probability: 0.01
        - alternative: *pomeshhenie
          probability: 0.09
        - alternative: *pomeshhenie_latin
          probability: 0.01
        - alternative: *litera
          probability: 0.09
        - alternative: *litera_latin
          probability: 0.01

  zones:
    commercial:
      default: *kabinet
      probability: 0.59
      alternatives:
        - alternative: *kabinet_latin
          probability: 0.01
        - alternative: *ofis
          probability: 0.29
        - alternative: *ofis_latin
          probability: 0.01
        - alternative: *pomeshhenie
          probability: 0.09
        - alternative: *pomeshhenie_latin
          probability: 0.01
      numeric_probability: 0.95  # e.g. kabinet 1
      numeric_plus_alpha_probability: 0.01  # e.g. kabinet 1A
      alpha_plus_numeric_probability: 0.01  # e.g. kab A1
      alpha_probability: 0.03  # e.g. kab A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      default: *pomeshhenie
      probability: 0.99
      alternatives:
        - alternative: *pomeshhenie_latin
          probability: 0.01
      numeric_probability: 0.95  # e.g. pomeshhenie 1
      numeric_plus_alpha_probability: 0.01  # e.g. pomeshhenie 1A
      alpha_plus_numeric_probability: 0.01  # e.g. pom A1
      alpha_probability: 0.03  # e.g. pom A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1