# ca.yaml
# -------
# Catalan language specification
#
# Each section configures the phrases used around one kind of address
# component. Phrase entries are declared once with an anchor (&name) and
# reused further down through aliases (*name). Within every group of sibling
# *_probability keys, and within every default/alternatives group, the
# weights below add up to 1.
#
# NOTE(review): the nesting in this file was reconstructed from a copy whose
# whitespace had been collapsed. Placement of the following keys was inferred
# from key order and from parallel sections — confirm against the consumer:
#   - levels.floor.ordinal.standalone_probability
#   - postcodes.alphanumeric.default.strict_numeric
#   - entrances.directional / staircases.directional
#   - cross_streets.between.parentheses_probabililty
---
components:
  level:
    # If no floor number is specified
    null_probability: 0.6
    alphanumeric_probability: 0.35
    standalone_probability: 0.05
  staircase:
    null_probability: 0.99
    alphanumeric_probability: 0.01
  entrance:
    null_probability: 0.999
    alphanumeric_probability: 0.001
  unit:
    # If no unit number is specified
    null_probability: 0.3
    alphanumeric_probability: 0.65
    standalone_probability: 0.05

numbers:
  default: &numero
    canonical: número
    abbreviated: "nº"
    sample: true
    canonical_probability: 0.1
    abbreviated_probability: 0.7
    sample_probability: 0.2
    sample_exclude:
      - "#"
    numeric:
      direction: left
    numeric_affix:
      affix: "#"  # e.g. #3, #2F, etc.
      probability: 0.5
      # NOTE(review): this alternative has no payload and no probability of
      # its own, so the weights here only reach 0.5 — confirm what the
      # intended alternative affix was, or drop probability/alternatives.
      alternatives:
        - alternative: null
      direction: left  # affix goes on the number's left
    # Probabilities for numbers
    numeric_probability: 0.7
    numeric_affix_probability: 0.3

and:
  default: &i
    canonical: i
    abbreviated: "&"
    sample: true
    canonical_probability: 0.5
    abbreviated_probability: 0.4
    sample_probability: 0.1

house_numbers:
  # sense número (s/n) addresses
  no_number:
    default:
      canonical: sense número
      abbreviated: s/n
      sample: true
      canonical_probability: 0.1
      abbreviated_probability: 0.7
      sample_probability: 0.2
  alphanumeric:
    default: *numero
  alphanumeric_phrase_probability: 0.01
  # With this probability, use sense número if no house_number is specified
  no_number_probability: 0.1

levels:
  # Everywhere except Spain
  # NOTE(review): the "except Spain" remark is unexplained in this file —
  # confirm it applies to the Catalan configuration.
  floor: &pis
    canonical: pis
    abbreviated: p
    sample: true
    canonical_probability: 0.8
    abbreviated_probability: 0.1
    sample_probability: 0.1
    numeric:
      direction: left
      # Occasionally add variation of "number", e.g. Pis No 2
      add_number_phrase: true
      add_number_phrase_probability: 0.05
    numeric_affix:
      affix: p
      direction: left  # P2
    # e.g. 2o pis
    ordinal:
      direction: right
      direction_probability: 0.95  # Let it vary occasionally e.g. Pis 2o
      standalone_probability: 0.2  # Let e.g. 5º be the entire floor string
      # If ordinal is selected, chance of e.g. just using 2o without Pis
      null_phrase_probability: 0.6
    numeric_probability: 0.2
    numeric_affix_probability: 0.05
    ordinal_probability: 0.75

  # Ground floor
  baixos: &baixos
    canonical: baixos
    abbreviated: bxs
    sample: true
    canonical_probability: 0.6
    abbreviated_probability: 0.3
    sample_probability: 0.1
  pis_baix: &pis_baix
    canonical: pis baix
    abbreviated: pb
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.5
    sample_probability: 0.1
  sota: &sota
    canonical: sota
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2

  # Used when floor number is < 0 (starts at -1 in all countries)
  soterrani: &soterrani
    canonical: soterrani
    abbreviated: so
    sample: true
    canonical_probability: 0.5
    abbreviated_probability: 0.3
    sample_probability: 0.2
    # e.g. soterrani 1
    numeric:
      direction: left
    numeric_affix:
      affix: so
      direction: left
    # e.g. segon soterrani
    ordinal:
      direction: right
    standalone_probability: 0.985
    number_abs_value: true
    number_min_abs_value: 1
    numeric_probability: 0.005
    numeric_affix_probability: 0.005
    ordinal_probability: 0.005
  # NOTE(review): unlike its siblings, this entry sets sample: true without
  # canonical/abbreviated/sample probabilities — confirm the consumer's
  # defaults cover that.
  sub_soterrani: &sub_soterrani
    canonical: sub soterrani
    abbreviated: ss
    sample: true
    # e.g. sub soterrani 1
    numeric:
      direction: left
    numeric_affix:
      affix: ss
      direction: left
    # e.g. segon sub soterrani
    ordinal:
      direction: right
    number_abs_value: true
    number_min_abs_value: 2
    # Soterrani 2 == Sub-soterrani 1
    number_subtract_abs_value: 1
    standalone_probability: 0.985
    numeric_probability: 0.005
    numeric_affix_probability: 0.005
    ordinal_probability: 0.005

  entresol: &entresol
    canonical: entresòl
    abbreviated: entl
    half_floors: true
    sample: true
    canonical_probability: 0.7
    abbreviated_probability: 0.2
    sample_probability: 0.1
    # e.g. entresòl 2
    numeric:
      direction: left
    # e.g. ent2
    numeric_affix:
      affix: ent
      direction: left
    # e.g. segon entresòl
    ordinal:
      direction: right
    numeric_probability: 0.1
    numeric_affix_probability: 0.1
    ordinal_probability: 0.2
    standalone_probability: 0.6
  pis_principal: &pis_principal
    canonical: pis principal
    abbreviated: pis pral
    sample: true
    canonical_probability: 0.2
    abbreviated_probability: 0.3
    sample_probability: 0.5
  principal: &principal
    canonical: principal
    abbreviated: pral
    sample: true
    canonical_probability: 0.2
    abbreviated_probability: 0.6
    sample_probability: 0.2
  atic: &atic
    canonical: àtic
    # NOTE(review): the abbreviation uses an acute accent (á) while the
    # canonical form uses a grave accent (à) — confirm this is intended.
    abbreviated: át
    sample: true
    canonical_probability: 0.7
    abbreviated_probability: 0.1
    sample_probability: 0.2
  sobreatic: &sobreatic
    canonical: sobreàtic

  # Phrase choices keyed by special floor values instead of a plain number
  aliases:
    "<-1":
      default: *soterrani
      probability: 0.6
      alternatives:
        - alternative: *sub_soterrani
          probability: 0.3995
        - alternative: *pis
          probability: 0.0005
    "-1":
      default: *soterrani
      probability: 0.9995
      alternatives:
        - alternative: *pis
          probability: 0.0005
    # Special token for half-floors
    half_floors:
      default: *entresol
    "0":
      default: *baixos
      probability: 0.495
      alternatives:
        - alternative: *pis_baix
          probability: 0.395
        - alternative: *sota
          probability: 0.1
        # Pis 0 is uncommon
        - alternative: *pis
          probability: 0.01
    top:
      default: *pis
      probability: 0.85
      alternatives:
        - alternative: *atic
          probability: 0.1
        - alternative: *sobreatic
          probability: 0.05

  numbering_starts_at: 0

  alphanumeric:
    default: *pis
    add_number_phrase: true
    add_number_phrase_probability: 0.05
    numeric_probability: 0.99
    alpha_probability: 0.01

blocks:
  alphanumeric:
    default:
      canonical: bloc
      abbreviated: bl
      sample: true
      canonical_probability: 0.6
      abbreviated_probability: 0.2
      sample_probability: 0.2
      numeric:
        direction: left

categories:
  near:
    default:
      canonical: a prop de
    probability: 0.5
    alternatives:
      - alternative:
          canonical: prop de
        probability: 0.2
      - alternative:
          canonical: prop
        probability: 0.1
      - alternative:
          canonical: a prop
        probability: 0.1
      - alternative:
          canonical: proper
        probability: 0.05
      - alternative:
          canonical: proper a
        probability: 0.05
  nearby:
    default:
      canonical: proper
    probability: 0.5
    alternatives:
      - alternative:
          canonical: a prop
        probability: 0.1
      - alternative:
          canonical: a prop d'aquí
        probability: 0.1
      # Same phrase as above, spelled without the accent
      - alternative:
          canonical: a prop d'aqui
        probability: 0.1
      - alternative:
          canonical: aquí
        probability: 0.1
      - alternative:
          canonical: aqui
        probability: 0.1
  near_me:
    default:
      canonical: a prop meu
  in:
    default:
      canonical: a
    probability: 0.6
    alternatives:
      - alternative:
          canonical: dins
        probability: 0.2
      - alternative:
          canonical: en
        probability: 0.2

  # Probabilities of each phrase
  near_probability: 0.35
  nearby_probability: 0.2
  near_me_probability: 0.1
  in_probability: 0.35

cross_streets:
  and: *i
  amb: &amb
    canonical: amb
  a: &a
    canonical: a
  corner_of: &cantonada_de
    canonical: cantonada de
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
  at_the_corner_of: &a_la_cantonada_de
    canonical: a la cantonada de
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
  corner: &cantonada
    canonical: cantonada
    sample: true
    canonical_probability: 0.7
    sample_probability: 0.3
  intersection:
    default: *i
    probability: 0.55
    alternatives:
      - alternative: *amb
        probability: 0.2
      - alternative: *a
        probability: 0.1
      - alternative: *cantonada_de
        probability: 0.09
      - alternative: *a_la_cantonada_de
        probability: 0.05
      - alternative: *cantonada
        probability: 0.01
  between:
    canonical: entre
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    # NOTE(review): the key is spelled "probabililty". It is kept verbatim
    # because the consumer's expected spelling is not visible here — confirm
    # which spelling is actually read before renaming.
    parentheses_probabililty: 0.5

po_boxes:
  apartat: &apartat
    canonical: apartat
    abbreviated: apt
    sample: true
    canonical_probability: 0.5
    abbreviated_probability: 0.3
    sample_probability: 0.2
    numeric:
      direction: left
      add_number_phrase: true
      add_number_phrase_probability: 0.4  # Apt No 1234
    numeric_probability: 1.0
  alphanumeric:
    sample: false
    default: *apartat
    numeric_probability: 0.9  # Apt 123
    alpha_probability: 0.05  # Apt A
    numeric_plus_alpha_probability: 0.04  # Apt 123G
    alpha_plus_numeric_probability: 0.01  # Apt A123
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  # Weighted choice of how many digits a box number has
  digits:
    - length: 1
      probability: 0.05
    - length: 2
      probability: 0.1
    - length: 3
      probability: 0.2
    - length: 4
      probability: 0.5
    - length: 5
      probability: 0.1
    - length: 6
      probability: 0.05

postcodes:
  alphanumeric:
    default:
      canonical: codi postal
      abbreviated: cp
      sample: true
      canonical_probability: 0.01
      abbreviated_probability: 0.95
      sample_probability: 0.04
      numeric:
        # Postcodes in Spain and Latin America are sometimes prefixed by CP
        direction: left
      numeric_affix:
        affix: cp
        direction: left
      # null_probability means the chance of doing nothing
      # e.g. just the postal code
      null_probability: 0.7
      numeric_probability: 0.18
      numeric_affix_probability: 0.12
      strict_numeric: true

directions:
  right: &dreta
    canonical: dreta
    abbreviated: dta
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: right
    numeric_affix:
      affix: d
      direction: right
      whitespace_probability: 0.1
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  left: &esquerra
    canonical: esquerra
    abbreviated: esq
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: right
    numeric_affix:
      affix: e
      direction: right
      whitespace_probability: 0.1
    numeric_probability: 0.4
    numeric_affix_probability: 0.6
  rear: &posterior
    canonical: posterior
    abbreviated: pos
    sample: true
    canonical_probability: 0.6
    abbreviated_probability: 0.2
    sample_probability: 0.2
    numeric:
      direction: right
  front: &front
    canonical: front
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
    numeric:
      direction: right
  # Weighted choice across all four directions
  alternatives:
    - alternative: *dreta
      probability: 0.45
    - alternative: *esquerra
      probability: 0.45
    - alternative: *posterior
      probability: 0.05
    - alternative: *front
      probability: 0.05
  # Front/rear only
  anteroposterior:
    alternatives:
      - alternative: *front
        probability: 0.5
      - alternative: *posterior
        probability: 0.5
  # Right/left only
  lateral:
    alternatives:
      - alternative: *dreta
        probability: 0.5
      - alternative: *esquerra
        probability: 0.5

cardinal_directions:
  east: &est
    canonical: est
    abbreviated: e
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: e
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  west: &oest
    canonical: oest
    abbreviated: w
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: w
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  north: &nord
    canonical: nord
    # Quoted: a bare n is a boolean literal under strict YAML 1.1 parsing
    abbreviated: "n"
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: "n"
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  south: &sud
    canonical: sud
    abbreviated: s
    canonical_probability: 0.4
    abbreviated_probability: 0.6
    numeric:
      direction: right
    numeric_affix:
      affix: s
      direction: right
    numeric_probability: 0.5
    numeric_affix_probability: 0.5
  alternatives:
    - alternative: *nord
      probability: 0.25
    - alternative: *est
      probability: 0.25
    - alternative: *sud
      probability: 0.25
    - alternative: *oest
      probability: 0.25

entrances:
  entrada: &entrada
    canonical: entrada
    abbreviated: entr
    sample: true
    canonical_probability: 0.5
    abbreviated_probability: 0.2
    sample_probability: 0.3
    numeric:
      direction: left
  # Entrance 1, Entrance A, etc.
  alphanumeric:
    default: *entrada
    numeric_probability: 0.1  # e.g. Entrance 1
    alpha_probability: 0.85  # e.g. Entrance A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  directional:
    modifier:
      alternatives:
        - alternative: *nord
        - alternative: *sud
        - alternative: *est
        - alternative: *oest
        - alternative: *dreta
        - alternative: *esquerra
        - alternative: *posterior
        - alternative: *front

staircases:
  escala: &escala
    canonical: escala
    abbreviated: esc
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  alphanumeric:
    # For alphanumerics, Stair A, Stair 1, etc.
    default: *escala
    numeric_probability: 0.6  # e.g. Escala 1
    alpha_probability: 0.35  # e.g. Escala A
    numeric_plus_alpha_probability: 0.025  # e.g. 1A
    alpha_plus_numeric_probability: 0.025  # e.g. A1
    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1
  directional:
    direction: right  # e.g. Escala Esq
    direction_probability: 0.8
    modifier:
      alternatives:
        - alternative: *nord
        - alternative: *sud
        - alternative: *est
        - alternative: *oest
        - alternative: *dreta
        - alternative: *esquerra
        - alternative: *posterior
        - alternative: *front

units:
  flat: &apartament
    canonical: apartament
    abbreviated: apmt
    sample: true
    canonical_probability: 0.3
    abbreviated_probability: 0.4
    sample_probability: 0.3
    numeric:
      direction: left
  door: &porta
    canonical: porta
    abbreviated: pta
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
    numeric:
      direction: left
      # If it's just porta B, many times it's just e.g. 3o B
      # (third floor, door B)
      null_phrase_probability: 0.15
    ordinal:
      direction: right
      gender: f
      direction_probability: 0.95  # Let it vary occasionally e.g. Porta 2a
      null_phrase_probability: 0.8  # Let e.g. 5a be the entire unit string
    numeric_probability: 0.25
    ordinal_probability: 0.75
  lletra: &lletra
    canonical: lletra
    sample: true
    canonical_probability: 0.9
    sample_probability: 0.1
    numeric:
      direction: left
  office: &oficina
    canonical: oficina
    abbreviated: of
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.3
    sample_probability: 0.3
    numeric:
      direction: left
  # Another word for unit, used more in Colombia
  # NOTE(review): the Colombia remark is unexplained for a Catalan
  # configuration — confirm it is still relevant.
  unitat: &unitat
    canonical: unitat
    abbreviated: un
    sample: true
    canonical_probability: 0.4
    abbreviated_probability: 0.4
    sample_probability: 0.2
  lot: &lot
    canonical: lot
    abbreviated: lt
    sample: true
    canonical_probability: 0.6
    abbreviated_probability: 0.2
    sample_probability: 0.2
  parcel: &parcella
    canonical: parcel·la
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  habitacio: &habitacio
    canonical: habitació
    sample: true
    canonical_probability: 0.8
    sample_probability: 0.2
  casa: &casa
    canonical: casa
    numeric:
      direction: left
  room: &sala
    canonical: sala
    numeric:
      direction: left

  # Anchored so that zones.residential below can reuse it unchanged
  alphanumeric: &unit_alphanumeric
    default: *porta
    probability: 0.8
    sample: true
    alternatives:
      - alternative: *apartament
        probability: 0.1
      - alternative: *casa
        probability: 0.1

    # Separate random probability for adding directions
    # like 2o Esq, 2 Dta, etc.
    add_direction: true
    add_direction_probability: 0.1
    add_direction_numeric: true  # Only for numbers
    add_direction_standalone: true  # A unit can be as simple as "D"

    numeric_probability: 0.7  # e.g. Porta 1a
    numeric_plus_alpha_probability: 0.01  # e.g. Porta 1A
    alpha_plus_numeric_probability: 0.01  # e.g. Porta A1
    alpha_probability: 0.28  # e.g. Porta A

    alpha_plus_numeric:
      whitespace_probability: 0.1
    numeric_plus_alpha:
      whitespace_probability: 0.1

    alpha:
      default: *porta
      probability: 0.8
      alternatives:
        - alternative: *lletra
          probability: 0.12
        - alternative: *apartament
          probability: 0.05
        - alternative: *casa
          probability: 0.01
        - alternative: *unitat
          probability: 0.01
        - alternative: *habitacio
          probability: 0.01

  zones:
    residential: *unit_alphanumeric
    commercial:
      default: *oficina
      probability: 0.8
      alternatives:
        - alternative: *sala
          probability: 0.2
      numeric_probability: 0.9  # e.g. Oficina 1
      numeric_plus_alpha_probability: 0.01  # e.g. Oficina 1A
      alpha_plus_numeric_probability: 0.01  # e.g. Oficina A1
      alpha_probability: 0.08  # e.g. Oficina A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
      alpha:
        default: *oficina
        probability: 0.8
        alternatives:
          - alternative: *sala
            probability: 0.15
          - alternative: *lletra
            probability: 0.05
    industrial:
      default: *lot
      probability: 0.5
      alternatives:
        - alternative: *oficina
          probability: 0.3
        - alternative: *unitat
          probability: 0.19
        - alternative: *parcella
          probability: 0.01
      numeric_probability: 0.9  # e.g. Lot 1
      numeric_plus_alpha_probability: 0.01  # e.g. Lot 1A
      alpha_plus_numeric_probability: 0.01  # e.g. Lot A1
      alpha_probability: 0.08  # e.g. Lot A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
    university:
      default: *sala
      probability: 0.9
      alternatives:
        - alternative: *porta
          probability: 0.1
      numeric_probability: 0.9  # e.g. Sala 1
      numeric_plus_alpha_probability: 0.01  # e.g. Sala 1A
      alpha_plus_numeric_probability: 0.01  # e.g. Sala A1
      alpha_probability: 0.08  # e.g. Sala A
      alpha_plus_numeric:
        whitespace_probability: 0.1
      numeric_plus_alpha:
        whitespace_probability: 0.1
      alpha:
        default: *sala
        probability: 0.9
        alternatives:
          - alternative: *porta
            probability: 0.08
          - alternative: *lletra
            probability: 0.02

allotments:
  lot:
    default: *lot
    numeric_probability: 0.8
    alphanumeric_probability: 0.1
    alpha_probability: 0.1
  parcel:
    default: *parcella
    numeric_probability: 0.3
    alphanumeric_probability: 0.3
    alpha_probability: 0.4
  # Weighted choice between lot, parcel, or both in either order
  lot_probability: 0.9
  parcel_probability: 0.06
  lot_plus_parcel_probability: 0.02
  parcel_plus_lot_probability: 0.02