/**
 * SPDX-FileCopyrightText: Peter Pentchev
 * SPDX-License-Identifier: BSD-2-Clause
 */

#define _GNU_SOURCE

/*
 * NOTE(review): the header names were missing from the copy of this file
 * that was reviewed; the list below is reconstructed from the functions and
 * macros this file actually uses.  If the project has its own public header
 * declaring the non-static functions below, it must be re-added here -
 * TODO confirm against the build.
 */
#include <sys/wait.h>

#include <err.h>
#include <regex.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * A locale name of the form language[_territory][.codeset][@modifier].
 * Capture groups: 1 - language, 3 - territory, 5 - codeset, 7 - modifier.
 */
#define RE_LOCALE \
	"^" \
	"([a-zA-Z0-9]+)" \
	"(" \
		"_" \
		"([a-zA-Z0-9]+)" \
	")?" \
	"(" \
		"\\." \
		"([a-zA-Z0-9-]+)" \
	")?" \
	"(" \
		"@" \
		"([a-zA-Z0-9]+)" \
	")?" \
	"$"

/* Indices into the regmatch_t array filled in for RE_LOCALE. */
enum {
	MATCH_LANGUAGE = 1,
	MATCH_CODESET = 5,
	MATCH_COUNT = 8,
};

/* The languages tried, in order, when the caller does not supply a list. */
static const char * const utf8_languages[] = {"C", "en", "de", "es", "it", NULL};

/* The environment variables examined by get_preferred_languages(), in order. */
static const char * const locale_vars[] = {
	"LC_ALL",
	"LANG",
	"LC_MESSAGES",
	"LC_COLLATE",
	"LC_NAME",
	"LC_IDENTIFICATION",
	"LC_CTYPE",
	"LC_NUMERIC",
	"LC_TIME",
	"LC_MONETARY",
	"LC_PAPER",
	"LC_ADDRESS",
	"LC_TELEPHONE",
	"LC_MEASUREMENT",
	NULL,
};

static regex_t re_locale_;
static bool re_compiled;

/* POSIX specifies this exact declaration; a const-qualified one is incompatible. */
extern char **environ;

/**
 * Compile RE_LOCALE on first use and return the compiled pattern.
 *
 * Exits the program via errx() if the pattern cannot be compiled.
 * The compiled pattern is kept in a file-scope variable and reused
 * by later calls.
 */
static const regex_t *
build_re_locale(void)
{
	if (!re_compiled) {
		const int res = regcomp(&re_locale_, RE_LOCALE, REG_EXTENDED);
		if (res != 0) {
			/* regerror() truncates the message to fit the buffer. */
			char errbuf[256];
			regerror(res, &re_locale_, errbuf, sizeof(errbuf));
			errx(1, "Internal error: regcomp() failed: %s", errbuf);
		}
		re_compiled = true;
	}
	return &re_locale_;
}

/**
 * Return the number of entries in a NULL-terminated array of strings,
 * not counting the terminating NULL.
 */
static inline size_t
count_strings(const char * const * const arr)
{
	size_t count = 0;
	while (arr[count] != NULL)
		count++;
	return count;
}

/**
 * Free every string in a NULL-terminated array and then the array itself.
 */
static void
free_strings(char ** const arr)
{
	for (char **vptr = arr; *vptr != NULL; vptr++)
		free(*vptr);
	free(arr);
}

/**
 * Check whether a NULL-terminated array contains a string equal to needle.
 */
static inline bool
has_string(char * const * const haystack, const char * const needle)
{
	for (char * const *vptr = haystack; *vptr != NULL; vptr++)
		if (strcmp(*vptr, needle) == 0)
			return true;
	return false;
}

/**
 * Check whether the codeset part of a matched locale name is exactly
 * "utf8" or "UTF-8".
 *
 * @param name     the string that was matched against RE_LOCALE
 * @param codeset  the match entry for the codeset capture group
 * @return true only if the group took part in the match and spells
 *         one of the two accepted codeset names
 */
static bool
is_utf8_codeset(const char * const name, const regmatch_t * const codeset)
{
	if (codeset->rm_so == -1)
		return false;

	const size_t len = (size_t)(codeset->rm_eo - codeset->rm_so);
	const char * const start = &name[codeset->rm_so];
	return (len == 4 && strncmp(start, "utf8", 4) == 0) ||
	    (len == 5 && strncmp(start, "UTF-8", 5) == 0);
}

/**
 * Run `locale -a` and pick the UTF-8 locale for the most preferred language.
 *
 * @param pref_languages  NULL-terminated list of language names, most
 *                        preferred first; NULL selects the built-in list
 * @return a newly allocated locale name that the caller must free();
 *         "C" if no UTF-8 locale for any of the languages was listed;
 *         NULL (after a warning on stderr) if memory could not be
 *         allocated or `locale -a` could not be run, read, or did not
 *         exit successfully
 */
char *
detect_utf8_locale(const char * const * const pref_languages)
{
	const regex_t * const re_locale = build_re_locale();
	const char * const * const languages =
	    pref_languages != NULL ? pref_languages : utf8_languages;

	char *best_loc = strdup("C");
	if (best_loc == NULL) {
		warn("Could not allocate memory for a locale name");
		return NULL;
	}
	/* Lower is better; the language count means "nothing found yet". */
	size_t best_prio = count_strings(languages);

	FILE * const locfile = popen("locale -a", "r");
	if (locfile == NULL) {
		/* Report before free() so that errno is still popen()'s. */
		warn("Could not execute `locale -a`");
		free(best_loc);
		return NULL;
	}

	bool failed = false;
	char locname[200];
	while (fgets(locname, sizeof(locname), locfile) != NULL) {
		/*
		 * Once the result is known, keep reading and discarding:
		 * closing the pipe early may kill `locale -a` with SIGPIPE,
		 * which would then be misreported as a failure below.
		 */
		if (failed || best_prio == 0)
			continue;

		size_t len = strlen(locname);
		while (len > 0 &&
		    (locname[len - 1] == '\r' || locname[len - 1] == '\n'))
			locname[--len] = '\0';

		/* Any non-zero result leaves the matches array unusable. */
		regmatch_t matches[MATCH_COUNT];
		if (regexec(re_locale, locname, MATCH_COUNT, matches, 0) != 0)
			continue;
		if (!is_utf8_codeset(locname, &matches[MATCH_CODESET]))
			continue;

		/* The pattern is anchored, so the language starts at offset 0. */
		const size_t lang_len = (size_t)matches[MATCH_LANGUAGE].rm_eo;

		/* Only languages preferred over the current best are of interest. */
		for (size_t prio = 0; prio < best_prio; prio++) {
			const char * const lang = languages[prio];
			if (strlen(lang) != lang_len ||
			    strncmp(locname, lang, lang_len) != 0)
				continue;

			char * const copy = strdup(locname);
			if (copy == NULL) {
				warn("Could not allocate memory for a locale name");
				failed = true;
				break;
			}
			free(best_loc);
			best_loc = copy;
			best_prio = prio;
			break;
		}
	}

	if (ferror(locfile)) {
		warn("Could not read the output of `locale -a`");
		failed = true;
	}

	const int status = pclose(locfile);
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
		warn("`locale -a` failed");
		failed = true;
	}

	if (failed) {
		free(best_loc);
		return NULL;
	}
	return best_loc;
}

/**
 * Build the environment settings that select a UTF-8 locale.
 *
 * @param languages  passed on unchanged to detect_utf8_locale()
 * @return a newly allocated NULL-terminated array holding two newly
 *         allocated strings, "LC_ALL=<locale>" and "LANGUAGE=", or
 *         NULL (after a warning on stderr) on failure
 */
char **
get_utf8_vars(const char * const * const languages)
{
	char * const locname = detect_utf8_locale(languages);
	if (locname == NULL)
		return NULL;

	char ** const vars = calloc(3, sizeof(*vars));
	if (vars == NULL) {
		warn("Could not allocate memory for 3 environment variables");
		free(locname);
		return NULL;
	}

	if (asprintf(&vars[0], "LC_ALL=%s", locname) == -1) {
		warn("Could not build an LC_ALL=... string");
		free(locname);
		free(vars);
		return NULL;
	}
	free(locname);

	if (asprintf(&vars[1], "LANGUAGE=") == -1) {
		warn("Could not build a LANGUAGE= string");
		/* vars[1] is not usable after a failure; vars[0] must not leak. */
		free(vars[0]);
		free(vars);
		return NULL;
	}

	return vars;
}

/**
 * Find the entry in vars that sets the same variable as evar.
 *
 * The comparison covers the variable name and the '=' sign.
 *
 * @param evar  a "NAME=value" string
 * @param vars  NULL-terminated array of "NAME=value" strings
 * @return the index of the matching entry, or the number of entries in
 *         vars if there is none or evar contains no '=' character
 */
static size_t
find_env_var(const char * const evar, char ** const vars)
{
	const char * const eq = strchr(evar, '=');
	if (eq == NULL)
		return count_strings((const char * const *)vars);
	const size_t ofs = (size_t)(eq - evar) + 1;

	size_t vidx = 0;
	for (char **var = vars; *var != NULL; var++, vidx++)
		if (strncmp(evar, *var, ofs) == 0)
			return vidx;
	return vidx;
}

/**
 * Free the strings in vars whose flag in vfound is not set, then free
 * both arrays.  Strings whose flag is set are left alone.
 */
static void
free_strings_not_found(char ** const vars, bool * const vfound)
{
	bool *found = vfound;
	for (char **var = vars; *var != NULL; var++, found++)
		if (!*found)
			free(*var);
	free(vars);
	free(vfound);
}

/**
 * Build a copy of the current environment with the variables returned
 * by get_utf8_vars() replacing any existing settings of the same names,
 * or appended at the end if they were not set.
 *
 * @param languages  passed on unchanged to get_utf8_vars()
 * @return a newly allocated NULL-terminated array of newly allocated
 *         strings, or NULL (after a warning on stderr) on failure
 */
char **
get_utf8_env(const char * const * const languages)
{
	char ** const vars = get_utf8_vars(languages);
	if (vars == NULL)
		return NULL;
	const size_t vcount = count_strings((const char * const *)vars);

	bool * const vfound = calloc(vcount, sizeof(*vfound));
	if (vfound == NULL) {
		warn("Could not allocate room for %zu boolean flags", vcount);
		free_strings(vars);
		return NULL;
	}

	/* environ may be a null pointer; treat that as an empty environment. */
	char ** const cur_env = environ;
	const size_t ecount = cur_env != NULL ?
	    count_strings((const char * const *)cur_env) : 0;

	/* Room for every current variable, every new one, and the final NULL. */
	const size_t allocated = ecount + vcount + 1;
	char ** const res = calloc(allocated, sizeof(*res));
	if (res == NULL) {
		warn("Could not allocate room for %zu environment variables",
		    allocated);
		free_strings(vars);
		free(vfound);
		return NULL;
	}

	size_t next = 0;
	for (char **env = cur_env; env != NULL && *env != NULL; env++) {
		char * const evar = *env;
		const size_t found = find_env_var(evar, vars);

		if (found != vcount && vfound[found]) {
			warnx("Setting an environment variable twice: %s", evar);
			continue;
		}

		/*
		 * Check before storing so that the last slot always stays
		 * NULL and free_strings() can walk the array safely.
		 */
		if (next + 1 >= allocated) {
			warn("The environment changed while we were processing it");
			goto fail;
		}

		if (found == vcount) {
			char * const copy = strdup(evar);
			if (copy == NULL) {
				warn("Could not copy an environment variable");
				goto fail;
			}
			res[next++] = copy;
		} else {
			/* Ownership of the string moves from vars to res. */
			res[next++] = vars[found];
			vfound[found] = true;
		}
	}

	/* Append the variables that were not present in the environment. */
	for (size_t vidx = 0; vidx < vcount; vidx++) {
		if (vfound[vidx])
			continue;
		if (next + 1 >= allocated) {
			warn("The environment changed while we were processing it");
			goto fail;
		}
		res[next++] = vars[vidx];
		vfound[vidx] = true;
	}

	/* All the strings now belong to res; only the containers remain. */
	free(vars);
	free(vfound);
	return res;

fail:
	free_strings_not_found(vars, vfound);
	free_strings(res);
	return NULL;
}

/**
 * Build a list of languages from the locale environment variables.
 *
 * Each variable in locale_vars is examined in order; the language part
 * of its value is used only if the value is a well-formed locale name
 * with a "utf8" or "UTF-8" codeset.  Duplicates are dropped, and "C" is
 * appended if it is not already present.
 *
 * @return a newly allocated NULL-terminated array of newly allocated
 *         strings, or NULL (after a warning on stderr) on failure
 */
char **
get_preferred_languages(void)
{
	const regex_t * const re_locale = build_re_locale();

	/* One slot per variable, one for "C", one for the final NULL. */
	const size_t allocated = count_strings(locale_vars) + 2;
	char **languages = calloc(allocated, sizeof(*languages));
	if (languages == NULL) {
		warn("Could not allocate memory for %zu language strings",
		    allocated);
		return NULL;
	}
	size_t count = 0;

	for (const char * const *lvar = locale_vars; *lvar != NULL; lvar++) {
		const char * const value = getenv(*lvar);
		if (value == NULL)
			continue;

		/* Any non-zero result leaves the matches array unusable. */
		regmatch_t matches[MATCH_COUNT];
		if (regexec(re_locale, value, MATCH_COUNT, matches, 0) != 0)
			continue;
		if (!is_utf8_codeset(value, &matches[MATCH_CODESET]))
			continue;

		char * const lang =
		    strndup(value, (size_t)matches[MATCH_LANGUAGE].rm_eo);
		if (lang == NULL) {
			warn("Could not allocate memory for a language name");
			free_strings(languages);
			return NULL;
		}

		if (has_string(languages, lang))
			free(lang);
		else
			languages[count++] = lang;
	}

	if (!has_string(languages, "C")) {
		char * const c_lang = strdup("C");
		if (c_lang == NULL) {
			warn("Could not allocate memory for a language name");
			free_strings(languages);
			return NULL;
		}
		languages[count++] = c_lang;
	}

	/* Give back the slots that turned out not to be needed. */
	if (count + 1 < allocated) {
		char ** const nlang =
		    realloc(languages, (count + 1) * sizeof(*languages));
		if (nlang == NULL) {
			warn("Could not reallocate memory for %zu language names",
			    count + 1);
			free_strings(languages);
			return NULL;
		}
		languages = nlang;
	}
	return languages;
}