// Copyright (C) 2004-2021 Artifex Software, Inc. // // This file is part of MuPDF. // // MuPDF is free software: you can redistribute it and/or modify it under the // terms of the GNU Affero General Public License as published by the Free // Software Foundation, either version 3 of the License, or (at your option) // any later version. // // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more // details. // // You should have received a copy of the GNU Affero General Public License // along with MuPDF. If not, see // // Alternative licensing terms are available from the licensor. // For commercial licensing, see or contact // Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato, // CA 94945, U.S.A., +1(415)492-9861, for further information. #include "mupdf/fitz.h" #include "xps-imp.h" #include #include FT_FREETYPE_H #include FT_ADVANCES_H static inline int ishex(int a) { return (a >= 'A' && a <= 'F') || (a >= 'a' && a <= 'f') || (a >= '0' && a <= '9'); } static inline int unhex(int a) { if (a >= 'A' && a <= 'F') return a - 'A' + 0xA; if (a >= 'a' && a <= 'f') return a - 'a' + 0xA; if (a >= '0' && a <= '9') return a - '0'; return 0; } int xps_count_font_encodings(fz_context *ctx, fz_font *font) { FT_Face face = fz_font_ft_face(ctx, font); return face->num_charmaps; } void xps_identify_font_encoding(fz_context *ctx, fz_font *font, int idx, int *pid, int *eid) { FT_Face face = fz_font_ft_face(ctx, font); *pid = face->charmaps[idx]->platform_id; *eid = face->charmaps[idx]->encoding_id; } void xps_select_font_encoding(fz_context *ctx, fz_font *font, int idx) { FT_Face face = fz_font_ft_face(ctx, font); FT_Set_Charmap(face, face->charmaps[idx]); } int xps_encode_font_char(fz_context *ctx, fz_font *font, int code) { FT_Face face = fz_font_ft_face(ctx, font); int gid = FT_Get_Char_Index(face, code); if (gid == 0 && face->charmap && face->charmap->platform_id == 3 && face->charmap->encoding_id == 0) gid = FT_Get_Char_Index(face, 0xF000 | code); return gid; } void xps_measure_font_glyph(fz_context *ctx, xps_document *doc, fz_font *font, int gid, xps_glyph_metrics *mtx) { int mask = FT_LOAD_NO_SCALE | FT_LOAD_IGNORE_TRANSFORM; FT_Face face = fz_font_ft_face(ctx, font); FT_Fixed hadv = 0, vadv = 0; fz_lock(ctx, FZ_LOCK_FREETYPE); FT_Get_Advance(face, gid, mask, &hadv); FT_Get_Advance(face, gid, mask | FT_LOAD_VERTICAL_LAYOUT, &vadv); fz_unlock(ctx, FZ_LOCK_FREETYPE); mtx->hadv = (float) hadv / face->units_per_EM; mtx->vadv = (float) vadv / face->units_per_EM; mtx->vorg = (float) face->ascender / face->units_per_EM; } static fz_font * xps_lookup_font_imp(fz_context *ctx, xps_document *doc, char *name) { xps_font_cache *cache; for (cache = doc->font_table; cache; cache = cache->next) if (!xps_strcasecmp(cache->name, name)) return fz_keep_font(ctx, cache->font); return NULL; } static void xps_insert_font(fz_context *ctx, xps_document *doc, char *name, fz_font *font) { xps_font_cache *cache = fz_malloc_struct(ctx, xps_font_cache); cache->font = NULL; cache->name = NULL; fz_try(ctx) { cache->font = fz_keep_font(ctx, font); cache->name = fz_strdup(ctx, name); cache->next = doc->font_table; } fz_catch(ctx) { fz_drop_font(ctx, cache->font); fz_free(ctx, cache->name); fz_free(ctx, cache); fz_rethrow(ctx); } doc->font_table = cache; } /* * Some fonts in XPS are obfuscated by XOR:ing the first 32 bytes of the * data with the GUID in the fontname. */ static void xps_deobfuscate_font_resource(fz_context *ctx, xps_document *doc, xps_part *part) { unsigned char buf[33]; unsigned char key[16]; unsigned char *data; size_t size; char *p; int i; size = fz_buffer_storage(ctx, part->data, &data); if (size < 32) { fz_warn(ctx, "insufficient data for font deobfuscation"); return; } p = strrchr(part->name, '/'); if (!p) p = part->name; for (i = 0; i < 32 && *p; p++) { if (ishex(*p)) buf[i++] = *p; } buf[i] = 0; if (i != 32) { fz_warn(ctx, "cannot extract GUID from obfuscated font part name"); return; } for (i = 0; i < 16; i++) key[i] = unhex(buf[i*2+0]) * 16 + unhex(buf[i*2+1]); for (i = 0; i < 16; i++) { data[i] ^= key[15-i]; data[i+16] ^= key[15-i]; } } static void xps_select_best_font_encoding(fz_context *ctx, xps_document *doc, fz_font *font) { static struct { int pid, eid; } xps_cmap_list[] = { { 3, 10 }, /* Unicode with surrogates */ { 3, 1 }, /* Unicode without surrogates */ { 3, 5 }, /* Wansung */ { 3, 4 }, /* Big5 */ { 3, 3 }, /* Prc */ { 3, 2 }, /* ShiftJis */ { 3, 0 }, /* Symbol */ { 1, 0 }, { -1, -1 }, }; int i, k, n, pid, eid; n = xps_count_font_encodings(ctx, font); for (k = 0; xps_cmap_list[k].pid != -1; k++) { for (i = 0; i < n; i++) { xps_identify_font_encoding(ctx, font, i, &pid, &eid); if (pid == xps_cmap_list[k].pid && eid == xps_cmap_list[k].eid) { xps_select_font_encoding(ctx, font, i); return; } } } fz_warn(ctx, "cannot find a suitable cmap"); } fz_font * xps_lookup_font(fz_context *ctx, xps_document *doc, char *base_uri, char *font_uri, char *style_att) { char partname[1024]; char fakename[1024]; char *subfont; int subfontid = 0; xps_part *part; fz_font *font; xps_resolve_url(ctx, doc, partname, base_uri, font_uri, sizeof partname); subfont = strrchr(partname, '#'); if (subfont) { subfontid = atoi(subfont + 1); *subfont = 0; } /* Make a new part name for font with style simulation applied */ fz_strlcpy(fakename, partname, sizeof fakename); if (style_att) { if (!strcmp(style_att, "BoldSimulation")) fz_strlcat(fakename, "#Bold", sizeof fakename); else if (!strcmp(style_att, "ItalicSimulation")) fz_strlcat(fakename, "#Italic", sizeof fakename); else if (!strcmp(style_att, "BoldItalicSimulation")) fz_strlcat(fakename, "#BoldItalic", sizeof fakename); } font = xps_lookup_font_imp(ctx, doc, fakename); if (!font) { fz_buffer *buf = NULL; fz_var(buf); fz_try(ctx) { part = xps_read_part(ctx, doc, partname); } fz_catch(ctx) { if (fz_caught(ctx) == FZ_ERROR_TRYLATER) { if (doc->cookie) doc->cookie->incomplete = 1; else fz_rethrow(ctx); } else fz_warn(ctx, "cannot find font resource part '%s'", partname); return NULL; } /* deobfuscate if necessary */ if (strstr(part->name, ".odttf")) xps_deobfuscate_font_resource(ctx, doc, part); if (strstr(part->name, ".ODTTF")) xps_deobfuscate_font_resource(ctx, doc, part); fz_var(font); fz_try(ctx) { font = fz_new_font_from_buffer(ctx, NULL, part->data, subfontid, 1); xps_select_best_font_encoding(ctx, doc, font); xps_insert_font(ctx, doc, fakename, font); } fz_always(ctx) { xps_drop_part(ctx, doc, part); } fz_catch(ctx) { fz_drop_font(ctx, font); fz_warn(ctx, "cannot load font resource '%s'", partname); return NULL; } if (style_att) { fz_font_flags_t *flags = fz_font_flags(font); int bold = !!strstr(style_att, "Bold"); int italic = !!strstr(style_att, "Italic"); flags->fake_bold = bold; flags->is_bold = bold; flags->fake_italic = italic; flags->is_italic = italic; } } return font; } /* * Parse and draw an XPS element. * * Indices syntax: GlyphIndices = GlyphMapping ( ";" GlyphMapping ) GlyphMapping = ( [ClusterMapping] GlyphIndex ) [GlyphMetrics] ClusterMapping = "(" ClusterCodeUnitCount [":" ClusterGlyphCount] ")" ClusterCodeUnitCount = * DIGIT ClusterGlyphCount = * DIGIT GlyphIndex = * DIGIT GlyphMetrics = "," AdvanceWidth ["," uOffset ["," vOffset]] AdvanceWidth = ["+"] RealNum uOffset = ["+" | "-"] RealNum vOffset = ["+" | "-"] RealNum RealNum = ((DIGIT ["." DIGIT]) | ("." DIGIT)) [Exponent] Exponent = ( ("E"|"e") ("+"|"-") DIGIT ) */ static char * xps_parse_digits(char *s, int *digit) { *digit = 0; while (*s >= '0' && *s <= '9') { *digit = *digit * 10 + (*s - '0'); s ++; } return s; } static char * xps_parse_real_num(char *s, float *number, int *override) { char *tail; float v; v = fz_strtof(s, &tail); *override = tail != s; if (*override) *number = v; return tail; } static char * xps_parse_cluster_mapping(char *s, int *code_count, int *glyph_count) { if (*s == '(') s = xps_parse_digits(s + 1, code_count); if (*s == ':') s = xps_parse_digits(s + 1, glyph_count); if (*s == ')') s ++; return s; } static char * xps_parse_glyph_index(char *s, int *glyph_index) { if (*s >= '0' && *s <= '9') s = xps_parse_digits(s, glyph_index); return s; } static char * xps_parse_glyph_metrics(char *s, float *advance, float *uofs, float *vofs, int bidi_level) { int override; if (*s == ',') { s = xps_parse_real_num(s + 1, advance, &override); if (override && (bidi_level & 1)) *advance = -*advance; } if (*s == ',') s = xps_parse_real_num(s + 1, uofs, &override); if (*s == ',') s = xps_parse_real_num(s + 1, vofs, &override); return s; } fz_text * xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, fz_matrix ctm, fz_font *font, float size, float originx, float originy, int is_sideways, int bidi_level, char *indices, char *unicode) { xps_glyph_metrics mtx; fz_text *text; fz_matrix tm; float x = originx; float y = originy; char *us = unicode; char *is = indices; size_t un = 0; if (!unicode && !indices) fz_warn(ctx, "glyphs element with neither characters nor indices"); if (us) { if (us[0] == '{' && us[1] == '}') us = us + 2; un = strlen(us); } if (is_sideways) tm = fz_pre_scale(fz_rotate(90), -size, size); else tm = fz_scale(size, -size); text = fz_new_text(ctx); fz_try(ctx) { while ((us && un > 0) || (is && *is)) { int char_code = FZ_REPLACEMENT_CHARACTER; int code_count = 1; int glyph_count = 1; if (is && *is) { is = xps_parse_cluster_mapping(is, &code_count, &glyph_count); } if (code_count < 1) code_count = 1; if (glyph_count < 1) glyph_count = 1; /* TODO: add code chars with cluster mappings for text extraction */ while (code_count--) { if (us && un > 0) { int t = fz_chartorune(&char_code, us); us += t; un -= t; } } while (glyph_count--) { int glyph_index = -1; float u_offset = 0; float v_offset = 0; float advance; int dir; if (is && *is) is = xps_parse_glyph_index(is, &glyph_index); if (glyph_index == -1) glyph_index = xps_encode_font_char(ctx, font, char_code); xps_measure_font_glyph(ctx, doc, font, glyph_index, &mtx); if (is_sideways) advance = mtx.vadv * 100; else if (bidi_level & 1) advance = -mtx.hadv * 100; else advance = mtx.hadv * 100; if (fz_font_flags(font)->fake_bold) advance *= 1.02f; if (is && *is) { is = xps_parse_glyph_metrics(is, &advance, &u_offset, &v_offset, bidi_level); if (*is == ';') is ++; } if (bidi_level & 1) u_offset = -mtx.hadv * 100 - u_offset; u_offset = u_offset * 0.01f * size; v_offset = v_offset * 0.01f * size; if (is_sideways) { tm.e = x + u_offset + (mtx.vorg * size); tm.f = y - v_offset + (mtx.hadv * 0.5f * size); } else { tm.e = x + u_offset; tm.f = y - v_offset; } dir = bidi_level & 1 ? FZ_BIDI_RTL : FZ_BIDI_LTR; fz_show_glyph(ctx, text, font, tm, glyph_index, char_code, is_sideways, bidi_level, dir, FZ_LANG_UNSET); x += advance * 0.01f * size; } } } fz_catch(ctx) { fz_drop_text(ctx, text); fz_rethrow(ctx); } return text; } void xps_parse_glyphs(fz_context *ctx, xps_document *doc, fz_matrix ctm, char *base_uri, xps_resource *dict, fz_xml *root) { fz_device *dev = doc->dev; fz_xml *node; char *fill_uri; char *opacity_mask_uri; char *bidi_level_att; char *fill_att; char *font_size_att; char *font_uri_att; char *origin_x_att; char *origin_y_att; char *is_sideways_att; char *indices_att; char *unicode_att; char *style_att; char *transform_att; char *clip_att; char *opacity_att; char *opacity_mask_att; fz_xml *transform_tag = NULL; fz_xml *clip_tag = NULL; fz_xml *fill_tag = NULL; fz_xml *opacity_mask_tag = NULL; char *fill_opacity_att = NULL; fz_font *font; float font_size = 10; int is_sideways = 0; int bidi_level = 0; fz_text *text = NULL; fz_rect area; /* * Extract attributes and extended attributes. */ bidi_level_att = fz_xml_att(root, "BidiLevel"); fill_att = fz_xml_att(root, "Fill"); font_size_att = fz_xml_att(root, "FontRenderingEmSize"); font_uri_att = fz_xml_att(root, "FontUri"); origin_x_att = fz_xml_att(root, "OriginX"); origin_y_att = fz_xml_att(root, "OriginY"); is_sideways_att = fz_xml_att(root, "IsSideways"); indices_att = fz_xml_att(root, "Indices"); unicode_att = fz_xml_att(root, "UnicodeString"); style_att = fz_xml_att(root, "StyleSimulations"); transform_att = fz_xml_att(root, "RenderTransform"); clip_att = fz_xml_att(root, "Clip"); opacity_att = fz_xml_att(root, "Opacity"); opacity_mask_att = fz_xml_att(root, "OpacityMask"); for (node = fz_xml_down(root); node; node = fz_xml_next(node)) { if (fz_xml_is_tag(node, "Glyphs.RenderTransform")) transform_tag = fz_xml_down(node); if (fz_xml_is_tag(node, "Glyphs.OpacityMask")) opacity_mask_tag = fz_xml_down(node); if (fz_xml_is_tag(node, "Glyphs.Clip")) clip_tag = fz_xml_down(node); if (fz_xml_is_tag(node, "Glyphs.Fill")) fill_tag = fz_xml_down(node); } fill_uri = base_uri; opacity_mask_uri = base_uri; xps_resolve_resource_reference(ctx, doc, dict, &transform_att, &transform_tag, NULL); xps_resolve_resource_reference(ctx, doc, dict, &clip_att, &clip_tag, NULL); xps_resolve_resource_reference(ctx, doc, dict, &fill_att, &fill_tag, &fill_uri); xps_resolve_resource_reference(ctx, doc, dict, &opacity_mask_att, &opacity_mask_tag, &opacity_mask_uri); /* * Check that we have all the necessary information. */ if (!font_size_att || !font_uri_att || !origin_x_att || !origin_y_att) { fz_warn(ctx, "missing attributes in glyphs element"); return; } if (!indices_att && !unicode_att) return; /* nothing to draw */ if (is_sideways_att) is_sideways = !strcmp(is_sideways_att, "true"); if (bidi_level_att) bidi_level = atoi(bidi_level_att); /* * Find and load the font resource. */ font = xps_lookup_font(ctx, doc, base_uri, font_uri_att, style_att); if (!font) font = fz_new_base14_font(ctx, "Times-Roman"); fz_var(text); fz_try(ctx) { /* * Set up graphics state. */ ctm = xps_parse_transform(ctx, doc, transform_att, transform_tag, ctm); if (clip_att || clip_tag) xps_clip(ctx, doc, ctm, dict, clip_att, clip_tag); font_size = fz_atof(font_size_att); text = xps_parse_glyphs_imp(ctx, doc, ctm, font, font_size, fz_atof(origin_x_att), fz_atof(origin_y_att), is_sideways, bidi_level, indices_att, unicode_att); area = fz_bound_text(ctx, text, NULL, ctm); xps_begin_opacity(ctx, doc, ctm, area, opacity_mask_uri, dict, opacity_att, opacity_mask_tag); /* If it's a solid color brush fill/stroke do a simple fill */ if (fz_xml_is_tag(fill_tag, "SolidColorBrush")) { fill_opacity_att = fz_xml_att(fill_tag, "Opacity"); fill_att = fz_xml_att(fill_tag, "Color"); fill_tag = NULL; } if (fill_att) { float samples[FZ_MAX_COLORS]; fz_colorspace *colorspace; xps_parse_color(ctx, doc, base_uri, fill_att, &colorspace, samples); if (fill_opacity_att) samples[0] *= fz_atof(fill_opacity_att); xps_set_color(ctx, doc, colorspace, samples); fz_fill_text(ctx, dev, text, ctm, doc->colorspace, doc->color, doc->alpha, fz_default_color_params); } /* If it's a complex brush, use the charpath as a clip mask */ if (fill_tag) { fz_clip_text(ctx, dev, text, ctm, area); xps_parse_brush(ctx, doc, ctm, area, fill_uri, dict, fill_tag); fz_pop_clip(ctx, dev); } xps_end_opacity(ctx, doc, opacity_mask_uri, dict, opacity_att, opacity_mask_tag); if (clip_att || clip_tag) fz_pop_clip(ctx, dev); } fz_always(ctx) { fz_drop_text(ctx, text); fz_drop_font(ctx, font); } fz_catch(ctx) fz_rethrow(ctx); }