---
# Intent inference rules.
# Each list entry has a `name`, the element `tag`(s) it applies to, a `match`
# expression (a single string, or a list of string fragments), optional
# `variables`, and a `replace` tree describing the intent to emit.

# A one-character mi drawn from ℂℕℚℝℤ.
- name: simple-number-set
  tag: mi
  # tricky way to match any of the letters instead of using 'or'
  match: "string-length(text())=1 and translate(text(), 'ℂℕℚℝℤ', '')=''"
  replace:
    - intent:
        name: number-sets
        children: [x: "text()"]

# A number-set letter with a one-character superscript that is '+', '-', or an mn.
- name: sup-number-set
  tag: msup
  match:
    - "*[1][string-length(text())=1 and translate(., 'ℂℕℚℝℤ', '')=''] and"
    - "*[2][string-length(text())=1 and (text()='+' or text()='-' or self::m:mn)]"
  replace:
    - intent:
        name: number-sets
        children:
          - x: "*[1]"
          - x: "*[2]"

- name: real-part
  tag: mi
  # the mi must come directly before function apply (U+2061)
  match: "(text()='Re' or text()='re' or text()='ℜ') and following-sibling::*[1][text()='\u2061']"
  replace:
    - intent:
        name: real-part
        children: []

- name: imaginary-part
  tag: mi
  # the mi must come directly before function apply (U+2061)
  match: "(text()='Im' or text()='im' or text()='ℑ') and following-sibling::*[1][text()='\u2061']"
  replace:
    - intent:
        name: imaginary-part
        children: []

# Any leaf carrying @data-number is replaced by an mn whose content is that attribute's value.
- name: roman_numeral
  tag: [mi, mn, mtext]
  match: "@data-number"
  replace:
    - intent:
        name: mn
        children: [x: "@data-number"]

# A two-child mrow that starts with a '+' or '-' operator.
- name: positive-or-negative
  tag: mrow
  match: "count(*)=2 and *[1][self::m:mo][text()='+' or text()='-']"
  variables:
    - AtLeftEdge: "EdgeNode(., 'left', 'mtd')"
    - AtLeftEdgeOfMTD: "name($AtLeftEdge) = 'mtd'"
    - ContinuedColumn: "$TableProperty='lines' or $TableProperty='system-of-equations' or $TableProperty='piecewise'"
    - InContinuedRow: "parent::m:mtd[parent::m:mtr[@data-intent-property[contains(., ':continued-row:')]]]"
    - FirstColumn: "$AtLeftEdge/../*[1]=$AtLeftEdge"  # first child of mtr...
  replace:
    - test:
        # don't use positive/negative if a continued data cell
        if: "$AtLeftEdgeOfMTD and $ContinuedColumn and (not($FirstColumn) or $InContinuedRow)"
        then:
          - intent:
              xpath-name: "IfThenElse(*[1][self::m:mo][text()='+'], 'plus', 'minus')"
              children: [x: "*[2]"]
        else:
          - intent:
              xpath-name: "IfThenElse(*[1][self::m:mo][text()='+'], 'positive', 'negative')"
              children: [x: "*[2]"]

# (a,b) has many interpretations; (a, b] (etc) have fewer interpretations.
# as an interval, it represents a set and hence a clue that it is an interval is that a set operator comes
#   before or after it. '=' is also common.
# They also commonly stand by themselves, but so does the interpretation as a point or gcd,
#   so we don't include (xxx, yyy) in the inference
- name: interval
  tag: mrow
  match:
    - "count(*)=3 and "  # FIX: consider adding ]...[ versions
    - "(*[1][text()='(' or text()='['] and *[3][text()=')' or text()=']']) and"  # match bracketing
    - "(*[2][count(*)=3 and *[2][text()=',']]) and"  # inside should have ','
    - "not(ancestor::*[IsBracketed(., '{', '}')]) and "  # intervals are not part of set notation (e.g, { (x,y)∈L | ...})
    # FIX: if both the first and third children of *[2] are mn, then make sure first <= third
    - "("
    - " not(IsBracketed(., '(', ')')) or "  # (.,.) is very ambiguous -- need more clues
    - " $ClearSpeak_Paren = 'Interval' or "  # pref is set
    - " *[2]/*[1][contains(., '∞')] or "  # starts with infinity
    - " *[2]/*[3][contains(., '∞')] or "  # ends with infinity
    - " preceding-sibling::*[1][self::m:mo and ( text()='=' or IsInDefinition(., 'SubsetOperators') )] or "  # context hint
    - " following-sibling::*[1][self::m:mo and ( text()='=' or IsInDefinition(., 'SubsetOperators') )]"  # context hint
    - ")"
  replace:
    # The bracket pair picks the intent name; the final `else` covers '[' ... ')'.
    - test:
        - if: "*[1][text()='('] and *[3][text()=')']"
          then:
            - intent:
                name: "open-interval"
                children: [x: "*[2]/*[1]", x: "*[2]/*[3]"]
        - else_if: "*[1][text()='('] and *[3][text()=']']"
          then:
            - intent:
                name: "open-closed-interval"
                children: [x: "*[2]/*[1]", x: "*[2]/*[3]"]
        - else_if: "*[1][text()='['] and *[3][text()=']']"
          then:
            - intent:
                name: "closed-interval"
                children: [x: "*[2]/*[1]", x: "*[2]/*[3]"]
          else:
            - intent:
                name: "closed-open-interval"
                children: [x: "*[2]/*[1]", x: "*[2]/*[3]"]

# A parenthesized mfrac whose @linethickness, with lowercase letters (units) stripped, equals 0.
- name: binomial-frac
  tag: mrow
  match: "IsBracketed(., '(', ')') and *[2][self::m:mfrac][translate(@linethickness, 'abcdefghijklmnopqrstuvwxyz', '')=0]"
  replace:
    - intent:
        name: binomial
        children:
          - x: "*[2]/*[1]"
          - x: "*[2]/*[2]"

# C with pre-subscript 'n' and post-subscript 'm'. The 'n' can be either in the sub or super position
# See https://en.wikipedia.org/wiki/Binomial_coefficient#History_and_notation
- name: binomial-mmultiscripts
  tag: mmultiscripts
  variables:
    - Prescripts: "m:mprescripts/following-sibling::*"
    - Postscripts: "m:mprescripts/preceding-sibling::*[position() < last()]"
  match:
    - "*[1][self::m:mi and text()='C'] and"
    - "count($Prescripts)=2 and ($Prescripts[1][self::m:none] or $Prescripts[2][self::m:none]) and "
    - "count($Postscripts)=2 and $Postscripts[2][self::m:none]"
  replace:
    - intent:
        name: binomial
        children:
          # use whichever prescript is not 'none'
          - x: "IfThenElse(name($Prescripts[2])='none', $Prescripts[1], $Prescripts[2])"
          - x: "$Postscripts[1]"

- name: binomial-msub  # C_{n,k}
  tag: msub
  match:
    - "*[1][self::m:mi and text()='C'] and"
    - "*[2][self::m:mrow and count(*)=3 and *[2][.=',']]"
  replace:
    - intent:
        name: binomial
        children:
          - x: "*[2]/*[1]"
          - x: "*[2]/*[3]"

# P with pre-sub or superscript 'n' and post-subscript 'k' (https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n)
- name: permutation-mmultiscripts
  tag: mmultiscripts
  variables:
    - Prescripts: "m:mprescripts/following-sibling::*"
    - Postscripts: "m:mprescripts/preceding-sibling::*[position() < last()]"
  match:
    - "*[1][self::m:mi and text()='P'] and"
    - "count($Prescripts)=2 and ($Prescripts[1][self::m:none] or $Prescripts[2][self::m:none]) and "
    - "count($Postscripts)=2 and $Postscripts[2][self::m:none]"
  replace:
    - intent:
        name: permutation-symbol
        children:
          # use whichever prescript is not 'none'
          - test:
              if: "$Prescripts[1][self::m:none]"
              then: [x: "$Prescripts[2]"]
              else: [x: "$Prescripts[1]"]
          - x: "$Postscripts[1]"

# P with superscript 'n' and subscript 'k' (https://en.wikipedia.org/wiki/Permutation#k-permutations_of_n)
- name: permutation-msubsup
  tag: msubsup
  match:
    - "*[1][self::m:mi and text()='P']"
  replace:
    - intent:
        name: permutation-symbol
        children:
          # superscript first, then subscript
          - x: "*[3]"
          - x: "*[2]"

# rules on scripted vertical bars ('evaluated at')
- name: evaluated-at-msub
  tag: mrow
  match: "count(*)=2 and *[2][self::m:msub and *[1][self::m:mo][text()='|']]"
  replace:
    - intent:
        name: "evaluate"
        children:
          - x: "*[1]"
          - x: "*[2]/*[2]"

- name: evaluated-at-msubsup
  tag: mrow
  match: "count(*)=2 and *[2][self::m:msubsup and *[1][self::m:mo][text()='|']]"
  replace:
    - intent:
        name: "evaluate"
        children:
          - x: "*[1]"
          - x: "*[2]/*[2]"
          - x: "*[2]/*[3]"

# it is also used outside of brackets
- name: bracketed-evaluated-at
  tag: msubsup
  match: "IsBracketed(*[1], '[', ']')"
  replace:
    - intent:
        name: "evaluate"
        children:
          - x: "*[1]/*[1]"
          - x: "*[2]"
          - x: "*[3]"

# vertical bars otherwise
# could also be cardinality, length, ...
- name: absolute-value
  tag: mrow
  match: "IsBracketed(., '|', '|')"
  replace:
    - intent:
        name: "absolute-value"
        children: [x: "*[2]"]

- name: default
  tag: msqrt
  match: "."
  replace:
    - intent:
        name: "square-root"
        children: [x: "*[1]"]

# An mroot whose index is the number 2 is treated as a square root.
- name: sqrt
  tag: mroot
  match: "*[2][self::m:mn and text()='2']"
  replace:
    - intent:
        name: "square-root"
        children: [x: "*[1]"]

- name: default
  tag: mroot
  match: "."
  replace:
    - intent:
        name: "root"
        children:
          - x: "*[1]"
          - x: "*[2]"

# 'log' with a subscript: only the subscript (the base) is passed on.
- name: log-base
  tag: msub
  match:
    - "*[1][self::m:mi][text()='log']"
  replace:
    - intent:
        name: log-base
        children: [x: "*[2]"]

# 'log' with both a subscript and a superscript.
- name: log-base-power
  tag: msubsup
  match:
    - "*[1][self::m:mi][text()='log']"
  replace:
    - intent:
        name: log-base-power
        children:
          - x: "*[2]"
          - x: "*[3]"

- name: bigop
  tag: [msub, munder]
  match: "IsInDefinition(*[1], 'LargeOperators') or contains(@data-intent-property, ':largeop:')"
  replace:
    - intent:
        name: "large-op"  # Fix: the name in the spreadsheet needs updating/fixing
        children:
          - x: "*[1]"
          - x: "*[2]"

- name: limit
  tag: [msub, munder]
  # Sometimes these have (weird) spaces in them
  variables: [NoSpacesBase: "translate(*[1], ' \u00A0\u2004\u2005\u2006\u2007\u2008\u2009\u200A','')"]
  match: "*[1][$NoSpacesBase='lim' or $NoSpacesBase='limsup' or $NoSpacesBase='liminf']"
  replace:
    - intent:
        name: "limit"
        children:
          - x: "$NoSpacesBase"
          - x: "*[2]"

- name: modified-var
  tag: mover
  # breve, check, dot, double-dot, triple-dot, quadruple-dot, grave, hat, tilde, line/bar
  match: "*[1][self::m:mi] and *[2][translate(., '\u0306\u030c.\u00A8\u02D9\u20DB\u20DC`^~→¯_', '')='']"
  replace:
    - intent:
        name: "modified-variable"
        children:
          - x: "*[1]"
          - x: "*[2]"

# this captures the output for the mhchem's "<=>", "<<=>", and "<=>>" output (there are no Unicode arrows for them)
# this isn't a perfect match, but should be good enough and allows merging all three (see github.com/NSoiffer/MathCAT/issues/60)
- name: chemistry-mhchem-equilibrium-arrow
  tag: mover
  match:
    - "*[1][substring(., 1, 1)='↽'] and"
    - "*[2][substring(., string-length(), 1)='⇀']"
  replace:
    - intent:
        name: "chemical-arrow-operator"
        children:
          # These code points are above U+FFFF, so they need YAML's 8-digit
          # "\U" escape: "\u" consumes exactly four hex digits, which would
          # yield U+1F8D followed by a literal digit.
          - test:
              if: "*[1][self::m:mrow]"
              then_test:
                if: "*[2][self::m:mrow]"
                then: [t: "\U0001F8D2"]  # this is currently unassigned and may get used by UTC at some point (<=>)
                else: [t: "\U0001F8D4"]  # this is currently unassigned and may get used by UTC at some point (<<=>)
              else: [t: "\U0001F8D3"]  # this is currently unassigned and may get used by UTC at some point (<=>>)

# need to include the name of the element so the rules know whether to say "sub" or "super"
- name: chemistry
  tag: [msub, msup]
  match: "@data-chem-formula"
  replace:
    - intent:
        name: "chemical-formula"
        children:
          # have to list individual children because "*" results in an internal error (children aren't flat)
          - x: "name(.)"
          - x: "*[1]"
          - x: "*[2]"

- name: chemistry-prescripts
  tag: mmultiscripts
  match: "@data-chem-formula"
  replace:
    - test:
        - if: "count(*)=4 or count(*)=6"
          then:
            - intent:
                name: "chemical-nuclide"
                children:
                  - x: "*"
          else:
            # FIX: what other cases are there???
            - intent:
                name: "chemical-formula"
                children:
                  - x: "*"

- name: chemical-element
  tag: [mi, mtext]
  match: "@data-chem-element"
  replace:
    - intent:
        name: "chemical-element"
        children:
          - x: "text()"

- name: chemical-formula-op
  tag: [mo]
  match: "@data-chem-formula-op"
  replace:
    - intent:
        name: "chemical-formula-operator"
        children:
          - x: "text()"

# Listed before chemical-equation-op, whose match is the less specific "@data-chem-equation-op".
- name: chemical-arrow-op
  tag: [mo]
  # FIX: this is a duplicate of the list in chemistry.rs. Probably should pull out and add to definitions.yaml
  match: "@data-chem-equation-op and translate(., '→➔←⟶⟵⤻⇋⇌↿↾⇃⇂⥮⥯⇷⇸⤉⤈⥂⥄', '')=''"
  replace:
    - intent:
        name: "chemical-arrow-operator"
        children:
          - x: "text()"

- name: chemical-equation-op
  tag: [mo]
  match: "@data-chem-equation-op"
  replace:
    - intent:
        name: "chemical-equation-operator"
        children:
          - x: "text()"

# A parenthesized 's', 'l', 'g', or 'aq' inside a chemical formula or equation.
- name: chemistry-state
  tag: mrow
  match:
    - "(@data-chem-formula or @data-chem-equation) and"
    - "IsBracketed(., '(', ')') and"
    - "*[2][text()='s' or text()='l' or text()='g' or text()='aq']"
  replace:
    - intent:
        name: "chemical-state"
        children:
          - x: "*[2]"

# This needs to be before the simple "x prime" rule
# minutes/seconds or feet/inches
# If ' or " follows a number, then it is not "prime", but is a unit
# Note the ASCII ' and " are converted to prime during canonicalization if in a superscript
# Handles single, double, primes, and also double quote, which don't have to be in an msup
# The rules are:
#   1. If the prime follows a degree sign with a number (in various forms) or letter after it,
#      then it is minutes/seconds
#   2. Else, if it follows a *number* (in various forms), then it is feet/inches
#   3. Else it is 'prime'
#   any ?
#     ( count(match)==2 &&
#       (name(match)=="mrow" || name(match)=="msup") &&
#       (MatchString($1, "mo", "'") || MatchString($1, "mo", "′") ||      // apostrophe or prime
#        MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) &&    // double quote or double prime
#       (
#         // '1': degree sign check
#         ( has_previous(match) &&
#           ( (MatchString($1, "mo", "′") &&
#              ( name(previous(match))=="msup" ||
#                (name(previous(match))=="mrow" && count(previous(match))==2)) &&
#              MatchString(previous(match)[1], "mo", "°")) ||
#             ( has_previous(previous(match)) &&
#               (MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) &&
#               ( name(previous(match, 2))=="msup" ||
#                 (name(previous(match, 2))=="mrow" && count(previous(match, 2))==2)) &&
#               MatchString(previous(match, 2)[1], "mo", "°"))
#           )
#         ) ||
#
#         // '2': number or numeric fraction in front
#         name($0)=="mn" ||
#         (name($0)=="mfrac" && name($0[0])=="mn" && name($0[1])=="mn")
#       )
#     )
#   => structure(
#        $0,
#        UIWord([$1[0],$1[-1]])
#        {ruleRef="RR_unitsBase";
#         singular=MatchString($0, "mn", "1");
#         // if this follows a degree sign, then it is an angle notation (min/secs) regardless of the pref setting
#         // we have to either look at the previous entry for mins or the one prior to that for secs
#         pref= ( has_previous(match) &&
#                 ( (MatchString($1, "mo", "′") &&
#                    ( name(previous(match))=="msup" ||
#                      (name(previous(match))=="mrow" && count(previous(match))==2)) &&
#                    MatchString(previous(match)[1], "mo", "°")) ||
#                   ( has_previous(previous(match)) &&
#                     (MatchString($1, "mo", "″") || MatchString($1, "mo", "\"")) &&
#                     ( name(previous(match, 2))=="msup" ||
#                       (name(previous(match, 2))=="mrow" && count(previous(match, 2))==2)) &&
#                     MatchString(previous(match, 2)[1], "mo", "°"))
#                 )
#               ) ? "Angle" : "Length";
#        }
#      );

# Pseudo-script characters are characters such as "degree sign" ('°') that are raised but in MathML should be in a superscript.
# They are not spoken as if in a superscript (e.g "x degrees", not "x superscripts degrees")
# The last child must consist only of characters from the listed set (primes, daggers, degree sign, asterisk).
- name: skip-super
  tag: [msup, msubsup]
  match: "*[last()][translate(., '′″‴⁗†‡°*', '')='']"
  replace:
    - intent:
        name: "skip-super"
        children: [x: "*"]

# Any other superscript that is an mo.
- name: mo-super
  tag: [msup, msubsup]
  match: "*[last()][self::m:mo]"
  replace:
    - intent:
        name: "say-super"
        children: [x: "*"]

# rules for functions raised to a power
# these could have been written on 'mrow' but putting them on msup seems more specific
# to see if it is a function, we look right to see if the following sibling is apply-function
- name: function-inverse
  tag: msup
  match:
    - "*[2][self::m:mrow][count(*)=2] and"  # exponent is an mrow with children...
    - "*[2]/*[1][self::m:mo][text()='-'] and"  # '-'
    - "*[2]/*[2][self::m:mn][text()=1] and"  # and '1'
    - "*[1][self::m:mi] and IsInDefinition(*[1], 'TrigFunctionNames') and"  # base is a trig function name (e.g, sin, sinh)
    - "following-sibling::*[1][self::m:mo][text()='\u2061']"  # and msup is followed by invisible function apply
  replace:
    - intent:
        name: "inverse-function"
        children:
          - x: "*[1]"

- name: bigop
  tag: [msubsup, munderover]
  match: "IsInDefinition(*[1], 'LargeOperators') or contains(@data-intent-property, ':largeop:')"
  replace:
    - intent:
        name: "large-op"  # Fix: the name in the spreadsheet needs updating/fixing
        children:
          - x: "*[1]"
          - x: "*[2]"
          - x: "*[3]"

- name: set
  tag: mrow
  # sets have { }s and should have at least one of ",∈∉|:" (vertical bar is "such that"),
  # or be the empty set, or have a single leaf as content
  match: "IsBracketed(., '{', '}') and (count(*)=2 or (count(*)=3 and IsNode(*[2], 'leaf')) or . != translate(., ',∈∉|:', ''))"
  replace:
    - intent:
        name: "set"
        children:
          # only the two braces => empty set, so no children
          - test:
              if: "count(*) = 2"
              then: []
              else: [x: "*[2]"]

- name: default
  tag: msub
  match: "."
  replace:
    - intent:
        name: "particular-value-of"
        children:
          - x: "*[1]"
          - x: "*[2]"

- name: default
  tag: msup
  match: "."
  replace:
    - intent:
        name: "power"
        children:
          - x: "*[1]"
          - x: "*[2]"

# msubsup becomes power(sub(base, subscript), superscript)
- name: default
  tag: msubsup
  match: "."
  replace:
    - intent:
        name: "power"
        children:
          - intent:
              name: "sub"
              children:
                - x: "*[1]"
                - x: "*[2]"
          - x: "*[3]"

- name: default
  tag: mfrac
  match: "."
  replace:
    - intent:
        name: "fraction"
        children:
          - x: "*[1]"
          - x: "*[2]"

# generic mtable: treat as multiline equations of some sort
# if a property is set, handle that first

# A '{' followed by an mtable: the mrow is replaced by the mtable alone.
- name: "piecewise"
  tag: mrow
  match: "count(*)=2 and *[1][text()='{'] and *[2][self::m:mtable]"
  replace:
    - x: "*[2]"

# Maps an explicit @data-intent-property on the mtable to the intent name.
# ':grid:' becomes 'determinant' when the parent mrow is bracketed by '|', else 'matrix'.
# Both ':system-of-equations:' and the older ':system-of-equation:' spelling are accepted;
# the emitted name is 'system-of-equations', matching mtable-equations-property and the
# $TableProperty test in positive-or-negative.
# NOTE(review): unlike the rules below, this rule does not set $TableProperty -- confirm that is intended.
- name: mtable-property-is-set
  tag: mtable
  variables:
    - MTableProperty: "IfThenElse(contains(@data-intent-property, ':grid:'), IfThenElse(parent::m:mrow[IsBracketed(., '|', '|')], 'determinant', 'matrix'), IfThenElse(contains(@data-intent-property, ':piecewise:'), 'piecewise', IfThenElse(contains(@data-intent-property, ':system-of-equations:') or contains(@data-intent-property, ':system-of-equation:'), 'system-of-equations', IfThenElse(contains(@data-intent-property, ':lines:'), 'lines',''))))"
  match: "$MTableProperty !=''"
  replace:
    - intent:
        xpath-name: "$MTableProperty"
        children:
          - x: "*"

# mtable whose parent mrow is bracketed by ( ) or [ ]
- name: mtable-matrix-property
  tag: mtable
  match:
    - "..[self::m:mrow and (IsBracketed(., '(', ')') or IsBracketed(., '[', ']'))]"
  replace:
    - with:
        variables:
          - TableProperty: "'matrix'"
        replace:
          - intent:
              name: "matrix"
              children:
                - x: "*"

# mtable whose parent mrow is bracketed by | |
- name: mtable-determinant-property
  tag: mtable
  match: "..[self::m:mrow and IsBracketed(., '|', '|')]"
  replace:
    - with:
        variables:
          - TableProperty: "'determinant'"
        replace:
          - intent:
              name: "determinant"
              children:
                - x: "*"

# mtable whose parent is a two-child mrow starting with '{'
- name: mtable-piecewise-property
  tag: mtable
  match:
    - "..[self::m:mrow and count(*)=2 and *[1][text()='{']]"
  replace:
    - with:
        variables:
          - TableProperty: "'piecewise'"
        replace:
          - intent:
              name: "piecewise"
              children:
                - x: "*"

# mtable whose first row contains a relational operator (one of =≠<>≤≥≦≧)
- name: mtable-equations-property
  tag: mtable
  match: "count(*) > 0 and *[1][contains(translate(., '=≠<>≤≥≦≧', '='), '=')]"
  replace:
    - with:
        variables:
          - TableProperty: "'system-of-equations'"
        replace:
          - intent:
              name: "system-of-equations"
              children:
                - x: "*"

- name: mtable-lines-property
  tag: mtable
  # default is "lines", so no need to test contains(@data-intent-property, ':lines:')
  match: "."
  replace:
    - with:
        variables:
          - TableProperty: "'lines'"
        replace:
          - intent:
              name: "lines"
              children:
                - x: "*"