/**
 * @file Reusable constructs
 * @author ObserverOfTime
 * @license MIT
 */

/// <reference types="tree-sitter-cli/dsl" />
// @ts-check

/**
 * Build the pattern for the body of a public identifier literal.
 *
 * The returned expression is unanchored and matches zero or more characters
 * from the class, so it is meant to be embedded between explicit quote tokens.
 *
 * @param {"'" | ''} q Extra character to admit into the class: pass `"'"` when
 *   the literal is delimited by double quotes, `''` when it is delimited by
 *   single quotes.
 * @returns {RegExp}
 * @see {@link https://www.w3.org/TR/xml/#NT-PubidChar}
 */
export const pubid_char = (q) =>
  new RegExp(`[ \\r\\na-zA-Z0-9\\-${q}()+,./:=?;!*#@$_%]*`);

/**
 * Quoted attribute value: the quote `q`, a `content` field holding any mix of
 * plain characters (anything except `<`, `&` and `q`) and references, then `q`.
 *
 * @param {GrammarSymbols<string>} $
 * @param {'"' | "'"} q Quote character delimiting the value.
 */
export const att_value = ($, q) =>
  seq(
    q,
    field(
      'content',
      repeat(choice(
        new RegExp(`[^<&${q}]`),
        $._Reference
      ))
    ),
    q
  );

/**
 * Quoted entity value: like {@link att_value}, but `%` is also excluded from
 * the plain characters and parameter-entity references are accepted.
 *
 * @param {GrammarSymbols<string>} $
 * @param {'"' | "'"} q Quote character delimiting the value.
 */
export const entity_value = ($, q) =>
  seq(
    q,
    field(
      'content',
      repeat(choice(
        new RegExp(`[^<%&${q}]`),
        $.PEReference,
        $._Reference
      ))
    ),
    q
  );

/**
 * Wrap a sequence of rules in matching quotes (single or double).
 *
 * @param {RuleOrLiteral[]} choices Rules placed, in order, between the quotes.
 */
export const str = (...choices) =>
  choice(
    seq("'", ...choices, "'"),
    seq('"', ...choices, '"')
  );

/**
 * Any of the given rules, or a parameter-entity reference in their place.
 *
 * @param {GrammarSymbols<string>} $
 * @param {RuleOrLiteral[]} choices
 */
export const ref = ($, ...choices) => choice(...choices, $.PEReference);

/**
 * Zero or more repetitions of the given rules taken as one sequence.
 *
 * @param {RuleOrLiteral[]} rules
 */
export const rseq = (...rules) => repeat(seq(...rules));

/**
 * One or more repetitions of the given rules taken as one sequence.
 *
 * @param {RuleOrLiteral[]} rules
 */
export const rseq1 = (...rules) => repeat1(seq(...rules));

/**
 * Shorthand for the DSL's `optional`. Written as a wrapper (like the helpers
 * above) so the DSL global is looked up when a rule is built rather than when
 * this module is loaded.
 *
 * @param {RuleOrLiteral} rule
 */
const O = (rule) => optional(rule);

/**
 * Rule builders shared between grammars, keyed by rule name.
 *
 * @type {Record<string, ($: GrammarSymbols<string>) => RuleOrLiteral>}
 */
export const rules = {
  _markupdecl: $ => choice(
    $.elementdecl,
    $.AttlistDecl,
    $._EntityDecl,
    $.NotationDecl,
    $.PI,
    $.Comment
  ),

  _DeclSep: $ => choice($.PEReference, $._S),

  // NOTE(review): the body is a lone empty-string literal. The declaration's
  // delimiters and inner rules appear to have been lost (everything between
  // '<' and '>' looks stripped). Restore from the upstream grammar; left
  // untouched here because the original cannot be recovered from this file.
  elementdecl: $ => seq(
    ''
  ),

  contentspec: $ => choice(
    'EMPTY',
    'ANY',
    $.Mixed,
    $.children,
    $.PEReference
  ),

  Mixed: $ => choice(
    // Starred form: '#PCDATA' optionally followed by '|'-separated names.
    seq(
      '(',
      O($._S),
      ref($, '#PCDATA'),
      rseq(
        O($._S),
        '|',
        O($._S),
        ref($, $.Name)
      ),
      O($._S),
      rseq(
        $.PEReference,
        O($._S)
      ),
      ')',
      '*'
    ),
    // Plain form without the trailing '*'; given lower precedence than the
    // starred form above.
    prec(-1, seq(
      '(',
      O($._S),
      ref($, '#PCDATA'),
      O($._S),
      rseq(
        $.PEReference,
        O($._S)
      ),
      ')'
    ))
  ),

  children: $ => prec(1, seq(
    $._choice,
    O(choice('?', '*', '+'))
  )),

  // A content particle: a name or nested group with an optional occurrence
  // indicator.
  _cp: $ => prec.left(seq(
    ref($, $.Name, $._choice),
    O(choice('?', '*', '+'))
  )),

  // Parenthesised group of content particles separated by '|' or ','.
  _choice: $ => seq(
    '(',
    O($._S),
    $._cp,
    rseq(
      O($._S),
      choice('|', ','),
      O($._S),
      $._cp
    ),
    rseq(
      O($._S),
      $.PEReference
    ),
    O($._S),
    ')'
  ),

  // NOTE(review): empty-string body; content appears to have been stripped
  // (presumably the ATTLIST declaration built from AttDef). Restore from the
  // upstream grammar.
  AttlistDecl: $ => seq(
    ''
  ),

  AttDef: $ => prec.right(seq(
    $._S,
    ref($, $.Name),
    $._S,
    $._AttType,
    O(seq($._S, $.DefaultDecl))
  )),

  _AttType: $ => choice(
    $.StringType,
    $.TokenizedType,
    $._EnumeratedType,
    $.PEReference
  ),

  StringType: _ => 'CDATA',

  TokenizedType: _ => token(choice(
    'ID',
    'IDREF',
    'IDREFS',
    'ENTITY',
    'ENTITIES',
    'NMTOKEN',
    'NMTOKENS'
  )),

  _EnumeratedType: $ => choice(
    $.NotationType,
    $.Enumeration
  ),

  // 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  // The name belongs inside the repetition (as in Enumeration below) so that
  // a single name and lists of three or more names are accepted, and every
  // '|' must be followed by a name.
  NotationType: $ => seq(
    'NOTATION',
    $._S,
    '(',
    O($._S),
    ref($, $.Name),
    rseq(
      O($._S),
      '|',
      O($._S),
      ref($, $.Name)
    ),
    O($._S),
    ')'
  ),

  Enumeration: $ => seq(
    '(',
    O($._S),
    $.Nmtoken,
    rseq(
      O($._S),
      '|',
      O($._S),
      $.Nmtoken
    ),
    O($._S),
    ')'
  ),

  DefaultDecl: $ => choice(
    '#REQUIRED',
    '#IMPLIED',
    seq(
      O(seq('#FIXED', $._S)),
      $.AttValue
    ),
    $.PEReference
  ),

  _EntityDecl: $ => choice(
    $.GEDecl,
    $.PEDecl
  ),

  // NOTE(review): empty-string body; content appears to have been stripped.
  // Restore from the upstream grammar.
  GEDecl: $ => seq(
    ''
  ),

  // NOTE(review): empty-string body; content appears to have been stripped.
  // Restore from the upstream grammar.
  PEDecl: $ => seq(
    ''
  ),

  EntityValue: $ => choice(
    entity_value($, '"'),
    entity_value($, "'")
  ),

  NDataDecl: $ => seq($._S, 'NDATA', $._S, ref($, $.Name)),

  // NOTE(review): empty-string body; content appears to have been stripped.
  // Restore from the upstream grammar.
  NotationDecl: $ => seq(
    ''
  ),

  PEReference: $ => seq('%', $.Name, ';'),

  _S: _ => /[ \t\r\n]+/,

  Name: _ => /[a-zA-Z_][a-zA-Z0-9_:.·-]*/,

  Nmtoken: _ => /[a-zA-Z0-9_:.·-]+/,

  _Reference: $ => choice($.EntityRef, $.CharRef),

  EntityRef: $ => seq('&', $.Name, ';'),

  // Decimal (&#NNN;) or hexadecimal (&#xHHH;) character reference.
  CharRef: _ => choice(
    seq('&#', /[0-9]+/, ';'),
    seq('&#x', /[0-9a-fA-F]+/, ';')
  ),

  AttValue: $ => choice(
    att_value($, '"'),
    att_value($, "'")
  ),

  ExternalID: $ => choice(
    seq('SYSTEM', $._S, $.SystemLiteral),
    seq('PUBLIC', $._S, $.PubidLiteral, $._S, $.SystemLiteral)
  ),

  PublicID: $ => prec.right(
    seq(ref($, 'PUBLIC'), $._S, $.PubidLiteral)
  ),

  // The quoted text is exposed under the URI node name via alias().
  SystemLiteral: $ => choice(
    seq('"', alias(/[^"]*/, $.URI), '"'),
    seq("'", alias(/[^']*/, $.URI), "'")
  ),

  // A double-quoted literal may contain "'"; a single-quoted one may not.
  PubidLiteral: _ => choice(
    seq('"', pubid_char("'"), '"'),
    seq("'", pubid_char(''), "'")
  ),

  _VersionInfo: $ => seq(
    $._S,
    'version',
    $._Eq,
    str($.VersionNum)
  ),

  VersionNum: _ => /1\.[0-9]+/,

  _EncodingDecl: $ => seq(
    $._S,
    'encoding',
    $._Eq,
    str($.EncName)
  ),

  EncName: _ => /[A-Za-z][A-Za-z0-9._\-]*/,

  // NOTE(review): empty-string body; content appears to have been stripped.
  // Restore from the upstream grammar.
  PI: $ => seq(
    ''
  ),

  _Eq: $ => seq(O($._S), '=', O($._S))
};