/* * LibAxl: Another XML library * Copyright (C) 2013 Advanced Software Production Line, S.L. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License * as published by the Free Software Foundation; either version 2.1 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA * 02111-1307 USA * * You may find a copy of the license under this software is released * at COPYING file. This is LGPL software: you are welcome to * develop proprietary applications using this library without any * royalty or fee but returning back any change, improvement or * addition in the form of source code, project image, documentation * patches, etc. * * For commercial support on build XML enabled solutions contact us: * * Postal address: * Advanced Software Production Line, S.L. * Edificio Alius A, Oficina 102, * C/ Antonio Suarez Nº 10, * Alcalá de Henares 28802 Madrid * Spain * * Email address: * info@aspl.es - http://www.aspl.es/xml */ /** * @internal * @brief XML 1.0 Third edition grammar * * [1] document ::= prolog element Misc* * [1] status: partially * * [2] Char ::= \x9 | \xA | \xD | \x20-\xD7FF | \xE000-\xFFFD | \x10000-\10FFFF * [2] status: not implemented * * [3] S ::= ( \x20 | \x9 | \xD | \xA) * [3] status: ok * * [4] NameChar ::= Letter | Digit | '.' 
| '-' | '_' | ':' | CombiningChar | Extender * [4] status: not implemented * * [5] Name ::= ( Letter | '_' | ':' |) ( NameChar )* * [5] status: not implemented * * [6] Names ::= Name ( \x20 Name )* * [6] status: not implemented * * [7] Nmtoken ::= ( NameChar ) + * [7] status: not implemented * * [8] Nmtokens ::= Nmtoken (\x20 Nmtoken)* * [8] status: not implemented * * [9] EntityValue ::= '"' ( [^%&"] | PEReference | Reference )* '"' | "'" ( [^%&'] ! PEReference | Reference )* "'" * [9] status: not implemented * * [10] AttValue ::= '"' ( [^<&"] | Reference)* '"' | "'" ( [^<&'] | Reference )* "'" * [10] status: not implemented * * [11] SystemLiteral ::= ( '"' [^"]* '"') | ("'" [^']* "'") * [11] status: not implemented * * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'") * "'" * [12] status: not implemented * * [13] PubidChar ::= \x20 | \xD | \xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] * [13] status: not implemented * * [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) * [14] status: not implemented * * [15] Comments ::= '' * [15] status: not implemented * * [16] PI ::= '' * [16] status: not implemented * * [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') | ('L' | 'l')) * [17] status: not implemented * * [18] CDsect ::= CDStart CData CDend * [18] status: not implemented * * [19] CDStart ::= '' Char*)) * [20] status: not implemented * * [21] CDEnd ::= ']]>' * [21] status: not implemented * * [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)? * [22] status: partially * * [23] XMLDecl ::= '' * [23] status: ok * * [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') * [24] status: ok * * [25] Eq ::= S? '=' S? 
* [25] status: ok * * [26] VersionNum ::= '1.0' * [26] status: ok * * [27] Misc ::= Comment | PI | S * [27] status: not implemented * * [28] doctypedecl ::= '' * [28] status: not implemented * * [28a] DeclSep ::= PEReference | S * [28a] status: not implemented * * [28b] intSubset ::= (markupdecl | DeclSep)* * [28b] status: not implemented * * [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment * [29] status: not implemented * * [30] extSubset ::= TextDecl? extSubsetDecl * [30] status: not implemented * * [31] extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep) * * [31] status: not implemented * * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"'" ('yes' | 'no') '"')) * [32] status: ok * * * ** productions 33 through 39 have been removed. It seems that this * ** productions were supporting xml:lang stuff that is easily * ** supported by using directily the xml standard rather than * ** mention it as an special production inside the language. * * [39] element ::= EmptyElemTag | Stag content ETag * [39] status: not implemented * * [40] Stag ::= '<' Name (S Attribute)* S? '>' * [40] status: not implemented * * [41] Attribute ::= Name Eq AttValue * [41] status: not implemented * * [42] ETag ::= '' * [42] status: not implemented * * [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* * [43] status: not implemented * * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' * [44] status: not implemented * * [45] elementdecl ::= '' * [45] status: not implemented * * [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children * [46] status: not implemented * * [47] children ::= (choice | seq) ('?' | '*' | '+')? * [47] status: not implemented * * [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')? * [48] status: not implemented * * [49] choice ::= '(' S? cp ( S? '|' S? cp)+ S? ')' * [49] status: not implemented * * [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? 
')' * [50] status: not implemented * * [51] Mixed ::= '(' '#PCDATA' (S? '|' S? Name)* S? ')*' | '(' S? '#PCDATA' S? ')' * [51] status: not implemented * * [52] AttlistDecl ::= '' * [52] status: not implemented * * [53] AttDef ::= S Name S AttType S DefaultDecl * [53] status: not implemented * * [54] AttType ::= Stringtype | TokenizedType | Enumeratedtype * [54] status: not implemented * * [55] StringType ::= 'CDATA' * [55] status: not implemented * * [56] tokenized ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS' * [56] status: not implemented * * [57] EnumeratedType ::= NotationType | Enumeration * [57] status: not implemented * * [58] NotationType ::= 'NOTATION' S '(' S? Name (S? Name (S? '|' S? Name)* S? ')' * [58] status: not implemented * * [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' * [59] status: not implemented * * [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) * [60] status: not implemented * * [61] conditionalSect ::= includeSect | ignoreSect * [61] status: not implemented * * [62] includeSect ::= '' * [62] status: not implemented * * [63] ignoreSect ::= ' * [63] status: not implemented * * [64] ignoreSectContents ::= Ignore ('' Ignore) * * [64] status: not implemented * * [65] Ignore ::= Char * - (Char * ('') Char *) * [65] status: not implemented * * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-FA-F]+ ';' * [66] status: not implemented * * [67] Reference ::= EntityRef | CharRef * [67] status: not implemented * * [68] EntityRef ::= '&' Name ';' * [68] status: not implemented * * [69] PEReference ::= '%' Name ';' * [69] status: not implemented * * [70] EntityDecl ::= GEDecl | PEDecl * [70] status: not implemented * * [71] GEDecl ::= '' * [71] status: not implemented * * [72] PEDecl ::= '' * [72] status: not implemented * * [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?) 
* [73] status: not implemented * * [74] PEDef ::= EntityValue | ExternalID * [74] status: not implemented * * [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral * [75] status: not implemented * * [76] NDataDecl ::= S 'NData' S Name * [76] status: not implemented * * [77] TextDecl ::= '' * [77] status: not implemented * * [78] extParseEnt ::= TextDecl? content * [78] status: not implemented * * [80] EncodingDecl ::= S 'encoding' Eq ( '"' EncName '"' | "'" EncName "'" ) * [80] status: ok * * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* * [81] status: ok * * [82] NotationalDecl ::= '' * [82] status: not implemented * * [83] PublicID ::= 'PUBLIC' S PubidLiteral * [83] status: not implemented * * * * */ /** * \defgroup axl_doc_module Axl Doc: XML Documents related functions, loading XML documents and using them. */ /** * \addtogroup axl_doc_module * @{ */ #include #include #include #include #define LOG_DOMAIN "axl-doc"

/**
 * @internal
 * @brief Internal state stored behind the \ref axlDoc type.
 *
 * Groups the document data (root node, version, encoding and
 * standalone flag), the state used while parsing (parent node
 * stack, xml:space stack and header flag), the list of PI targets
 * and the factories used to allocate items, nodes, content,
 * attributes and strings.
 */
struct _axlDoc {
	/**
	 * @internal
	 * @brief A reference to the very first axlNode this axlDoc
	 * has.
	 */
	axlNode * rootNode;

	/**
	 * @internal
	 * The document version.
	 */
	char * version;

	/**
	 * @internal
	 * @brief Current xml encoding document.
	 *
	 * Holds the encoding declared in the xml header. It is
	 * replaced by utf-8 by axl_doc_configure_encoding once the
	 * configure codification handler reports success.
	 */
	char * encoding;

	/**
	 * @internal
	 * @brief Current entity encoding detected.
	 *
	 * Filled by the detect codification handler, when one is
	 * installed, before the xml header is parsed.
	 */
	const char * detected_encoding;

	/**
	 * @internal If the document was found in a different encoding
	 * than utf-8, this variable will hold its associated value to
	 * allow returning to the original encoding.
	 *
	 * The value stored is the normalized form of the declared
	 * encoding (trimmed, lower case, without - and _ characters),
	 * as produced by axl_doc_configure_encoding.
	 */
	char * encoding_found;

	/**
	 * @internal
	 * @brief Current standalone configuration of the given \ref
	 * axlDoc object.
	 */
	axl_bool standalone;

	/**
	 * @internal
	 *
	 * @brief Parent node stack. This stack is used to control how
	 * are nested nodes while creating/parsing xml files. This
	 * nesting allows to not only properly construct the xml but
	 * also to check if it is well balanced.
	 *
	 * Created by __axl_doc_new and released (then set to NULL)
	 * by __axl_doc_clean.
	 */
	axlStack * parentNode;

	/**
	 * @internal Binary stack to hold the xml:space preserve
	 * status on each level (associated to the current node).
	 *
	 * The common parse function peeks this stack to decide if
	 * white spaces are consumed before reading the next item.
	 */
	axlBinaryStack * xmlPreserve;

	/**
	 * @internal
	 *
	 * @brief Internal list to hold all PI targets read.
	 *
	 * Created with axl_pi_free as the item destroy function.
	 */
	axlList * piTargets;

	/**
	 * @internal
	 *
	 * @brief Instruct the \ref axlDoc instance to notify that the
	 * xml header have been defined. This helps to allow define PI
	 * instruction that are only found inside the root document,
	 * or after the xml header definition.
	 *
	 * The header parser reports an error when it is called with
	 * this flag already set.
	 */
	axl_bool headerProcess;

	/**
	 * @internal Factory to create items in a memory efficient
	 * manner.
	 */
	axlFactory * item_factory;

	/**
	 * @internal Factory to create nodes in a memory efficient
	 * manner.
	 */
	axlFactory * node_factory;

	/**
	 * @internal Factory to create nodes to hold content elements.
	 */
	axlFactory * content_factory;

	/**
	 * @internal Factory to create nodes to hold attribute
	 * elements.
	 */
	axlFactory * attr_factory;

	/**
	 * @internal Factory to alloc strings.
	 *
	 * Used by __axl_doc_alloc to serve the memory requested by
	 * the stream for node names and attributes.
	 */
	axlStrFactory * str_factory;
};

/**
 * @internal
 * @brief A process instruction (PI): its target name and the
 * content associated to it.
 */
struct _axlPI {
	/**
	 * @internal
	 *
	 * @brief PI Target name.
	 */
	char * name;

	/**
	 * @internal
	 *
	 * @brief PI target content.
	 */
	char * content;
};

/* global references to handlers and user defined configuration
 *
 * - detect_codification_func / detect_codification_data: optional
 *   handler (and its user pointer) called by the common parse
 *   function before the xml header is parsed. It is installed with
 *   axl_doc_set_detect_codification_func.
 *
 * - configure_codification_func / configure_codification_data:
 *   optional handler (and its user pointer) called by
 *   axl_doc_configure_encoding with the declared and the detected
 *   encoding.
 */
axlDocDetectCodification detect_codification_func;
axlPointer detect_codification_data;
axlDocConfigureCodification configure_codification_func;
axlPointer configure_codification_data;

/**
 * @internal
 *
 * @brief Creates a new empty \ref axlDoc reference.
 *
 * Creates the parent stack used for parsing functions.
 *
 * @return A newly allocated \ref axlDoc reference.
*/
/* Notes on __axl_doc_new:
 *
 * - create_parent_stack is accepted but not consulted: the parent
 *   stack is created on every call.
 * - NULL is returned when the document itself or any of its
 *   containers/factories cannot be allocated; a partially built
 *   document is handed to axl_doc_free before returning.
 */
axlDoc * __axl_doc_new (axl_bool create_parent_stack)
{
	axlDoc * doc;

	doc = axl_new (axlDoc, 1);
	if (! doc)
		return NULL;

	/* containers: parent stack, PI list and xml:space stack */
	doc->parentNode      = axl_stack_new (NULL);
	doc->piTargets       = axl_list_new (axl_list_always_return_1, (axlDestroyFunc) axl_pi_free);
	doc->xmlPreserve     = axl_binary_stack_new ();

	/* factories used to allocate the document pieces */
	doc->item_factory    = axl_item_factory_create ();
	doc->node_factory    = axl_node_factory_create ();
	doc->content_factory = axl_item_content_factory_create ();
	doc->attr_factory    = axl_item_attr_factory_create ();
	doc->str_factory     = axl_string_factory_create ();

	/* hand the document back only if every piece is available */
	if (doc->parentNode      &&
	    doc->piTargets       &&
	    doc->xmlPreserve     &&
	    doc->item_factory    &&
	    doc->node_factory    &&
	    doc->content_factory &&
	    doc->attr_factory    &&
	    doc->str_factory)
		return doc;

	/* some allocation failed: drop what was built so far */
	axl_doc_free (doc);
	return NULL;
}

/**
 * @internal
 *
 * Releases the state of the axlDoc that is only required while a
 * document is being parsed (currently, the parent node stack).
 *
 * @param doc The \ref axlDoc to clear
 */
void __axl_doc_clean (axlDoc * doc)
{
	/* nothing to release if the stack is already gone */
	if (doc->parentNode == NULL)
		return;

	axl_stack_free (doc->parentNode);
	doc->parentNode = NULL;
}

/**
 * @internal Function used by the axl doc module to allocate memory to
 * be used by the axl stream. Currently this is used to alloc xml node
 * names and xml attribute key and its value. The rest of items are
 * allocated by the system memory allocation.
 *
 * @param size The size that is required by the axl stream to be allocated.
 *
 * @param doc The axlDoc reference, which contains a reference to the
 * string factory used to allocate memory.
 *
 * @return A reference to the allocated memory.
*/
char * __axl_doc_alloc (int size, axlDoc * doc)
{
	/* the memory is served by the string factory owned by the
	 * document */
	return axl_string_factory_alloc (doc->str_factory, size);
}

/**
 * @internal Internal function that tries to check encoding found to
 * configure the proper set of functions to translate from and to
 * utf-8.
 *
 * The declared encoding (doc->encoding) is copied and normalized
 * (trimmed, - and _ removed, lower case) and passed, together with
 * the detected encoding, to the configure codification handler. If
 * no handler is installed nothing is configured and axl_true is
 * returned. If the handler succeeds, the normalized value is kept in
 * doc->encoding_found and doc->encoding is replaced by utf-8.
 *
 * @param doc The document being configured.
 *
 * @param stream The stream that is passed to the configure
 * codification handler.
 *
 * @param error An optional error that will be filled in the case an
 * error is found.
 *
 * @return axl_true if the operation was completed, otherwise axl_false is
 * returned (the value reported by the configure handler).
 */
axl_bool axl_doc_configure_encoding (axlDoc * doc, axlStream * stream, axlError ** error)
{
	char * encoding = NULL;
	axl_bool result;

	/* normalize encoding found (only if the document declares one) */
	if (doc->encoding) {
		/* work on a copy so the declared value is left untouched */
		encoding = axl_strdup (doc->encoding);

		/* trim encoding */
		axl_stream_trim (encoding);

		/* remove characters not required */
		axl_stream_remove (encoding, "-", axl_false);
		axl_stream_remove (encoding, "_", axl_false);

		/* make it lower case */
		axl_stream_to_lower (encoding);
	} /* end if */

	__axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "configuring final document enconding, previously detected=%s, declared=%s", doc->detected_encoding ? doc->detected_encoding : "none", encoding ? encoding : "none");

	/* do not perform any configuration if no handler is defined */
	if (! configure_codification_func) {
		axl_free (encoding);
		return axl_true;
	}

	/* call to configure encoding */
	result = configure_codification_func (stream, encoding, doc->detected_encoding, configure_codification_data, error);
	__axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "result from configure encoding function=%d", result);
	if (result) {
		/* encoding was fine, that means we are working in
		 * utf-8, update document internals to move encoding to
		 * utf-8. The normalized copy is now owned by the
		 * document, so the local reference is cleared to avoid
		 * releasing it below */
		doc->encoding_found = encoding;
		encoding = NULL;

		/* replace the declared encoding by utf-8 */
		if (doc->encoding)
			axl_free (doc->encoding);
		doc->encoding = axl_strdup ("utf-8");
	}

	/* releases the normalized copy when it was not handed to the
	 * document */
	axl_free (encoding);
	return result;
}

/** * @internal * * @brief Support for parsing the xml entity header * * @param stream The axlStream where is expected to receive the xml * header * * @param doc The axlDoc where the header configuration will be * placed. * * @param error An optional error that will be filled in the case an * error is found. * * @return It is supposed that the function return \ref axl_true, an * not deallocation is performed, and all elements were parsed * properly. In the case \ref axl_false is returned, memory associated * with the given stream will be released. If the document is * associated, it will also be released. */ axl_bool __axl_doc_parse_xml_header (axlStream * stream, axlDoc * doc, axlError ** error) { char * string_aux; int size; /* check if the user is defining the header many times */ if (doc->headerProcess) { axl_error_new (-1, "Found a new xml header expecification. 
Only one header is allowed for each xml document.", stream, error); axl_stream_free (stream); return axl_false; } __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "looking for an xml header declaration"); /* check for boms declarations */ if (axl_stream_check (stream, "\xEF\xBB\xBF", 3)) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, " found UTF-8 bom mark"); axl_stream_move (stream, 3); } /* check for initial XMLDec (production 23) */ if (axl_stream_inspect (stream, " 0)) { axl_error_new (-2, "expected initial 0) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found encoding declaration"); /* found encoding instruction */ string_aux = axl_stream_get_until (stream, NULL, NULL, axl_true, 2, "'", "\""); if (string_aux == NULL) { axl_error_new (-2, "expected encoding value, not found.", stream, error); axl_stream_free (stream); return axl_false; } __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "encoding found=%s", string_aux); /* set document encoding: do not allocate * twice the string returned, just nullify * stream internal reference and use the same * reference */ axl_stream_nullify (stream, LAST_CHUNK); doc->encoding = string_aux; } /* check for an space */ AXL_CONSUME_SPACES(stream); /* get standalone configuration */ if ((axl_stream_inspect_several (stream, 2, "standalone=\"", 12, "standalone='", 12) > 0)) { /* found standalone instruction */ string_aux = axl_stream_get_until (stream, NULL, NULL, axl_true, 2, "'", "\""); if (string_aux == NULL) { axl_error_new (-2, "expected to receive standalone value, not found.", stream, error); axl_stream_free (stream); return axl_false; } /* set standalone configuration */ if (memcmp ("yes", string_aux, 3)) doc->standalone = axl_false; else doc->standalone = axl_true; } /* check for an space */ AXL_CONSUME_SPACES(stream); /* get the trailing header */ if (! 
(axl_stream_inspect (stream, "?>", 2) > 0)) { axl_error_new (-2, "expected to receive the xml trailing header ?>, not found.", stream, error); axl_stream_free (stream); return axl_false; } /* consume a possible comment */ if (! axl_doc_consume_comments (doc, stream, error)) return axl_false; } /* configure encoding again, now we could have more data */ if (! axl_doc_configure_encoding (doc, stream, error)) { axl_stream_free (stream); return axl_false; } /* now process the document type declaration */ if (axl_stream_inspect (stream, " 0) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found doc type declaration.."); /* found document type declaration, just skip it for * now */ axl_stream_get_until_ref (stream, NULL, NULL, axl_true, &size, 1, ">"); /* consume a possible comment */ if (! axl_doc_consume_comments (doc, stream, error)) return axl_false; } /* return AXL_TRUE value */ return axl_true; } /** * @internal * * @brief Tries to parse the first (and compulsory) node that the xml * document must have. * * The very minimal expresion of an xml document is the one defined by * only one node, with no content and no attributes. This minimal xml * could be defined as: * * \code * * \endcode * * Or the other form accepted: * * \code * * \endcode * * * * * @param stream The \ref axlStream object where is expected to find * the xml node content. * * @param doc The \ref axlDoc object where node read will be placed * inside. * * @param node The node that has been added due to calling to this * function. * * @param error An optional error reporting variable to used to report * upper level the error found. * * @return axl_true if the first node was successfully parsed or * axl_false if not. If the function find something wrong the document * is unrefered. 
*/ axl_bool __axl_doc_parse_node (axlStream * stream, axlDoc * doc, axlNode ** calling_node, axl_bool * is_empty, axlError ** error) { char * string_aux; char * string_aux2; axlNode * node; int matched_chunk; int length; axl_bool delim; /* consume a possible comment */ if (! axl_doc_consume_comments (doc, stream, error)) return axl_false; /* check for initial < definition */ if (! (axl_stream_inspect (stream, "<", 1) > 0) && ! axl_stream_remains (stream)) { /* check if we are reading the first node node */ if (doc->rootNode == NULL) axl_error_new (-2, "expected initial < for a root node definition, not found. An xml document must have, at least, one node definition.", stream, error); else axl_error_new (-2, "expected initial < for a node definition, not found.", stream, error); axl_stream_free (stream); return axl_false; } /* get node name, keeping in mind the following: * chunk_matched * > : 0 * /> : 1 * " ": 2 * * We also reconfigure the alloc method used by the axl stream * to ensure that the module name is allocated through the * string factory. 
*/ axl_stream_set_buffer_alloc (stream, (axlStreamAlloc)__axl_doc_alloc, doc); string_aux = axl_stream_get_until (stream, NULL, &matched_chunk, axl_true, 2, ">", " "); /* nullify */ axl_stream_nullify (stream, LAST_CHUNK); if (AXL_IS_STR_EMPTY (string_aux)) { /* use alloc though string factory */ axl_stream_set_buffer_alloc (stream, NULL, NULL); axl_error_new (-2, "expected an non empty content for the node name not found.", stream, error); axl_stream_free (stream); return axl_false; } /* if found a '/', it is matched as 1 */ if (matched_chunk == 1) matched_chunk = 2; else { /* get the string length */ length = strlen (string_aux); /* if matched / it means that it was read />, remove * it and all white spaces */ if (string_aux[length - 1] == '/') { /* flag as matched /> */ matched_chunk = 1; string_aux[length - 1] = 0; } /* end if */ } /* end if */ /* create the node and associate it the node name found */ node = axl_node_factory_get (doc->node_factory); axl_node_set_name_from_factory (node, string_aux); if (doc->rootNode == NULL) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "setting as first node found, the root node: <%s>", string_aux); doc->rootNode = node; /* set the node read, the root one, to be the parent */ axl_stack_push (doc->parentNode, node); /* configure the node */ axl_node_set_doc (node, doc); } else { /* or set the node as a child of the current parent */ axl_doc_set_child_current_parent (doc, node); } /* set the node created to the calling node, so the caller * could get a reference */ if (calling_node != NULL) *calling_node = node; /* only consume white spaces if matched_chunk is 2 */ if (matched_chunk == 2) { /* get rid from spaces */ AXL_CONSUME_SPACES (stream); } __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "node found: [%s]", string_aux); /* now, until the node ends, we have to find the node * attributes or the node defintion end */ while (1) { /* check if we have an attribute for the node, or the node * definition have ended or the node definition is 
an empty * one * * the following code that relies on matched_chunk is * done due to previous call to get_until function. If * the value 0 or 1 was matched, this means that we * are on "/>" case */ if ((matched_chunk == 1) || axl_stream_inspect (stream, "/>", 2) > 0) { /* use alloc though string factory */ axl_stream_set_buffer_alloc (stream, NULL, NULL); __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found end xml node definition '/>'"); /* empty node configuration found */ *is_empty = axl_true; /* axl_node_set_is_empty (node, axl_true); */ /* make this node to be completed and no child * could be set. */ axl_stack_pop (doc->parentNode); /* set the parent node to receive all content * found in the next parsing elements because * the element found is totally empty */ *calling_node = axl_stack_peek (doc->parentNode); return axl_true; } /* check if we have an attribute for the node, or the node * definition have ended or the node definition is an empty * one * * the following code that relies on matched_chunk is * done due to previous call to get_until function. If * the value 2 or 3 was matched, this means that we * are on ">" case */ if ((matched_chunk == 0) || (axl_stream_inspect (stream, ">", 1) > 0)) { /* use alloc though string factory */ axl_stream_set_buffer_alloc (stream, NULL, NULL); __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found [end] xml node definition '>', for node: [%s]", axl_node_get_name (node)); /* flag that the node is an empty definition */ *is_empty = axl_false; /* this node is ended */ return axl_true; } /* get rid from spaces */ AXL_CONSUME_SPACES (stream); /* found attribute declaration, try to read it. * * We also reconfigure the alloc method used by the * axl stream to ensure that xml node attributes are * allocated through the string factory. 
*/ string_aux = axl_stream_get_until (stream, NULL, NULL, axl_true, 1, "="); if (string_aux != NULL) { /* nullify internal reference to the stream: * now we have inside string_aux the attribute * name */ axl_stream_nullify (stream, LAST_CHUNK); /* check for empty values at the attribute definition */ if (string_aux [0] == 0) { axl_error_new (-5, "Expected to find an attribute name (but found an empty value)", stream, error); axl_stream_free (stream); return axl_false; } /* end if */ __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "attribute found: [%s]", string_aux); /* remove next " and ' if defined */ /* flag the we are looking for a " */ delim = axl_true; if (! ((axl_stream_inspect (stream, "\"", 1) > 0))) { /* seems it is not found, flag we are * looking for ' */ delim = axl_false; if (! (axl_stream_inspect (stream, "\'", 1) > 0)) { /* use alloc though string factory */ axl_stream_set_buffer_alloc (stream, NULL, NULL); axl_error_new (-2, "Expected to find an attribute value initiator (\") or ('), every attribute value must start with them", stream, error); axl_stream_free (stream); return axl_false; } } /* now get the attribute value */ if (delim) string_aux2 = axl_stream_get_until (stream, NULL, NULL, axl_true, 1, "\""); else string_aux2 = axl_stream_get_until (stream, NULL, NULL, axl_true, 1, "'"); __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "value found: [%s]", string_aux2); /* nullify internal reference so we have the * only one reference to attribute value * inside string_aux2 */ axl_stream_nullify (stream, LAST_CHUNK); if (axl_node_has_attribute (node, string_aux)) { /* parse error */ axl_error_report (error, -3, "Unable to add attribute '%s' to node <%s> which already has this attribute. Duplicate attribute error. 
Error was found near to: %s", string_aux, axl_node_get_name (node), axl_stream_get_near_to (stream, 100)); axl_stream_free (stream); return axl_false; } /* end if */ /* set a new attribute for the given node */ axl_node_set_attribute_from_factory (doc->attr_factory, node, string_aux, string_aux2); /* check xml:space configuration and update binary stack */ if (axl_cmp (string_aux, "xml:space")) { if (axl_cmp (string_aux2, "preserve")) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found xml:space=preseve, notifying.."); /* 1: xml:space=preserve (found) * make current node and all its childs to preserve (by * default) all white spaces found */ axl_binary_stack_push (doc->xmlPreserve, axl_true); } else if (axl_cmp (string_aux2, "default")) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found xml:space=default, notifying.."); /* 2: xml:space=default (found) * make current node and all its childs to not * preserve white spaces (by default) */ axl_binary_stack_push (doc->xmlPreserve, axl_false); } else { /* parse error */ axl_error_new (-2, "xml:space attribute found with other value than 'preserve' or 'default', this is not allowed.", stream, error); axl_stream_free (stream); return axl_false; } /* end if */ } else { /* 3: xml:space (not found) * make the current node to inherint * default from parent */ if (axl_binary_stack_is_empty (doc->xmlPreserve)) axl_binary_stack_push (doc->xmlPreserve, axl_false); else axl_binary_stack_push_the_same (doc->xmlPreserve); } /* end if */ __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "attribute installed.."); /* get rid from spaces */ AXL_CONSUME_SPACES (stream); continue; } /* if reached this point, error found */ axl_error_new (-2, "Parse error while reading a node being opened", stream, error); axl_stream_free (stream); return axl_false; } /* end while */ /* node properly parsed */ return axl_true; } /** * @internal * @brief Perform the close node operation. 
* */
/* Implementation notes for __axl_doc_parse_close_node:
 *
 * - Reads the name being closed (everything up to >), drops one
 *   optional trailing white space and compares it with the name of
 *   the node found on top of the parent stack.
 * - Returns axl_true when both names match; the parent stack is not
 *   modified in that case.
 * - Returns axl_false, after releasing the stream, when the name
 *   cannot be read, when the stack has no node opened or when the
 *   names differ (in this last case the whole stack is popped while
 *   it is logged).
 * - _node is not used by the current implementation.
 */
axl_bool __axl_doc_parse_close_node (axlStream * stream, axlDoc * doc, axlNode ** _node, axlError ** error)
{
	char    * string;
	int       result_size = -1;
	axlNode * node;

	/* get the node being closed to check to the current parent */
	string = axl_stream_get_until_ref (stream, NULL, NULL, axl_true, &result_size, 1, ">");
	if (string == NULL) {
		axl_error_new (-1, "An error was found while closing the xml node", stream, error);
		axl_stream_free (stream);
		return axl_false;
	}

	/* check for optional white space inside the trailing result.
	 * The size is checked first because an empty closing name
	 * (result_size == 0) would otherwise make the code to inspect
	 * the byte located before the start of the string. */
	if (result_size > 0 && axl_stream_is_white_space (string + result_size - 1)) {
		/* nullify to remove the optional white spaces */
		string [result_size - 1] = 0;
	} /* end if */

	/* get current parent node */
	node = axl_stack_peek (doc->parentNode);
	if (node == NULL) {
		axl_error_new (-1, "Found that the stack doesn't have any node opened, this means either an libaxl error or the xml being read is closing a node not opened",
			       stream, error);
		axl_stream_free (stream);
		return axl_false;
	}

	/* check current axl node name against closed string */
	if (axl_cmp (axl_node_get_name (node), string)) {
		/* ok, axl node to be closed is the one expected */
		__axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "closing xml node, that matched with parent opened");
		return axl_true;
	}

	/* seems that the node being closed doesn't match: dump (and
	 * empty) the stack of opened nodes to the log */
	__axl_log (LOG_DOMAIN, AXL_LEVEL_CRITICAL, "xml node names to be closed doesn't matched (%s != %s), current node stack status:",
		   axl_node_get_name (node), string);

	node = axl_stack_pop (doc->parentNode);
	while (node != NULL) {
		__axl_log (LOG_DOMAIN, AXL_LEVEL_CRITICAL, "<%s>", axl_node_get_name (node));
		node = axl_stack_pop (doc->parentNode);
	}

	axl_error_new (-1, "An error was found while closing the opened xml node, parent opened and xml node being closed doesn't match", stream, error);
	axl_stream_free (stream);
	return axl_false;
}

/**
 * @internal
 *
 * Internal function which works as a common base for all functions
 * that parse XML documents from different inputs.
*/ axlDoc * __axl_doc_parse_common (const char * entity, int entity_size, const char * file_path, int fd_handle, axlError ** error) { axlStream * stream = NULL; axlDoc * doc = NULL; axlNode * node = NULL; char * string = NULL; int index; axl_bool is_empty = axl_false; /* create the xml stream using provided data */ stream = axl_stream_new (entity, entity_size, file_path, fd_handle, error); axl_return_val_if_fail (stream, NULL); /* create a document reference */ doc = __axl_doc_new (axl_true); axl_stream_link (stream, doc, (axlDestroyFunc) axl_doc_free); /* detect transitional entity codification to configure built * decoder (only if defined handler found) */ if (detect_codification_func) { if (! detect_codification_func (stream, &doc->detected_encoding, detect_codification_data, error)) { axl_stream_free (stream); return NULL; } } /* end if */ /* parse initial xml header */ if (!__axl_doc_parse_xml_header (stream, doc, error)) return NULL; /* signal that this document have processed its header */ doc->headerProcess = axl_true; /* parse the rest of the document, setting as parent NULL * because still no parent is found. */ if (!__axl_doc_parse_node (stream, doc, &node, &is_empty, error)) return NULL; /* if the node returned is not empty */ if (! is_empty) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "the first node ready, have content, reading it"); /* while the stream have data */ while (axl_stream_remains (stream)) { /* get current index */ index = axl_stream_get_index (stream); __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "current index: %d (global: %d)", index, axl_stream_get_global_index (stream)); /* get rid from spaces according to the * xml:space configuration */ if (! 
axl_binary_stack_peek (doc->xmlPreserve)) { AXL_CONSUME_SPACES(stream); } /* end if */ /* consume a possible comment and process instructions */ if (axl_stream_peek (stream, " 0 || axl_stream_peek (stream, ""); if (content == NULL) { axl_error_new (-1, "detected an opened comment but not found the comment ending", stream, error); axl_stream_free (stream); return axl_false; } /* store it */ if (parent != NULL) axl_node_set_comment (parent, content, size); /* flag that we have found a comment */ found_item = axl_true; } __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "now see for process instructions"); /* get rid from spaces */ AXL_CONSUME_SPACES(stream); /* check for PI, only once the xml header have been processed */ if ((doc != NULL) && doc->headerProcess && (axl_stream_peek (stream, " 0)) { if (! axl_doc_consume_pi (doc, axl_stack_peek (doc->parentNode), stream, error)) return axl_false; found_item = axl_true; } /* do not consume spaces if an item was found because * it is done again at the begin of the loop */ if (! found_item) { /* get rid from spaces */ AXL_CONSUME_SPACES(stream); } /* check to break-the-loop */ }while (found_item); __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "comments and pi parsed"); /* axl_true value */ return axl_true; } /** * @internal * * @brie Consumes Processing intructions that are directed to the * application ans configuration or processing instructions. * * @param doc The document there the information will be placed. * * @param stream The stream where the data is being read. * * @param error An optional axlError where the information will be * reported. * * @return axl_true if not error was found, otherwise AXL_FASLSE is * returned. 
*/ axl_bool axl_doc_consume_pi (axlDoc * doc, axlNode * node, axlStream * stream, axlError ** error) { char * string_aux; char * string_aux2; int matched_chunk; /* check if a PI target was found */ __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "calling to consume PI.."); if (axl_stream_peek (stream, " 0) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found a process instruction initialization"); /* found a pi target initialization */ axl_stream_accept (stream); string_aux = axl_stream_get_until (stream, NULL, &matched_chunk, axl_true, 3, " ?>", "?>", " "); /* check error reported */ if (string_aux == NULL) { axl_error_new (-1, "Found a error while reading the PI target name", stream, error); axl_stream_free (stream); return axl_false; } /* check that the reserved xml word is not used for the PI target */ string_aux2 = axl_strdup (string_aux); if (axl_cmp (axl_stream_to_lower (string_aux2), "xml")) { axl_free (string_aux2); axl_error_new (-1, "Using a reserved PI target name (xml), not allowed", stream, error); axl_stream_free (stream); return axl_false; } axl_free (string_aux2); __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "found PI target name: %s (terminator matched: %d)", string_aux, matched_chunk); /* check which was the matched string */ if (matched_chunk == 0 || matched_chunk == 1) { /* seems that the PI target doesn't have more data associated, craete and return */ if (node != NULL) { axl_node_add_pi_target (node, string_aux, NULL); return axl_true; } if (doc != NULL) axl_doc_add_pi_target (doc, string_aux, NULL); return axl_true; } /* seems that we have additional content to be read */ if (matched_chunk == 2) { /* make a local copy for the PI target name * read previously */ string_aux = axl_strdup (string_aux); /* get the PI content */ string_aux2 = axl_stream_get_until (stream, NULL, NULL, axl_true, 2, " ?>", "?>"); /* check error reported */ if (string_aux2 == NULL) { axl_free (string_aux); axl_error_new (-1, "Found a error while reading the PI content", stream, 
error); axl_stream_free (stream); return axl_false; } /* check the destination for the pi */ if (node != NULL) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "PI processing finished, adding PI (node) and its content"); axl_node_add_pi_target (node, string_aux, string_aux2); axl_free (string_aux); return axl_true; } if (doc != NULL) { __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "PI processing finished, adding PI (doc) and its content"); axl_doc_add_pi_target (doc, string_aux, string_aux2); axl_free (string_aux); return axl_true; } } /* check error reported */ axl_error_new (-1, "Found a error while reading the PI target name, unable to find PI terminator ?>", stream, error); axl_stream_free (stream); return axl_false; } __axl_log (LOG_DOMAIN, AXL_LEVEL_DEBUG, "PI processing finished"); return axl_true; } /** * @internal Function that allows to get axlFactory associated to * the provided document. * * @param doc The axl document that is requested to return its item * factory. * * @return An internal reference to the item factory. Do not dealloc. */ axlFactory * axl_doc_get_item_factory (axlDoc * doc) { return doc->item_factory; } /** * @brief Allows to configure a handler that implements document * detection and in such cases reconfigure \ref axlStream to act an a * proper manner. * * @param func The function to be configured. * * @param user_data User defined data to be provide to the function. * * @return A reference to the previously configured function. */ axlDocDetectCodification axl_doc_set_detect_codification_func (axlDocDetectCodification func, axlPointer user_data) { axlDocDetectCodification previous; /* configure handler and user defined pointer */ previous = detect_codification_func; detect_codification_func = func; detect_codification_data = user_data; return previous; } /** * @brief Allows to configure the handler used to finally configure * codification to be used for a particular \ref axlStream. 
*
 * @param func The function invoked to configure the codification.
 *
 * @param user_data User defined pointer stored together with the
 * handler, to be passed to it when it is called.
 * 
 * @return The handler that was configured before this call.
 */
axlDocConfigureCodification axl_doc_set_configure_codification_func (axlDocConfigureCodification func, 
								     axlPointer user_data)
{
	/* remember the handler being replaced so it can be handed
	 * back to the caller */
	axlDocConfigureCodification old_handler = configure_codification_func;

	/* install the new handler along with its user pointer */
	configure_codification_data = user_data;
	configure_codification_func = func;

	return old_handler;
}

/* @} */