/* * Copyright 2014-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"). * You may not use this file except in compliance with the License. * A copy of the License is located at: * * http://aws.amazon.com/apache2.0/ * * or in the "license" file accompanying this file. This file is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific * language governing permissions and limitations under the License. */ /* * code for the token scanner for IonC, this is the 2nd version * * while the code for scanning is reasonably large and distinct * the relationship between ion_reader_text and ion_scanner is * pretty blurry. It might make more sense to merge the two * together ... at some point. */ #include #include "ion_internal.h" // this macro is just to keep the lines of code shorter, the do-while // forces the need for a ';' and it executes exactly once // still - use with care it depends on local variables and good behavior! #define PUSH_VALUE_BYTE(x) do { if (remaining <= 0) { FAILWITH(IERR_TOKEN_TOO_LONG); } remaining--; *dst++ = MAKE_BYTE(x); } while(FALSE) static inline BOOL _ion_scanner_is_control_character(int c) { return c <= 0x1F && 0x00 <= c; } static inline BOOL _ion_scanner_is_newline(int c) { return c == 0x0A || c == 0x0D; } static inline BOOL _ion_scanner_is_non_newline_whitespace(int c) { return c == 0x09 || c == 0x0B || c == 0x0C; // Tab, vertical tab, and form feed, respectively. 
} static inline BOOL _ion_scanner_is_valid_plain_char(int c) { return !_ion_scanner_is_control_character(c) || _ion_scanner_is_non_newline_whitespace(c); } static inline BOOL _ion_scanner_is_valid_long_char(int c) { return !_ion_scanner_is_control_character(c) || _ion_scanner_is_newline(c) || _ion_scanner_is_non_newline_whitespace(c); } static inline BOOL _ion_scanner_is_valid_plain_clob_char(int c) { return _ion_scanner_is_valid_plain_char(c) && c <= 0x7F; // Raw clob bytes must be printable ASCII. } static inline BOOL _ion_scanner_is_valid_long_clob_char(int c) { return _ion_scanner_is_valid_long_char(c) && c <= 0x7F; // Raw clob bytes must be printable ASCII. } static inline iERR _ion_scanner_get_terminator_for_sub_type(ION_SUB_TYPE ist, int* terminator) { iENTER; if (ist == IST_SYMBOL_QUOTED) { *terminator = '\''; } else if (ist == IST_STRING_PLAIN || ist == IST_CLOB_PLAIN) { *terminator = '"'; } else if (ist == IST_STRING_LONG || ist == IST_CLOB_LONG) { *terminator = -1; } else { FAILWITH(IERR_PARSER_INTERNAL); } iRETURN; } iERR _ion_scanner_initialize(ION_SCANNER *scanner, ION_READER *preader) { iENTER; ASSERT(scanner); scanner->_stream = preader->istream; IONCHECK(_ion_reader_text_open_alloc_buffered_string(preader , preader->options.symbol_threshold , &(scanner->_value_image) , &(scanner->_value_buffer) , &(scanner->_value_buffer_length) )); scanner->_value_location = SVL_NONE; IONCHECK(_ion_scanner_reset(scanner)); SUCCEED(); iRETURN; } iERR _ion_scanner_reset(ION_SCANNER *scanner) { iENTER; ASSERT(scanner); IONCHECK(_ion_scanner_reset_value(scanner)); scanner->_line = 1; scanner->_offset = 0; scanner->_saved_offset = 0; scanner->_unread_sub_type = IST_NONE; SUCCEED(); iRETURN; } iERR _ion_scanner_reset_value(ION_SCANNER *scanner) { iENTER; ASSERT(scanner); ION_STRING_INIT(&scanner->_value_image); scanner->_value_location = SVL_NONE; scanner->_value_start = -1; scanner->_pending_bytes_pos = scanner->_pending_bytes; scanner->_pending_bytes_end = 
scanner->_pending_bytes;
    SUCCEED();

    iRETURN;
}

// Is this useful just to keep up the pattern (initialize, reset, close)
// or just a waste? Hard to say.
/**
 * Detaches the scanner from its stream. Nothing else is released here.
 * Always returns IERR_OK.
 */
iERR ion_scanner_close(ION_SCANNER *scanner)
{
    // do we want to memset the scanner to null?
    scanner->_stream = NULL; // at least the stream, if anyone tries to use this later it will fail pretty quickly
    return IERR_OK;
}

/**
 * Returns the sub type of the next token through *p_ist.
 *
 * If a token was pushed back with _ion_scanner_un_next it is handed out again
 * (restoring its value location and, for buffered values, the value image);
 * otherwise a new token is scanned from the stream.
 */
iERR _ion_scanner_next(ION_SCANNER *scanner, ION_SUB_TYPE *p_ist)
{
    iENTER;

    if (scanner->_unread_sub_type != IST_NONE) {
        *p_ist = scanner->_unread_sub_type;
        scanner->_value_location = scanner->_unread_value_location;
        if (scanner->_value_location == SVL_VALUE_IMAGE) {
            // the bytes are still sitting in the value buffer, only the
            // image (pointer + length) has to be re-established
            scanner->_value_image.value = scanner->_value_buffer;
            scanner->_value_image.length = scanner->_unread_value_length;
        }
        scanner->_unread_sub_type = IST_NONE;
    }
    else {
        IONCHECK(_ion_scanner_next_actual(scanner, p_ist));
    }

    iRETURN;
}

/**
 * Pushes the token `ist` back so that the next call to _ion_scanner_next
 * returns it again. Only one token can be pending at a time (asserted).
 * The current value location and image length are remembered with it.
 */
iERR _ion_scanner_un_next(ION_SCANNER *scanner, ION_SUB_TYPE ist)
{
    iENTER;

    ASSERT(scanner->_unread_sub_type == IST_NONE);

    scanner->_unread_sub_type = ist;
    scanner->_unread_value_location = scanner->_value_location;
    scanner->_unread_value_length = scanner->_value_image.length;
    SUCCEED();

    iRETURN;
}

/**
 * Scans the next token from the stream and reports its sub type in *p_ist.
 *
 * Leading whitespace and comments are skipped. The first significant
 * character selects the token kind; where one character is not enough the
 * routine reads ahead and unreads whatever it did not use.
 *
 * On return scanner->_value_location tells where the token's bytes are:
 *   SVL_NONE        - punctuation and keywords, there is no value to fetch
 *   SVL_IN_STREAM   - symbols and strings, the content is still unread
 *   SVL_VALUE_IMAGE - numbers, already copied into the value buffer
 *
 * Fails with IERR_INVALID_TOKEN when the character cannot start a token.
 */
iERR _ion_scanner_next_actual(ION_SCANNER *scanner, ION_SUB_TYPE *p_ist)
{
    iENTER;
    ION_SUB_TYPE t = IST_NONE;
    int          c, c2;
    BOOL         is_triple_quote, is_match, is_null;

#ifdef DEBUG
    // debugging aid: change the -1 to a token number to break on that token
    static long _token_counter = 0;
    _token_counter++;
    if (_token_counter == -1) { // 7238 || scanner->_has_marked_value) {
        ion_helper_breakpoint();
        _token_counter = _token_counter + 0;;
    }
#endif

    // read the first character of a token, tokens may be preceded
    // by whitespace
    IONCHECK(_ion_scanner_read_past_whitespace(scanner, &c));

    // it's actually in the stream until it gets read into the value buffer
    // for punctuation sub types this doesn't matter (there's no
    // need to look at the value.
    // for symbols, strings, clobs, and blobs they'll be left in the stream
    // all others will be copied into value buffer
    // when someone copies the bytes from the symbol, string, blob, or clob
    // out of the stream and into their buffer (and we get to the end of the
    // value) we'll mark it as no longer in the stream at that time
    //
    // but the common case is not, so we'll set this to "in stream" when we know
    scanner->_value_location = SVL_NONE;
    scanner->_value_start = ion_stream_get_position( scanner->_stream ) - 1; // -1 because we read past the byte

    switch (c) {
    case EOF:
        t = IST_EOF;
        break;
    case ':':
        // a double colon is detected by callers through
        // _ion_scanner_peek_double_colon, so here there's only 1 possibility
        t = IST_SINGLE_COLON;
        break;
    case '{':
        // "{{" opens a lob, a lone "{" opens a struct
        IONCHECK(_ion_scanner_read_char(scanner, &c2));
        if (c2 != '{') {
            IONCHECK(_ion_scanner_unread_char(scanner, c2));
            t = IST_STRUCT;
        }
        else {
            t = IST_DOUBLE_BRACE;
        }
        break;
    case '}':
        t = IST_CLOSE_SINGLE_BRACE; // we don't have enough context here to decide if this is a single or a double close brace unambiguously
        break;
    case '[':
        t = IST_LIST;
        break;
    case ']':
        t = IST_CLOSE_BRACKET;
        break;
    case '(':
        t = IST_SEXP;
        break;
    case ')':
        t = IST_CLOSE_PAREN;
        break;
    case ',':
        t = IST_COMMA;
        break;
    case '\'':
        // ''' starts a long string, a single ' starts a quoted symbol;
        // the opening quote(s) are consumed, the content stays in the stream
        IONCHECK(_ion_scanner_peek_two_single_quotes(scanner, &is_triple_quote));
        if (is_triple_quote) {
            t = IST_STRING_LONG;
        }
        else {
            t = IST_SYMBOL_QUOTED;
        }
        scanner->_value_location = SVL_IN_STREAM;
        break;
    case '+':
        // "+inf" is a float keyword, any other '+' is an operator symbol
        IONCHECK(_ion_scanner_read_char(scanner, &c2));
        if (c2 == 'i') {
            IONCHECK(_ion_scanner_peek_keyword(scanner, "nf", &is_match));
            if (is_match) {
                t = IST_PLUS_INF;
                break;
            }
        }
        IONCHECK(_ion_scanner_unread_char(scanner, c2));
        // fall through to the extended symbol case
    case '<': case '>': case '*': case '=': case '^': case '&': case '|':
    case '~': case ';': case '!': case '?': case '@': case '%': case '`':
    case '#': case '.': case '/':
        // the operator character itself is part of the value, put it back
        IONCHECK(_ion_scanner_unread_char(scanner, c));
        t = IST_SYMBOL_EXTENDED;
        scanner->_value_location = SVL_IN_STREAM;
        break;
    case '"':
        t = IST_STRING_PLAIN;
        scanner->_value_location = SVL_IN_STREAM;
        break;
    case 'n':
        // for 'n' we check to see if this is "null" or a typed
        // null (such as null.int)
        IONCHECK(_ion_scanner_peek_for_null(scanner, &is_null, &c));
        if (is_null) {
            // see if the terminating character was a '.' which would precede a type name
            if (c != '.') {
                IONCHECK(_ion_scanner_unread_char(scanner, c));
                t = IST_NULL_NULL;
            }
            else {
                IONCHECK(_ion_scanner_read_null_type(scanner, &t));
            }
            break;
        }
        // we also have to check for nan, since that's not a plain symbol
        IONCHECK(_ion_scanner_peek_keyword(scanner, "an", &is_match));
        if (is_match) {
            t = IST_NAN;
            break;
        }
        // let the non-null 'n' fall through to the plain symbol case
    case 'a': case 'b': case 'c': case 'd': case 'e': // no 'f'
    case 'g': case 'h': case 'j': case 'i': case 'k': case 'l':
    case 'm': case 'o': case 'p': case 'q': case 'r': // no 'n'
    case 's': case 'u': case 'v': case 'w': case 'x': // not 't'
    case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'J': case 'I': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '$': case '_':
        // the first character belongs to the symbol, put it back
        IONCHECK(_ion_scanner_unread_char(scanner, c));
        t = IST_SYMBOL_PLAIN;
        scanner->_value_location = SVL_IN_STREAM;
        break;
    case 't':
        // "true" followed by a terminator is the boolean, otherwise a symbol
        IONCHECK(_ion_scanner_peek_keyword(scanner, "rue", &is_match));
        if (is_match) {
            t = IST_BOOL_TRUE;
        }
        else {
            IONCHECK(_ion_scanner_unread_char(scanner, c));
            t = IST_SYMBOL_PLAIN;
            scanner->_value_location = SVL_IN_STREAM;
        }
        break;
    case 'f':
        // "false" followed by a terminator is the boolean, otherwise a symbol
        IONCHECK(_ion_scanner_peek_keyword(scanner, "alse", &is_match));
        if (is_match) {
            t = IST_BOOL_FALSE;
        }
        else {
            IONCHECK(_ion_scanner_unread_char(scanner, c));
            t = IST_SYMBOL_PLAIN;
            scanner->_value_location = SVL_IN_STREAM;
        }
        break;
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        IONCHECK(_ion_scanner_read_possible_number(scanner, c, 1, &t));
        // note _ion_scanner_read_possible_number sets: scanner->_value_location = SVL_VALUE_IMAGE;
        break;
    case '-':
        // see if we have a real number or what might be an extended symbol
        IONCHECK(_ion_scanner_read_char(scanner, &c2));
        if (c2 == 'i') {
            IONCHECK(_ion_scanner_peek_keyword(scanner, "nf", &is_match));
            if (is_match) {
                t = IST_MINUS_INF;
                break;
            }
        }
        // not "inf" so is it a number?
        // (the 1-byte test keeps negative codes such as EOF away from isdigit)
        if (IS_1_BYTE_UTF8(c2) && isdigit(c2)) {
            IONCHECK(_ion_scanner_read_possible_number(scanner, c2, -1, &t));
            scanner->_value_location = SVL_VALUE_IMAGE;
        }
        else {
            // otherwise it must be an extended symbol and we need to
            // put these characters back (the '-' and whatever else we read)
            IONCHECK(_ion_scanner_unread_char(scanner, c2));
            IONCHECK(_ion_scanner_unread_char(scanner, c));
            t = IST_SYMBOL_EXTENDED;
            scanner->_value_location = SVL_IN_STREAM;
        }
        break;
    default:
        FAILWITH(IERR_INVALID_TOKEN); // "invalid character for token start"
    }

    *p_ist = t;
    iRETURN;
}

/**
 * Called after a "{{" has been seen: looks at the first significant
 * character to decide which kind of lob follows and reports it in *p_ist.
 *
 *   '"'                  -> IST_CLOB_PLAIN (opening quote consumed)
 *   '''                  -> IST_CLOB_LONG  (opening quotes consumed)
 *   base64 character     -> IST_BLOB       (character pushed back)
 *   "}}"                 -> IST_BLOB, empty (both braces pushed back)
 *
 * Anything else fails with IERR_BAD_BASE64_BLOB. On success the value is
 * marked as SVL_IN_STREAM.
 */
iERR _ion_scanner_next_distinguish_lob(ION_SCANNER *scanner, ION_SUB_TYPE *p_ist)
{
    iENTER;
    int          c;
    BOOL         is_triple_quote;
    ION_SUB_TYPE t = IST_ERROR;

    IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c));
    if (c == '"') {
        t = IST_CLOB_PLAIN;
    }
    else if (c == '\'') {
        IONCHECK(_ion_scanner_peek_two_single_quotes(scanner, &is_triple_quote));
        if (is_triple_quote) {
            t = IST_CLOB_LONG;
        }
        else {
            FAILWITH(IERR_BAD_BASE64_BLOB);
        }
    }
    else if (IS_1_BYTE_UTF8(c) == FALSE) {
        // this test must precede the table lookup below
        FAILWITH(IERR_BAD_BASE64_BLOB);
    }
    else if (_Ion_base64_value[c] >= 0) {
        IONCHECK(_ion_scanner_unread_char(scanner, c));
        t = IST_BLOB;
    }
    else if (c == '}') {
        // immediate closing braces say this is an empty blob
        IONCHECK(_ion_scanner_read_char(scanner, &c));
        if (c == '}') {
            // we'll want the closing braces around to stop the base64 scan
            IONCHECK(_ion_scanner_unread_char(scanner, c));
            IONCHECK(_ion_scanner_unread_char(scanner, c));
            t = IST_BLOB;
        }
        else {
            FAILWITH(IERR_BAD_BASE64_BLOB);
        }
    }
    else {
FAILWITH(IERR_BAD_BASE64_BLOB); } scanner->_value_location = SVL_IN_STREAM; *p_ist = t; iRETURN; } iERR _ion_scanner_read_char(ION_SCANNER *scanner, int *p_char) { iENTER; int c; ION_GET(scanner->_stream, c); scanner->_offset++; if (c == '\r' || c == '\n') { IONCHECK(_ion_scanner_read_char_newline_helper(scanner, &c)); } *p_char = c; iRETURN; } iERR _ion_scanner_read_char_with_validation(ION_SCANNER* scanner, ION_SUB_TYPE ist, int* result) { iENTER; int c; _ion_scanner_read_char(scanner, &c); BOOL is_valid; if (ist == IST_SYMBOL_QUOTED) { is_valid = TRUE; } else if (ist == IST_STRING_PLAIN) { is_valid = _ion_scanner_is_valid_plain_char(c); } else if (ist == IST_CLOB_PLAIN) { is_valid = _ion_scanner_is_valid_plain_clob_char(c); } else if (ist == IST_STRING_LONG) { is_valid = _ion_scanner_is_valid_long_char(c); } else if (ist == IST_CLOB_LONG) { is_valid = _ion_scanner_is_valid_long_clob_char(c); } else { FAILWITH(IERR_PARSER_INTERNAL); } if (!is_valid) { char error_message[ION_ERROR_MESSAGE_MAX_LENGTH]; snprintf(error_message, ION_ERROR_MESSAGE_MAX_LENGTH, "Invalid character 0x%04X", c); FAILWITHMSG(IERR_INVALID_SYNTAX, error_message); } else { *result = c; } iRETURN; } iERR _ion_scanner_read_char_newline_helper(ION_SCANNER *scanner, int *p_char) { iENTER; int c, newline; ASSERT(p_char && scanner); c = *p_char; if (c != '\r') { ASSERT(c == '\n'); newline = NEW_LINE_1; } else { // it was \r - is there a \n? 
ION_GET(scanner->_stream, c); if (c == '\n') { newline = NEW_LINE_2; } else { // not a new line, we have to push it back newline = NEW_LINE_3; IONCHECK(ion_stream_unread_byte(scanner->_stream, c)); } } // there are (currently) no states where it's necessary // to pre-read over more than a single new line, so we // only need 1 saved offset scanner->_saved_offset = scanner->_offset; scanner->_line++; scanner->_offset = 0; *p_char = newline; iRETURN; } iERR _ion_scanner_read_past_whitespace(ION_SCANNER *scanner, int *p_char) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case ION_unicode_byte_order_mark_utf8_start: IONCHECK(_ion_scanner_read_past_unicode_byte_order_mark(scanner, &c)); if (c != ' ') goto actual_char; break; case '/': IONCHECK(_ion_scanner_read_past_comment(scanner, &c)); if (c != ' ') goto actual_char; break; case '\0': case ' ': case '\t': case '\v': /* Vertical tab */ case '\f': /* Form feed */ case NEW_LINE_3: /* carraige return */ case NEW_LINE_2: /* carraige return, newline */ case NEW_LINE_1: /* newline */ break; default: goto actual_char; } } actual_char: *p_char = c; iRETURN; } iERR _ion_scanner_read_past_lob_whitespace(ION_SCANNER *scanner, int *p_char) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case ION_unicode_byte_order_mark_utf8_start: IONCHECK(_ion_scanner_read_past_unicode_byte_order_mark(scanner, &c)); if (c != ' ') goto actual_char; break; case '\0': case ' ': case '\t': case '\v': /* Vertical tab */ case '\f': /* Form feed */ case NEW_LINE_3: /* carraige return */ case NEW_LINE_2: /* carraige return, newline */ case NEW_LINE_1: /* newline */ break; default: goto actual_char; } } actual_char: *p_char = c; iRETURN; } iERR _ion_scanner_read_past_unicode_byte_order_mark(ION_SCANNER *scanner, int *p_char) { iENTER; int c; IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == ION_unicode_byte_order_mark_utf8[1]) { IONCHECK(_ion_scanner_read_char(scanner, 
&c)); if (c == ION_unicode_byte_order_mark_utf8[2]) { *p_char = ' '; SUCCEED(); } IONCHECK(_ion_scanner_unread_char(scanner, c)); c = ION_unicode_byte_order_mark_utf8[1]; } IONCHECK(_ion_scanner_unread_char(scanner, c)); iRETURN; } iERR _ion_scanner_read_past_comment(ION_SCANNER *scanner, int *p_char) { iENTER; int c; IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case '/': // we have a single line comment IONCHECK(_ion_scanner_read_to_one_line_comment(scanner)); *p_char = ' '; break; case '*': // we have a multi-line comment (or at least not a single line comment) IONCHECK(_ion_scanner_read_to_end_of_long_comment(scanner)); *p_char = ' '; break; default: // oops, we don't want to disturb this one, just throw it back // note we don't do anything to p_char since we want to leave it alone too IONCHECK(_ion_scanner_unread_char(scanner, c)); break; } iRETURN; } iERR _ion_scanner_read_to_one_line_comment(ION_SCANNER *scanner) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { // these are escaped new lines, they act as nothing which // in this case is just whitespace we'll read past case NEW_LINE_3: /* carraige return */ case NEW_LINE_2: /* carraige return, newline */ case NEW_LINE_1: /* newline */ case SCANNER_EOF: goto end_of_comment; default: break; } } end_of_comment: iRETURN; } iERR _ion_scanner_read_to_end_of_long_comment(ION_SCANNER *scanner) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '*') { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '/') goto end_of_comment; } else if (c == SCANNER_EOF) { FAILWITH(IERR_UNEXPECTED_EOF); } } end_of_comment: iRETURN; } iERR _ion_scanner_unread_char(ION_SCANNER *scanner, int c) { iENTER; switch(c) { case NEW_LINE_1: IONCHECK(ion_stream_unread_byte(scanner->_stream, '\n')); goto uncount_line; case NEW_LINE_2: IONCHECK(ion_stream_unread_byte(scanner->_stream, '\n')); // remember - push is reverse order 
IONCHECK(ion_stream_unread_byte(scanner->_stream, '\r')); goto uncount_line; case NEW_LINE_3: IONCHECK(ion_stream_unread_byte(scanner->_stream, '\r')); goto uncount_line; case EMPTY_ESCAPE_SEQUENCE1: IONCHECK(ion_stream_unread_byte(scanner->_stream, '\n')); IONCHECK(ion_stream_unread_byte(scanner->_stream, '\\')); goto uncount_line; case EMPTY_ESCAPE_SEQUENCE2: IONCHECK(ion_stream_unread_byte(scanner->_stream, '\n')); // remember - push is reverse order IONCHECK(ion_stream_unread_byte(scanner->_stream, '\r')); IONCHECK(ion_stream_unread_byte(scanner->_stream, '\\')); goto uncount_line; case EMPTY_ESCAPE_SEQUENCE3: IONCHECK(ion_stream_unread_byte(scanner->_stream, '\r')); IONCHECK(ion_stream_unread_byte(scanner->_stream, '\\')); goto uncount_line; default: IONCHECK(ion_stream_unread_byte(scanner->_stream, c)); scanner->_offset--; break; } SUCCEED(); uncount_line: _ion_scanner_unread_char_uncount_line(scanner); SUCCEED(); iRETURN; } void _ion_scanner_unread_char_uncount_line(ION_SCANNER *scanner) { scanner->_line--; scanner->_offset = scanner->_saved_offset; } iERR _ion_scanner_peek_double_colon(ION_SCANNER *scanner, BOOL *p_is_double_colon) { iENTER; int c; BOOL is_double_colon = FALSE; IONCHECK(_ion_scanner_read_past_whitespace(scanner, &c)); if (c != ':') { IONCHECK(_ion_scanner_unread_char(scanner, c)); } else { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c != ':') { IONCHECK(_ion_scanner_unread_char(scanner, c)); } else { is_double_colon = TRUE; } } *p_is_double_colon = is_double_colon; iRETURN; } iERR _ion_scanner_peek_two_single_quotes(ION_SCANNER *scanner, BOOL *p_rest_of_triple_quote_found) { iENTER; int c; BOOL rest_of_triple_quote_found = FALSE; IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '\'') { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '\'') { rest_of_triple_quote_found = TRUE; goto found; } IONCHECK(_ion_scanner_unread_char(scanner, c)); c = '\''; // restore c so that we when we undo just below it's the right character } 
IONCHECK(_ion_scanner_unread_char(scanner, c));

found:
    *p_rest_of_triple_quote_found = rest_of_triple_quote_found;

    iRETURN;
}

/**
 * Called after an 'n' was read: checks whether the input continues with
 * "ull" followed by either a value terminator or a '.'.
 *
 * On a match *p_is_null is TRUE, "ull" and the following character are
 * consumed and that character is returned through *p_char (the caller
 * decides whether to push it back). On a mismatch *p_is_null is FALSE,
 * everything read here is pushed back and *p_char is not written.
 */
iERR _ion_scanner_peek_for_null(ION_SCANNER *scanner, BOOL *p_is_null, int *p_char)
{
    iENTER;
    int  c;
    BOOL is_terminator;

    // we're looking for the "ull" (and a possible ".") following the "n"
    // we saw at the beginning of our token
    IONCHECK(_ion_scanner_read_char(scanner, &c));
    if (c == 'u') {
        IONCHECK(_ion_scanner_read_char(scanner, &c));
        if (c == 'l') {
            IONCHECK(_ion_scanner_read_char(scanner, &c));
            if (c == 'l') {
                IONCHECK(_ion_scanner_read_char(scanner, &c));
                // the next character is either a terminator or a dot (in
                // which case this is "null"), or anything else (in which
                // case it isn't and the caller goes on to treat the token
                // as a symbol)
                IONCHECK(_ion_scanner_is_value_terminator(scanner, c, &is_terminator));
                if (is_terminator || c == '.') {
                    *p_is_null = TRUE;
                    *p_char = c;
                    SUCCEED();
                }
                // no match: unwind, each level pushes back what it read and
                // then sets c to the character the level above has to push
                IONCHECK(_ion_scanner_unread_char(scanner, c));
                c = 'l';
            }
            IONCHECK(_ion_scanner_unread_char(scanner, c));
            c = 'l';
        }
        IONCHECK(_ion_scanner_unread_char(scanner, c));
        c = 'u';
    }
    IONCHECK(_ion_scanner_unread_char(scanner, c));
    *p_is_null = FALSE;
    SUCCEED();

    iRETURN;
}

/**
 * Called after "null." was read: reads the type name that follows and
 * returns the matching typed-null sub type through *p_ist.
 *
 * Fails with IERR_INVALID_TOKEN when the name is too short, is not followed
 * by a value terminator, or is not a known type name.
 */
iERR _ion_scanner_read_null_type(ION_SCANNER *scanner, ION_SUB_TYPE *p_ist)
{
    iENTER;
    int          c;
    char         unread_buffer[MAX_TYPE_NAME_LEN + 1], *unread_pos = unread_buffer; // +1 for null terminator
    SIZE         len, remaining = MAX_TYPE_NAME_LEN + 1;
    ION_SUB_TYPE ist;
    BOOL         is_terminator;

    // collect the run of lower case letters that follows the '.'; the loop
    // allows one read more than the longest type name so that the character
    // ending a maximum length name is seen (and pushed back) as well
    while (remaining--) {
        IONCHECK(_ion_scanner_read_char(scanner, &c));
        if (c < 'a' || c > 'z') {
            IONCHECK(_ion_scanner_unread_char(scanner, c));
            break;
        }
        *unread_pos++ = MAKE_BYTE(c);
    }
    len = unread_pos - unread_buffer;
    if (len < MIN_TYPE_NAME_LEN) goto not_a_typename;

    // c is the last character read: the one that ended the name or, if the
    // loop ran out, the final letter
    // NOTE(review): if the loop ran out, the buffer is full and the
    // terminator write below stays in bounds only because this check
    // rejects a letter - assumes letters are never value terminators.
    IONCHECK(_ion_scanner_is_value_terminator(scanner, c, &is_terminator));
    if (!is_terminator) goto not_a_typename;

    *unread_pos =
'\0'; ist = _ion_scanner_check_typename(unread_buffer, len); if (ist == NULL) { goto not_a_typename; } *p_ist = ist; SUCCEED(); not_a_typename: FAILWITH(IERR_INVALID_TOKEN); iRETURN; } ION_SUB_TYPE _ion_scanner_check_typename(char *buf, int len) { if (len < MIN_TYPE_NAME_LEN || len > MAX_TYPE_NAME_LEN) return NULL; // check the string against type names or fail switch(buf[0]) { case 'b': if (len != 4) return 0; if (strncmp("bool", buf, 4) == 0) return IST_NULL_BOOL; if (strncmp("blob", buf, 4) == 0) return IST_NULL_BLOB; break; case 'c': if (len != 4) return 0; if (strncmp("clob", buf, 4) == 0) return IST_NULL_CLOB; break; case 'd': if (len != 7) return 0; if (strncmp("decimal", buf, 7) == 0) return IST_NULL_DECIMAL; break; case 'f': if (len != 5) return 0; if (strncmp("float", buf, 5) == 0) return IST_NULL_FLOAT; break; case 'i': if (len != 3) return 0; if (strncmp("int", buf, 3) == 0) return IST_NULL_INT; break; case 'l': if (len != 4) return 0; if (strncmp("list", buf, 4) == 0) return IST_NULL_LIST; break; case 'n': if (len != 4) return 0; if (strncmp("null", buf, 4) == 0) return IST_NULL_NULL; break; case 's': if (len == 6) { if (strncmp("string", buf, 6) == 0) return IST_NULL_STRING; if (strncmp("struct", buf, 6) == 0) return IST_NULL_STRUCT; if (strncmp("symbol", buf, 6) == 0) return IST_NULL_SYMBOL; } else if (len == 4) { if (strncmp("sexp", buf, 4) == 0) return IST_NULL_SEXP; } break; case 't': if (len != 9) return 0; if (strncmp("timestamp", buf, 9) == 0) return IST_NULL_TIMESTAMP; break; default: break; } return 0; } // c is an already read-ahead character iERR _ion_scanner_is_value_terminator(ION_SCANNER *scanner, int c, BOOL *p_is_terminator) { iENTER; ION_TERM_TYPE maybe; int c2; BOOL is_terminator = FALSE; if (c < 0) { is_terminator = TRUE; } else { maybe = _Ion_value_terminators[c]; if (maybe == ION_TT_MAYBE) { IONCHECK(_ion_scanner_read_char(scanner, &c2)); if (c < 0) { maybe = IS_NEWLINE_SEQUENCE(c2); } else { maybe = _Ion_value_terminators2[c2]; } 
IONCHECK(_ion_scanner_unread_char(scanner, c2));
        }
        is_terminator = (maybe == ION_TT_YES);
    }

    *p_is_terminator = is_terminator;
    SUCCEED();

    iRETURN;
}

/**
 * Checks whether the input continues with the characters of `tail` followed
 * by a value terminator (the caller has already consumed the first letter of
 * the keyword, e.g. tail is "rue" for "true").
 *
 * On a match *p_is_match is TRUE and `tail` stays consumed; the terminating
 * character is pushed back. On a mismatch *p_is_match is FALSE and every
 * character read here is pushed back.
 */
iERR _ion_scanner_peek_keyword(ION_SCANNER *scanner, char *tail, BOOL *p_is_match)
{
    iENTER;
    int   c, match_c;
    char *cp = tail;
    BOOL  is_match = FALSE;

    // walk the expected tail one character at a time, comparing it with
    // what the stream actually holds
    while ((match_c = *cp) != '\0') {
        cp++;
        IONCHECK(_ion_scanner_read_char(scanner, &c));
        if (c != match_c) {
            cp--; // we didn't match this char, so we don't unread it (we unread c which we just read)
            goto unread_tail;
        }
    }

    // we may have a match (if we have a terminator next)
    // we'll peek ahead and see if we have that terminator
    IONCHECK(_ion_scanner_read_char(scanner, &c));
    IONCHECK(_ion_scanner_is_value_terminator(scanner, c, &is_match));

unread_tail:
    // c is either the mismatching character or the character after the
    // keyword, in both cases it goes back to the stream
    IONCHECK(_ion_scanner_unread_char(scanner, c));
    if (!is_match) {
        // no luck so we unread the matched part of the tail, last char first
        while (cp > tail) {
            cp--;
            c = *cp;
            IONCHECK(_ion_scanner_unread_char(scanner, c));
        }
    }
    *p_is_match = is_match;
    SUCCEED();

    iRETURN;
}

/** This is called when we are in the middle of reading a value or
 *  are at the beginning of a value and the user calls next().
 *
 *  this scans and does not fully validate the value. It should
 *  leave the input stream at the same point simply consuming the
 *  value normally would.
 *
 *  this routine is more generous about parsing. Basically it just
 *  recognizes possible escape sequences in strings, container start
 *  and end tokens (including lobs), and comments. It just searches
 *  for the desired end character which should mark the end of
 *  this value.
(strings include all quoted sequences) */ iERR _ion_scanner_skip_value_contents(ION_SCANNER *scanner, ION_SUB_TYPE ist) { iENTER; assert(scanner); // we should never be skipping an unread value, we haven't // actually processed it yet ASSERT(scanner->_unread_sub_type == IST_NONE); // if it's not in the stream the bytes have already been consumed // WAS:if (scanner->_value_location != SVL_IN_STREAM) { // SUCCEED(); //} if ((scanner->_value_location == SVL_IN_STREAM) || (ist == IST_SEXP) || (ist == IST_LIST) || (ist == IST_STRUCT) ) { // see what type of value we are finding the end of if (ist == IST_STRING_PLAIN) { IONCHECK(_ion_scanner_skip_plain_string(scanner)); } else if (ist == IST_STRING_LONG) { IONCHECK(_ion_scanner_skip_long_string(scanner)); } else if (ist == IST_CLOB_PLAIN) { IONCHECK(_ion_scanner_skip_plain_clob(scanner)); } else if (ist == IST_CLOB_LONG) { IONCHECK(_ion_scanner_skip_long_clob(scanner)); } else if (ist == IST_BLOB) { IONCHECK(_ion_scanner_skip_blob(scanner)); } else if (ist == IST_SEXP) { IONCHECK(_ion_scanner_skip_sexp(scanner)); } else if (ist == IST_LIST) { IONCHECK(_ion_scanner_skip_list(scanner)); } else if (ist == IST_STRUCT) { IONCHECK(_ion_scanner_skip_struct(scanner)); } else if (ist == IST_EOF) { /* do nothing, but eof is fine */ } else { FAILWITH(IERR_PARSER_INTERNAL); } } else { SUCCEED(); } // the value should be gone now scanner->_value_location = SVL_NONE; iRETURN; } iERR _ion_scanner_skip_plain_string(ION_SCANNER *scanner) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case '"': SUCCEED(); case '\\': // we just ignore the char after the escape, which is enough to // handle escaped chars correctly (including escaped / and ") IONCHECK(_ion_scanner_read_char(scanner, &c)); break; case EOF: FAILWITH(IERR_UNEXPECTED_EOF); default: break; } } iRETURN; } iERR _ion_scanner_skip_long_string(ION_SCANNER *scanner) { iENTER; int c; for (;;) { // find the end of the current part of the lonng // 
string IONCHECK(_ion_scanner_skip_one_long_string(scanner)); // see if there is another one adjacent to this // which would be part of the same value. we do this // by peeking ahead for a triple quote IONCHECK(_ion_scanner_read_past_whitespace(scanner, &c)); if (c == '\'') { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '\'') { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '\'') { // we found the triple quote, loop around to consume // the characters that are quoted and try for another continue; } IONCHECK(_ion_scanner_unread_char(scanner, c)); c = '\''; } IONCHECK(_ion_scanner_unread_char(scanner, c)); c = '\''; } // the next token wasn't a triple quote, so we're actually done // and here we don't care if it happend to be an EOF IONCHECK(_ion_scanner_unread_char(scanner, c)); SUCCEED(); } iRETURN; } iERR _ion_scanner_skip_one_long_string(ION_SCANNER *scanner) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case '\'': IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c != '\'') break; IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c != '\'') break; SUCCEED(); case '\\': // we just ignore the char after the escape, which is enough to // handle escaped chars correctly (including escaped / and ") IONCHECK(_ion_scanner_read_char(scanner, &c)); break; case EOF: FAILWITH(IERR_UNEXPECTED_EOF); default: break; } } iRETURN; } iERR _ion_scanner_skip_single_quoted_string(ION_SCANNER *scanner) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case '\'': SUCCEED(); case '\\': // we just ignore the char after the escape, which is enough to // handle escaped chars correctly (including escaped / and ") IONCHECK(_ion_scanner_read_char(scanner, &c)); break; case EOF: FAILWITH(IERR_UNEXPECTED_EOF); default: break; } } iRETURN; } iERR _ion_scanner_skip_unknown_lob(ION_SCANNER *scanner) { iENTER; int c; IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c)); switch (c) { case 
'\'': // if we see a single quote, try to skip a long clob (one with a // triple-quoted string) IONCHECK(_ion_scanner_skip_long_clob(scanner)); break; case '\"': // if we see a double quote, try to skip a plain clob IONCHECK(_ion_scanner_skip_plain_clob(scanner)); break; default: // else try to skip a regular blob IONCHECK(_ion_scanner_skip_blob(scanner)); break; } iRETURN; } iERR _ion_scanner_skip_plain_clob(ION_SCANNER *scanner) { iENTER; int c; // consume the string part of the value IONCHECK(_ion_scanner_skip_plain_string(scanner)); // we should see a double closing curly brace next IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c)); if (c == '}') { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '}') { SUCCEED(); } } if (c == EOF) { FAILWITH(IERR_UNEXPECTED_EOF); } else { FAILWITH(IERR_INVALID_SYNTAX); } iRETURN; } iERR _ion_scanner_skip_long_clob(ION_SCANNER *scanner) { iENTER; int c; // consume the string part of the value IONCHECK(_ion_scanner_skip_long_string(scanner)); // we should see a double closing curly brace next IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c)); if (c == '}') { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '}') { SUCCEED(); } } if (c == EOF) { FAILWITH(IERR_UNEXPECTED_EOF); } else { FAILWITH(IERR_INVALID_SYNTAX); } iRETURN; } iERR _ion_scanner_skip_blob(ION_SCANNER *scanner) { iENTER; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case '}': IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c != '}') break; SUCCEED(); case EOF: FAILWITH(IERR_UNEXPECTED_EOF); default: break; } } iRETURN; } iERR _ion_scanner_skip_sexp(ION_SCANNER *scanner) { iENTER; IONCHECK(_ion_scanner_skip_container(scanner, ')')); iRETURN; } iERR _ion_scanner_skip_list(ION_SCANNER *scanner) { iENTER; IONCHECK(_ion_scanner_skip_container(scanner, ']')); iRETURN; } iERR _ion_scanner_skip_struct(ION_SCANNER *scanner) { iENTER; IONCHECK(_ion_scanner_skip_container(scanner, '}')); iRETURN; } iERR 
_ion_scanner_skip_container(ION_SCANNER *scanner, int close_char)
{
    iENTER;
    int c;

    // Each pass looks at the next significant character (whitespace and
    // comments are skipped by the read) and steps over whatever construct
    // it starts. Characters that start nothing are simply dropped, unless
    // they are the closing delimiter we are waiting for.
    for (;;) {
        IONCHECK(_ion_scanner_read_past_whitespace(scanner, &c));
just_another_char: // yes this is evil
        switch (c) {
        case '"':
            IONCHECK(_ion_scanner_skip_plain_string(scanner));
            break;
        case '\'':
            IONCHECK(_ion_scanner_read_char(scanner, &c));
            if (c == '\'') {
                IONCHECK(_ion_scanner_read_char(scanner, &c));
                if (c == '\'') {
                    // ''' - a long string segment; a following segment is
                    // picked up by a later pass of the loop
                    IONCHECK(_ion_scanner_skip_one_long_string(scanner));
                }
                else {
                    // '' was an empty quoted symbol, c is the character
                    // after it and still has to be dispatched
                    goto just_another_char; // very evil - but I don't want to have to unread and reread this char
                }
            }
            else {
                // a quoted symbol whose first character is already consumed
                // NOTE(review): if that first character is a backslash the
                // escape is not honored here - confirm whether that matters
                IONCHECK(_ion_scanner_skip_single_quoted_string(scanner));
            }
            break;
        case '{':
            IONCHECK(_ion_scanner_read_char(scanner, &c));
            if (c == '{') {
                IONCHECK(_ion_scanner_skip_unknown_lob(scanner));
            }
            else if (c == '}') {
                // do nothing, we've just finished an empty struct
            }
            else {
                // a nested struct; c is its first character and is not
                // pushed back, so quoted constructs that start right after
                // the brace are handled here
                // NOTE(review): any other first character (for instance the
                // '/' of a comment or an opening bracket) is dropped without
                // being dispatched - confirm this is acceptable
                if (c == '"') {
                    IONCHECK(_ion_scanner_skip_plain_string(scanner))
                }
                else if (c == '\'') {
                    IONCHECK(_ion_scanner_read_char(scanner, &c));
                    if (c == '\'') {
                        IONCHECK(_ion_scanner_read_char(scanner, &c));
                        if (c == '\'') {
                            IONCHECK(_ion_scanner_skip_one_long_string(scanner));
                        }
                    }
                    else {
                        IONCHECK(_ion_scanner_skip_single_quoted_string(scanner));
                    }
                }
                IONCHECK(_ion_scanner_skip_container(scanner, '}'));
            }
            break;
        case '[':
            IONCHECK(_ion_scanner_skip_container(scanner, ']'));
            break;
        case '(':
            IONCHECK(_ion_scanner_skip_container(scanner, ')'));
            break;
        case EOF:
            FAILWITH(IERR_UNEXPECTED_EOF);
        default:
            if (c == close_char) {
                SUCCEED();
            }
            break;
        }
    }

    iRETURN;
}

/**
 * Copies bytes that an earlier read left in the scanner's pending-byte cache
 * into buf.
 *
 * @param scanner         the scanner holding the pending bytes
 * @param buf             destination, must not be NULL
 * @param len             capacity of buf, must be > 0
 * @param p_bytes_written receives the number of bytes copied
 */
iERR _ion_scanner_read_cached_bytes(ION_SCANNER *scanner, BYTE *buf, SIZE len, SIZE *p_bytes_written)
{
    iENTER;
    BYTE *pb = buf, *sb = scanner->_pending_bytes_pos; // NOTE(review): sb is not used in the visible part of this function

    ASSERT(buf);
    ASSERT(len > 0);
    ASSERT(p_bytes_written);

    // copy until the cache is drained or the destination is full
    while (scanner->_pending_bytes_pos < scanner->_pending_bytes_end && len--) {
        *pb++ = *scanner->_pending_bytes_pos++;
    }

    // see if we emptied the pending bytes out, if so reset the ptrs
    if (scanner->_pending_bytes_pos >= scanner->_pending_bytes_end) {
scanner->_pending_bytes_pos = scanner->_pending_bytes_end = scanner->_pending_bytes; } *p_bytes_written = pb - buf; SUCCEED(); iRETURN; } iERR _ion_scanner_read_as_string(ION_SCANNER *scanner , BYTE *buf , SIZE len , ION_SUB_TYPE ist , SIZE *p_bytes_written , BOOL *p_eos_encountered ) { iENTER; BYTE *dst = buf; SIZE remaining = len, written; BOOL triple_quote_found, eos_encountered = FALSE; ASSERT(scanner); ASSERT(buf); ASSERT(len > 0); ASSERT(p_bytes_written); ASSERT(p_eos_encountered); ASSERT(scanner->_value_location == SVL_IN_STREAM); if (ist == IST_SYMBOL_PLAIN) { IONCHECK(_ion_scanner_read_as_symbol(scanner, dst, remaining, &written)); dst += written; remaining -= written; eos_encountered = TRUE; } else if (ist == IST_SYMBOL_EXTENDED) { IONCHECK(_ion_scanner_read_as_extended_symbol(scanner, dst, remaining, &written)); dst += written; remaining -= written; eos_encountered = TRUE; } else { int terminator; IONCHECK(_ion_scanner_get_terminator_for_sub_type(ist, &terminator)); // we loop over read as string at least once, but more if this is a long clob (which // is zero or more triple quoted string (but we know we have at least one) for (;;) { IONCHECK(_ion_scanner_read_as_string_to_quote(scanner, dst, remaining, ist, &written, &eos_encountered)); dst += written; remaining -= written; if (!eos_encountered) { // we ran out of space in the buffer before we hit the end of the string break; } if (terminator != -1) { // we only loop for a triple quoted strings, this one is a plain clob so it's not possible break; } // for a triple quoted string there might be another segment waiting IONCHECK(_ion_scanner_peek_for_next_triple_quote(scanner, ist->base_type == tid_CLOB, &triple_quote_found)); if (!triple_quote_found) { // we'll only loop around again to read another triple quoted string // if we actually find another one break; } } if (eos_encountered) { if (ist == IST_CLOB_PLAIN || ist == IST_CLOB_LONG) { // with a clob the string reader does not understand it might // 
be in a clob, so it doesn't consume or check for the // closing braces, so we have to do that here (since we know // it's a clob) IONCHECK(_ion_scanner_read_lob_closing_braces(scanner)); } } } *p_bytes_written = len - remaining; // we check eos *after* setting the written length as we will be pushing a null // at the end of the value (which will change the "remaining" length, but only on eos if (eos_encountered) { // we took it out of the stream but we don't know where the chars went // if the caller is passing in the value buffer they set this back to SVL_VALUE_IMAGE scanner->_value_location = SVL_NONE; PUSH_VALUE_BYTE('\0'); // we null terminate the last (typically only) buffer of the string } *p_eos_encountered = eos_encountered; iRETURN; } iERR _ion_scanner_read_as_string_to_quote(ION_SCANNER *scanner, BYTE *buf, SIZE len, ION_SUB_TYPE ist, SIZE *p_bytes_written, BOOL *p_eos_encountered) { iENTER; ION_STREAM *stream = scanner->_stream; BOOL is_triple_quote, triple_quote_terminator = FALSE, eos_encountered = FALSE; BYTE *dst = buf; SIZE remaining = len, written; int c, c2; ASSERT(scanner); ASSERT(stream); ASSERT(buf); ASSERT(len > 0); ASSERT(p_bytes_written); ASSERT(p_eos_encountered); ASSERT(scanner->_value_location == SVL_IN_STREAM); int terminator; IONCHECK(_ion_scanner_get_terminator_for_sub_type(ist, &terminator)); if (terminator == -1) { triple_quote_terminator = TRUE; terminator = '\''; } // first we have to check for uncopied utf8 bytes that might have been left // behind in a partial copy (where the original read didn't have a big enough // buffer to hold the utf8) if (scanner->_pending_bytes_end > scanner->_pending_bytes) { IONCHECK(_ion_scanner_read_cached_bytes(scanner, buf, remaining, &written)); remaining -= written; if (remaining < 1) { *p_bytes_written = written; *p_eos_encountered = FALSE; // we can't tell here this if there are chars remaining or not SUCCEED(); } dst += written; } // until we encounter the terminator read, interpret escape 
sequences, // interpret utf8, write utf8 char out, count bytes written // the terminator is single quote, double quote, triple quote while (remaining > 0) { IONCHECK(_ion_scanner_read_char_with_validation(scanner, ist, &c)); switch (c) { case EOF: FAILWITH(IERR_UNEXPECTED_EOF); case NEW_LINE_1: // for the various forms of end of line case NEW_LINE_2: case NEW_LINE_3: if (!triple_quote_terminator) { FAILWITH(IERR_NEW_LINE_IN_STRING); } c = NEW_LINE_1; // All end of line forms are normalized to LF within quoted text. break; case '\'': if (terminator == c) { if (triple_quote_terminator) { IONCHECK(_ion_scanner_peek_two_single_quotes(scanner, &is_triple_quote)); if (is_triple_quote) { eos_encountered = TRUE; goto end_of_string; } } else { eos_encountered = TRUE; goto end_of_string; } } break; case '\"': if (terminator == c) { eos_encountered = TRUE; goto end_of_string; } break; case '\\': IONCHECK(_ion_scanner_read_escaped_char(scanner, ist, &c)); if (IS_EMPTY_ESCAPE_SEQUENCE(c)) goto dont_write_char; if (ion_isLowSurrogate(c)) { // a loose low surrogate is an invalid character // TODO: we may want to ignore this - hmmm FAILWITH(IERR_INVALID_UTF8); } else if (ion_isHighSurrogate(c)) { IONCHECK(_ion_scanner_read_char(scanner, &c2)); if (c2 == '\\') { IONCHECK(_ion_scanner_read_escaped_char(scanner, ist, &c2)); } if (!ion_isLowSurrogate(c2)) FAILWITH(IERR_INVALID_UTF8); c = ion_makeUnicodeScalar(c, c2); } break; default: if (IS_1_BYTE_UTF8(c)) break; if (ion_isLowSurrogate(c)) { // a loose low surrogate is an invalid character // TODO: we may want to ignore this - hmmm FAILWITH(IERR_INVALID_UTF8); } else if (ion_isHighSurrogate(c)) { IONCHECK(_ion_scanner_read_char(scanner, &c2)); if (c2 == '\\') { IONCHECK(_ion_scanner_read_escaped_char(scanner, ist, &c2)); } if (!ion_isLowSurrogate(c2)) FAILWITH(IERR_INVALID_UTF8); c = ion_makeUnicodeScalar(c, c2); } else { // HACK HACK TODO - this handles utf8 bytes but doesn't check them, which we should be doing PUSH_VALUE_BYTE(c); 
continue; } // do we need to check anything else here? break; } // here we write the char to the output buffer either the easy way or the hard way if (IS_1_BYTE_UTF8(c)) { PUSH_VALUE_BYTE(c); } else if (ist == IST_CLOB_LONG || ist == IST_CLOB_PLAIN) { // Preceding logic disallows unicode escapes and unescaped bytes above 0x7F. if (c > 0xFF) { FAILWITHMSG(IERR_INVALID_TOKEN_CHAR, "Illegal character in clob."); } if (c == NEW_LINE_1) { // Note: all newlines were previously normalized to NEW_LINE_1. However, NEW_LINE_1 is a sentinel // value; the LF character is what actually needs to be pushed. c = '\n'; } PUSH_VALUE_BYTE(c); } else { IONCHECK(_ion_scanner_encode_utf8_char(scanner, c, dst, remaining, &written)); remaining -= written; dst += written; } dont_write_char: continue; } end_of_string: *p_bytes_written = len - remaining; *p_eos_encountered = eos_encountered; iRETURN; } iERR _ion_scanner_read_as_symbol(ION_SCANNER *scanner, BYTE *dst, SIZE len, SIZE *p_bytes_written) { iENTER; ION_STREAM *stream = scanner->_stream; SIZE remaining = len; int c; ASSERT(scanner); ASSERT(stream); ASSERT(dst); ASSERT(len > 0); ASSERT(p_bytes_written); ASSERT(scanner->_value_location == SVL_IN_STREAM); // if it's not a quoted symbol we just read it in here (note the // symbol termination condition is different than the string read // can handle: read, we *don't* interpret escape sequences, // we *don't* interpret utf8, count bytes written // the terminator is any non-basic symbol char for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case EOF: goto end_of_symbol; case EMPTY_ESCAPE_SEQUENCE3: case EMPTY_ESCAPE_SEQUENCE2: case EMPTY_ESCAPE_SEQUENCE1: break; default: if (!IS_1_BYTE_UTF8(c) || !IS_BASIC_SYMBOL_CHAR(c)) { goto end_of_symbol; } // here we write the char to the output buffer always the easy way PUSH_VALUE_BYTE(c); break; } } end_of_symbol: IONCHECK(_ion_scanner_unread_char(scanner, c)); *p_bytes_written = len - remaining; iRETURN; } iERR 
_ion_scanner_read_as_extended_symbol(ION_SCANNER *scanner, BYTE *buf, SIZE len, SIZE *p_bytes_written) { iENTER; BYTE *dst = buf; SIZE remaining = len; int c; ASSERT(scanner); ASSERT(buf); ASSERT(len > 0); ASSERT(p_bytes_written); ASSERT(scanner->_value_location == SVL_IN_STREAM); // until we encounter the terminator read we just copy the // remaining bytes until we encounter a non operator char, then we're done for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case EOF: goto end_of_symbol; case EMPTY_ESCAPE_SEQUENCE3: case EMPTY_ESCAPE_SEQUENCE2: case EMPTY_ESCAPE_SEQUENCE1: break; default: if (!IS_1_BYTE_UTF8(c) || !IS_OPERATOR_CHAR(c)) { goto end_of_symbol; } // here we write the char to the output buffer; always the easy way PUSH_VALUE_BYTE(c); break; } } end_of_symbol: IONCHECK(_ion_scanner_unread_char(scanner, c)); *p_bytes_written = len - remaining; iRETURN; } // for non-ascii characters this converts to utf8 and writes them out // this also handles the special forms of new line iERR _ion_scanner_encode_utf8_char(ION_SCANNER *scanner, int c, BYTE *buf, SIZE remaining, SIZE *p_bytes_written) { iENTER; BYTE *pb = buf; SIZE written = 0; // if there isn't enough room in the caller buffer we temporarily write // into the scanners small byte buffer, we can read the rest out of there later if (remaining < ION_utf8_max_length) { pb = scanner->_pending_bytes_pos = scanner->_pending_bytes; } // check for the various special values, like CR LF end of line if (c < 0) { switch (c) { case NEW_LINE_3: /* carraige return */ *pb++ = '\r'; break; case NEW_LINE_2: /* carraige newline return */ *pb++ = '\n'; *pb++ = '\r'; break; case NEW_LINE_1: /* newline */ *pb++ = '\n'; break; case EMPTY_ESCAPE_SEQUENCE3: /* slash carraige return */ case EMPTY_ESCAPE_SEQUENCE2: /* slash carraige newline return */ case EMPTY_ESCAPE_SEQUENCE1: /* slash newline */ break; default: FAILWITH(IERR_INVALID_UTF8); } } else if (c <= ION_utf8_1byte_max) { // 1 byte unicode 
character >= 0 and <= 0xff or <= 127) // 0yyyyyyy *pb++ = c; } else if (c <= ION_utf8_2byte_max) { // 2 byte unicode character >=128 and <= 0x7ff or <= 2047) // 5 + 6 == 11 bits // 110yyyyy 10zzzzzz *pb++ = ION_utf8_2byte_header | (c >> 6); *pb++ = ION_utf8_trailing_header | (c & ION_utf8_trailing_bits_mask); } else if (c <= ION_utf8_3byte_max) { // 3 byte unicode character >=2048 and <= 0xffff, <= 65535 // 4 + 6 + 6 == 16 bits // 1110xxxx 10yyyyyy 10zzzzzz *pb++ = ION_utf8_3byte_header | (c >> 12); *pb++ = ION_utf8_trailing_header | ((c >> 6) & ION_utf8_trailing_bits_mask); *pb++ = ION_utf8_trailing_header | (c & ION_utf8_trailing_bits_mask); } else if (c <= ION_utf8_4byte_max) { // 4 byte unicode character > 65535 (0xffff) and <= 2097151 <= 10xFFFFF // 3 + 3*6 == 21 bits // 11110www 10xxxxxx 10yyyyyy 10zzzzzz *pb++ = ION_utf8_4byte_header | (c >> 18); *pb++ = ION_utf8_trailing_header | ((c >> 12) & ION_utf8_trailing_bits_mask); *pb++ = ION_utf8_trailing_header | ((c >> 6) & ION_utf8_trailing_bits_mask); *pb++ = ION_utf8_trailing_header | (c & ION_utf8_trailing_bits_mask); } else { FAILWITH(IERR_INVALID_UTF8); } // now if we were writing in the scanners buffer copy as much // as the caller asked for, the rest remains in the buffer for later // and compute the written byte count in either case if (remaining < ION_utf8_max_length) { written = remaining; while (remaining--) { *buf++ = *scanner->_pending_bytes_pos++; } scanner->_pending_bytes_end = pb; } else { written = pb - buf; } *p_bytes_written = written; iRETURN; } iERR _ion_scanner_read_escaped_char(ION_SCANNER *scanner, ION_SUB_TYPE ist, int *p_char) { iENTER; int c; ASSERT(scanner); ASSERT(p_char); IONCHECK(_ion_scanner_read_char(scanner, &c)); switch (c) { case '0': // \u0000 \0 alert NUL c = '\0'; break; case NEW_LINE_1: c = EMPTY_ESCAPE_SEQUENCE1; break; case NEW_LINE_2: c = EMPTY_ESCAPE_SEQUENCE2; break; case NEW_LINE_3: c = EMPTY_ESCAPE_SEQUENCE3; break; case 'a': // \u0007 \a alert BEL c = '\a'; break; 
case 'b': // \u0008 \b backspace BS c = '\b'; break; case 't': // \u0009 \t horizontal tab HT c = '\t'; break; case 'n': // \ u000A \ n linefeed LF c = '\n'; break; case 'f': // \u000C \f form feed FF c = '\f'; break; case 'r': // \ u000D \ r carriage return CR c = '\r'; break; case 'v': // \u000B \v vertical tab VT c = '\v'; break; case '"': // \u0022 \" double quote c = '"'; break; case '\'': // \u0027 \' single quote c = '\''; break; case '?': // \u003F \? question mark c = '?'; break; case '\\': // \u005C \\ backslash c = '\\'; break; case '/': // \u002F \/ forward slash nothing \NL escaped NL expands to nothing c = '/'; break; case 'x': // any \xHH 2-digit hexadecimal unicode character equivalent to \ u00HH IONCHECK(_ion_scanner_read_hex_escape_value(scanner, 2, &c)); break; case 'u': // any \ uHHHH 4-digit hexadecimal unicode character if (ist == IST_CLOB_PLAIN || ist == IST_CLOB_LONG) { FAILWITH(IERR_INVALID_SYNTAX); } IONCHECK(_ion_scanner_read_hex_escape_value(scanner, 4, &c)); break; case 'U': // any \ UHHHHHHHH 8-digit hexadecimal unicode character, note max unicode value avoids -1 if (ist == IST_CLOB_PLAIN || ist == IST_CLOB_LONG) { FAILWITH(IERR_INVALID_SYNTAX); } IONCHECK(_ion_scanner_read_hex_escape_value(scanner, 8, &c)); break; default: FAILWITH(IERR_INVALID_ESCAPE_SEQUENCE); } *p_char = c; iRETURN; } // this expects the caller to gaurantee that there are hex_len character in pb iERR _ion_scanner_read_hex_escape_value(ION_SCANNER *scanner, int hex_len, int *p_hexchar) { iENTER; int c, d, hexchar = 0; while( hex_len-- ) { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (!IS_1_BYTE_UTF8(c)) FAILWITH(IERR_INVALID_ESCAPE_SEQUENCE); d = _ion_hex_character_value[c]; // d < 0 happens on overflow if (d < 0) FAILWITH(IERR_INVALID_ESCAPE_SEQUENCE); hexchar = hexchar * 16 + d; } if (hexchar < 0 || hexchar > ION_max_unicode_scalar) { FAILWITH(IERR_INVALID_ESCAPE_SEQUENCE); } *p_hexchar = hexchar; iRETURN; } iERR 
_ion_scanner_peek_for_next_triple_quote(ION_SCANNER *scanner, BOOL is_clob, BOOL *p_triple_quote_found) { iENTER; ION_STREAM *stream = scanner->_stream; int c; ASSERT(scanner); ASSERT(stream); // skip whitespace and comments (if allowed) after the value if (is_clob) { IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c)); } else { IONCHECK(_ion_scanner_read_past_whitespace(scanner, &c)); } if (c == '\'') { // and we expect to find two more of them right next to one another IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '\'') { // and we expect to find two more of them right next to one another IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c == '\'') { // and we did, so we're done *p_triple_quote_found = TRUE; SUCCEED(); } } IONCHECK(_ion_scanner_unread_char(scanner, c)); c = '\''; } IONCHECK(_ion_scanner_unread_char(scanner, c)); *p_triple_quote_found = FALSE; iRETURN; } iERR _ion_scanner_read_lob_closing_braces(ION_SCANNER *scanner) { iENTER; ION_STREAM *stream = scanner->_stream; int c; ASSERT(scanner); ASSERT(stream); // we use lob rules for skipping whitespace after the value IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c)); if (c != '}') { FAILWITH(IERR_INVALID_LOB_TERMINATOR); } // and we expect to find two of them right next to one another IONCHECK(_ion_scanner_read_char(scanner, &c)); if (c != '}') { FAILWITH(IERR_INVALID_LOB_TERMINATOR); } iRETURN; } // Disabled sanitizer shift checks, due to unsigned int shifting. 
// Decodes base64 text of a blob from the stream into buf, writing at most
// len bytes.
//   p_bytes_written   receives the number of decoded bytes stored in buf
//   p_eos_encountered receives TRUE once the closing "}}" was consumed
// Input is processed four characters at a time (whitespace between them is
// skipped using lob rules). Decoded bytes that do not fit into buf are
// parked in the scanner's pending-bytes buffer and delivered first on the
// next call. Malformed input reports IERR_BAD_BASE64_BLOB.
NOSAN_SHIFT
iERR _ion_scanner_read_as_base64(ION_SCANNER *scanner, BYTE *buf, SIZE len, SIZE *p_bytes_written, BOOL *p_eos_encountered)
{
    iENTER;
    BOOL  eos_encountered = FALSE;
    BYTE *dst = buf;
    SIZE  remaining = len, written, output_length;
    int   c, b64_value, b64_block;
    int   padding = 0;

    ASSERT(scanner);
    ASSERT(buf);
    ASSERT(len > 0);
    ASSERT(p_bytes_written);
    ASSERT(p_eos_encountered);
    ASSERT(scanner->_value_location == SVL_IN_STREAM);

    // first we have to check for uncopied bytes that might have been left
    // behind in a partial copy (where the original read didn't have a big enough
    // buffer to hold the full 3 bytes we get from 1 base64 block of 4 characters)
    if (scanner->_pending_bytes_end > scanner->_pending_bytes) {
        IONCHECK(_ion_scanner_read_cached_bytes(scanner, buf, remaining, &written));
        remaining -= written;
        if (remaining < 1) {
            *p_bytes_written = written;
            *p_eos_encountered = FALSE; // we haven't seen the closing curlies yet
            SUCCEED();
        }
        dst += written;
    }

    // now we start processing the characters from the input stream
    //
    // the basic plan is to read 4 actual characters (ignoring
    // whitespace and such), then convert those 4 into 1-3 output
    // bytes, either into the caller's buf or the pending bytes
    // buffer
    while (remaining > 0) {
        // this doesn't help perf, but whitespace is allowed so there's not
        // much to do about it (and i'm not overly concerned about the perf
        // of converting base64 text since it should be an unusual case)
        IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c));
        // this is the only valid time to see a closing curly bracket
        if (c == '}') {
            IONCHECK(_ion_scanner_read_char(scanner, &c));
            if (c == '}') {
                // and it better be two of them
                eos_encountered = TRUE;
                break;
            }
            FAILWITH(IERR_BAD_BASE64_BLOB);
        }
        // padding is only legal in the last block, so nothing but the closing
        // braces may follow a padded block (previously the stale padding count
        // silently shortened the following block)
        if (padding != 0) FAILWITH(IERR_BAD_BASE64_BLOB);

        // character 1 of 4
        if (!IS_1_BYTE_UTF8(c)) FAILWITH(IERR_BAD_BASE64_BLOB);
        b64_block = _Ion_base64_value[c];
        if (b64_block < 0) FAILWITH(IERR_BAD_BASE64_BLOB);

        // character 2 of 4
        IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c));
        if (!IS_1_BYTE_UTF8(c) || ((b64_value = _Ion_base64_value[c]) < 0)) FAILWITH(IERR_BAD_BASE64_BLOB);
        b64_block <<= 6;
        b64_block |= b64_value;

        // character 3 of 4 (we may hit a trailer at this point)
        IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c));
        if (!IS_1_BYTE_UTF8(c) || ((b64_value = _Ion_base64_value[c]) < 0)) {
            // if it's not a valid base64 character - it better be a trailing char ('=')
            if (c != ION_BASE64_TRAILING_CHAR) {
                FAILWITH(IERR_BAD_BASE64_BLOB);
            }
            padding = 2;
            // don't care what this is, we'll never read these bits on output
            b64_value = 0; // but we don't want it to be -1
        }
        b64_block <<= 6;
        b64_block |= b64_value;

        // character 4 of 4 (we may hit a trailer at this point as well)
        IONCHECK(_ion_scanner_read_past_lob_whitespace(scanner, &c));
        if (!IS_1_BYTE_UTF8(c) || ((b64_value = _Ion_base64_value[c]) < 0)) {
            // if it's not a valid base64 character - it better be a trailing char ('=')
            if (c != ION_BASE64_TRAILING_CHAR) {
                FAILWITH(IERR_BAD_BASE64_BLOB);
            }
            if (padding == 0) {
                padding = 1;
            }
            // else padding is already 2 and stays 2
            // don't care what this is, we'll never read these bits on output
            b64_value = 0; // but we don't want it to be -1
        }
        else {
            // when we don't have a pad char (i.e. we have a valid char) make sure
            // we didn't already see a pad
            if (padding == 2) FAILWITH(IERR_BAD_BASE64_BLOB);
        }
        b64_block <<= 6;
        b64_block |= b64_value;

        // the 24 bit block holds 3 bytes, of which "padding" trailing ones are absent
        output_length = 3 - padding;

        // now actually move the 1-3 bytes into the output buffer, most
        // significant byte first.
        // we first move as many as we can into the caller buffer. Both counters
        // are only decremented when a byte is really written: the former
        // "output_length-- && remaining--" form lost a byte and drove
        // remaining negative whenever the buffer filled up inside a block.
        while (output_length > 0 && remaining > 0) {
            *dst++ = (b64_block & 0xff0000) >> 16;
            // drop the byte just written before shifting so the shift cannot overflow
            b64_block = (b64_block & 0xffff) << 8;
            output_length--;
            remaining--;
        }
        // and if there's anything left we move it into the scanner's temp
        // pending value buffer and bail
        if (output_length > 0) {
            ASSERT(scanner->_pending_bytes_pos == scanner->_pending_bytes);
            dst = scanner->_pending_bytes_pos = scanner->_pending_bytes;
            while (output_length-- > 0) {
                *dst++ = (b64_block & 0xff0000) >> 16;
                b64_block = (b64_block & 0xffff) << 8;
            }
            scanner->_pending_bytes_end = dst;
            break; // break out of the outer while (remaining) loop
        }
    }

    // we've copied all we can, now we just tell the caller what happened
    *p_bytes_written = len - remaining;
    *p_eos_encountered = eos_encountered;
    if (eos_encountered) {
        // we took it out of the stream but we don't know where the chars went
        // if the caller is passing in the value buffer they set this back to SVL_VALUE_IMAGE
        scanner->_value_location = SVL_NONE;
    }

    iRETURN;
}

// when this is called sign will be -1 if we read a '-', otherwise +1
// c will be the first actual digit (and it must be a digit to get here)
// at this point we might still see an int, a hex int, a binary int, a float, a double, a decimal or a timestamp
// we'll copy bytes into _value_buffer as we process them.
they'll be nice and need // (and not hold page buffers); iERR _ion_scanner_read_possible_number(ION_SCANNER *scanner, int c, int sign, ION_SUB_TYPE *p_ist) { iENTER; ION_SUB_TYPE t = IST_NONE; BYTE *dst = scanner->_value_buffer; SIZE remaining_before, remaining = scanner->_value_buffer_length; BOOL is_zero; ASSERT(isdigit(c)); // at this point we're moving the value bytes into the value buffer // whether it is valid of not, but in either case it won't be in // the input stream any longer scanner->_value_location = SVL_NONE; // push the optionally read sign and the first digit if (sign == -1) { PUSH_VALUE_BYTE('-'); } PUSH_VALUE_BYTE(c); is_zero = (c == '0'); // we need to save this to complain later if someone includes unnessesary leading 0's IONCHECK(_ion_scanner_read_char(scanner, &c)); // if we have an x we have a hexadecimal int if (c == 'x' || c == 'X') { PUSH_VALUE_BYTE(c); IONCHECK(_ion_scanner_read_hex_int(scanner, &dst, &remaining, &c)); t = (sign == -1) ? IST_INT_NEG_HEX : IST_INT_POS_HEX; } else if (c == 'b' || c == 'B') { PUSH_VALUE_BYTE(c); IONCHECK(_ion_scanner_read_binary_int(scanner, &dst, &remaining, &c)); t = (sign == -1) ? 
IST_INT_NEG_BINARY : IST_INT_POS_BINARY; } else { // we'll use this to check for a 4 digit year if this is a timestamp, // and for the leading zero test (if digits > 1 and a leading 0) // the -1 is because we already pushed the first digit on the output buffer remaining_before = remaining + 1; // if the char is a digit, it's the leading digits of a decimal, float or the year of a timestamp // or the entire decimal integer - so we'll read those digits in until we hit a char that will // let us distinguish which it is if (IS_1_BYTE_UTF8(c) && (isdigit(c) || c == '_')) { if (isdigit(c)) { PUSH_VALUE_BYTE(c); IONCHECK(_ion_scanner_read_digits_with_underscores(scanner, &dst, &remaining, &c, TRUE)); } else { // c == '_' IONCHECK(_ion_scanner_read_digits_with_underscores(scanner, &dst, &remaining, &c, FALSE)); if ((remaining_before - remaining) == 1) { // Didn't find any more digits after the underscore. FAILWITHMSG(IERR_INVALID_TOKEN_CHAR, "Illegal underscore in number."); } } } // we read all the leading digits - we check for timestamp first if (c == '-' || c == 'T') { // it is a '-' or a 'T' so this has to be a timestamp if (sign == -1 || ((remaining_before - remaining) != 4)) { // no negative timestamps and a year is 4 digits long FAILWITH(IERR_INVALID_TIMESTAMP); } IONCHECK(_ion_scanner_read_timestamp(scanner, c, &dst, &remaining, &c, &t)); // note that read timestamp doesn't overread } else { // so might a decimal point (if decimal, float, or double) before we hit // it there was a leading zero we shouldn't have any other digits but we might have a '.' 
// NB that even if it is a '.', it hasn't been pushed yet/ if (is_zero && (remaining_before - remaining) > 1) { // there is more than 1 digit but there is a leading 0 FAILWITH(IERR_INVALID_LEADING_ZEROS); } // if it's decimal point, read the digits after the decimal, then look // for the exponent if (c == '.') { PUSH_VALUE_BYTE(c); IONCHECK(_ion_scanner_read_digits_with_underscores(scanner, &dst, &remaining, &c, FALSE)); // with a decimal point we'll presume this is a a decimal unless we see a 'e' t = IST_DECIMAL; } else { // otherwise we'll presume this is an int until we've checked for an 'e' or 'd' t = (sign == -1) ? IST_INT_NEG_DECIMAL : IST_INT_POS_DECIMAL; } if (c == 'd' || c == 'D') { PUSH_VALUE_BYTE(c); IONCHECK(_ion_scanner_read_exponent(scanner, &dst, &remaining, &c)); // ahh, this is a decimal *with* a 'd' t = IST_DECIMAL_D; } else if (c == 'e' || c == 'E') { PUSH_VALUE_BYTE(c); IONCHECK(_ion_scanner_read_exponent(scanner, &dst, &remaining, &c)); // it's a float 64 with a 'e' t = IST_FLOAT_64; } // POSSIBLITY: else if (c == 'f' || c == 'F') { // POSSIBLITY: PUSH_VALUE_BYTE(c); // POSSIBLITY: IONCHECK(_ion_scanner_read_exponent(scanner, &dst, &remaining, &c)); // POSSIBLITY: t = IST_FLOAT_32; // POSSIBLITY: } // in all these cases we have read 1 character too far IONCHECK(_ion_scanner_unread_char(scanner, c)); } } if (c > 0 && strchr(NUMERIC_STOP_CHARACTERS, c) == NULL) { FAILWITH(IERR_INVALID_SYNTAX); } // we have a good_value, set up the value state to reflect this PUSH_VALUE_BYTE('\0'); scanner->_value_location = SVL_VALUE_IMAGE; scanner->_value_image.value = scanner->_value_buffer; scanner->_value_image.length = scanner->_value_buffer_length - remaining - 1; // we don't count the null terminator we also pushed onto the end of the value string *p_ist = t; SUCCEED(); iRETURN; } iERR _ion_scanner_read_radix_int(ION_SCANNER *scanner, BYTE **p_dst, SIZE *p_remaining, int *p_char, ION_INT_RADIX radix, BOOL underscore_allowed) { iENTER; int c, remaining = 
*p_remaining; BYTE *dst = *p_dst; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (!IS_1_BYTE_UTF8(c)) { break; } if (c == '_') { if (!underscore_allowed) { FAILWITHMSG(IERR_INVALID_TOKEN_CHAR, "Illegal underscore in number."); } underscore_allowed = FALSE; continue; // Do not append the underscore. } if (!IS_RADIX_CHAR(c, radix)) { break; } PUSH_VALUE_BYTE(c); underscore_allowed = TRUE; } if (dst != *p_dst && !underscore_allowed) { FAILWITHMSG(IERR_INVALID_TOKEN_CHAR, "Illegal underscore in number.") } *p_char = c; *p_remaining = remaining; *p_dst = dst; iRETURN; } iERR _ion_scanner_read_hex_int(ION_SCANNER *scanner, BYTE **p_dst, SIZE *p_remaining, int *p_end_char) { iENTER; int c; IONCHECK(_ion_scanner_read_radix_int(scanner, p_dst, p_remaining, &c, ION_INT_HEX, FALSE)); *p_end_char = c; IONCHECK(_ion_scanner_unread_char(scanner, c)); iRETURN; } iERR _ion_scanner_read_binary_int(ION_SCANNER *scanner, BYTE **p_dst, SIZE *p_remaining, int *p_end_char) { iENTER; int c; IONCHECK(_ion_scanner_read_radix_int(scanner, p_dst, p_remaining, &c, ION_INT_BINARY, FALSE)); *p_end_char = c; IONCHECK(_ion_scanner_unread_char(scanner, c)); iRETURN; } iERR _ion_scanner_read_digits(ION_SCANNER *scanner, BYTE **p_dst, SIZE *p_remaining, int *p_char) { iENTER; BYTE *dst = *p_dst; SIZE remaining = *p_remaining; int c; for (;;) { IONCHECK(_ion_scanner_read_char(scanner, &c)); if (!IS_1_BYTE_UTF8(c) || !isdigit(c)) { break; } PUSH_VALUE_BYTE(c); } *p_char = c; *p_remaining = remaining; *p_dst = dst; iRETURN; } iERR _ion_scanner_read_digits_with_underscores(ION_SCANNER *scanner, BYTE **p_dst, SIZE *p_remaining, int *p_char, BOOL underscore_allowed) { iENTER; IONCHECK(_ion_scanner_read_radix_int(scanner, p_dst, p_remaining, p_char, ION_INT_DECIMAL, underscore_allowed)); iRETURN; } iERR _ion_scanner_read_exponent(ION_SCANNER *scanner, BYTE **p_dst, SIZE *p_remaining, int *p_char) { iENTER; BYTE *dst = *p_dst; SIZE remaining = *p_remaining; int c; // the first char following 
the 'e' ('d' or other) is either a sign or a digit IONCHECK(_ion_scanner_read_char(scanner, &c)); if (!IS_1_BYTE_UTF8(c)) { goto past_exponent; } if (isdigit(c) || c == '-' || c == '+') { PUSH_VALUE_BYTE(c); } else { goto past_exponent; } // from here on out it's only digits, we'll let the digit scanner do it IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); past_exponent: *p_char = c; *p_remaining = remaining; *p_dst = dst; iRETURN; } iERR _ion_scanner_read_timestamp(ION_SCANNER *scanner, int c, BYTE **p_dst, SIZE *p_remaining, int *p_end_char, ION_SUB_TYPE *p_ist) { iENTER; ION_SUB_TYPE t = IST_TIMESTAMP_YEAR; BYTE *dst = *p_dst; SIZE remaining = *p_remaining; SIZE remaining_before; BOOL valid_termination_char = FALSE; BOOL has_time = FALSE; // at this point we have a 4 digit year (or a fail) // we should have already checked for the negative sign before we were called if (remaining != (scanner->_value_buffer_length - 4)) { FAILWITH(IERR_INVALID_TIMESTAMP); } // so the year has at least a month following it (before a timezone offset) if (c == '-') { PUSH_VALUE_BYTE(c); // read month remaining_before = remaining; IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); if ((remaining_before - 2) != remaining) { FAILWITH(IERR_INVALID_TIMESTAMP); } t = IST_TIMESTAMP_MONTH; if (c == '-') { PUSH_VALUE_BYTE(c); // read a day remaining_before = remaining; IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); if ((remaining_before - 2) != remaining) { FAILWITH(IERR_INVALID_TIMESTAMP); } t = IST_TIMESTAMP_DAY; } } // now we're past the date, do we have a timestamp if (c == 'T') { PUSH_VALUE_BYTE(c); // read hour remaining_before = remaining; IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); if (remaining_before == remaining) { // not a digit after the 'T', so: no hours, no time zone offset goto check_timestamp_terminator; } if ((remaining_before - 2) != remaining) { FAILWITH(IERR_INVALID_TIMESTAMP); } // we have a 
time value, so we'll have to make sure we have a timezone specifier (below) has_time = TRUE; // if we have hours we have to have minutes if (c != ':') { FAILWITH(IERR_INVALID_TIMESTAMP); } PUSH_VALUE_BYTE(c); remaining_before = remaining; IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); if ((remaining_before - 2) != remaining) { FAILWITH(IERR_INVALID_TIMESTAMP); } t = IST_TIMESTAMP_TIME; // now we might have seconds if (c == ':') { PUSH_VALUE_BYTE(c); remaining_before = remaining; IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); if ((remaining_before - 2) != remaining) { FAILWITH(IERR_INVALID_TIMESTAMP); } t = IST_TIMESTAMP_WITH_SECS; // we might even have decimal (fractional seconds) if (c == '.') { PUSH_VALUE_BYTE(c); IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); } t = IST_TIMESTAMP_WITH_FRAC_SECS; } } if (c == 'Z' || c == 'z') { PUSH_VALUE_BYTE(c); //we don't do anything more with the 'z', it's been pushed onto the value // but we will be checking the termination character (just after the 'z') IONCHECK(_ion_scanner_read_char(scanner, &c)); } else if (c == '+' || c == '-') { PUSH_VALUE_BYTE(c); // read hour remaining_before = remaining; IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); if ((remaining_before - 2) != remaining) { FAILWITH(IERR_INVALID_TIMESTAMP); } // on a timezone offset if we might have minutes if (c == ':') { PUSH_VALUE_BYTE(c); remaining_before = remaining; IONCHECK(_ion_scanner_read_digits(scanner, &dst, &remaining, &c)); if ((remaining_before - 2) != remaining) { FAILWITH(IERR_INVALID_TIMESTAMP); } } } else if (has_time == TRUE) { // we have a time value (at least hours) but not timezone offset - that's a bad timestamp FAILWITH(IERR_INVALID_TIMESTAMP); } check_timestamp_terminator: // if there wasn't a timezone offset, we'll need this character again later IONCHECK(_ion_scanner_unread_char(scanner, c)); IONCHECK(_ion_scanner_is_value_terminator(scanner, c, 
&valid_termination_char)); if (!valid_termination_char) { FAILWITH(IERR_INVALID_TIMESTAMP); } *p_dst = dst; *p_remaining = remaining; *p_ist = t; *p_end_char = c; iRETURN; }