/* * UTF-8 stream functions * * Copyright (C) 2008-2023, Joachim Metz * * Refer to AUTHORS for acknowledgements. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see . */ #include #include #include #include "libuna_definitions.h" #include "libuna_libcerror.h" #include "libuna_types.h" #include "libuna_unicode_character.h" #include "libuna_utf8_stream.h" /* Copies an UTF-8 stream byte order mark (BOM) * Returns 1 if successful or -1 on error */ int libuna_utf8_stream_copy_byte_order_mark( uint8_t *utf8_stream, size_t utf8_stream_size, size_t *utf8_stream_index, libcerror_error_t **error ) { static char *function = "libuna_utf8_stream_copy_byte_order_mark"; if( utf8_stream == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream.", function ); return( -1 ); } if( utf8_stream_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-8 stream size value exceeds maximum.", function ); return( -1 ); } if( utf8_stream_index == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream index.", function ); return( -1 ); } if( ( *utf8_stream_index + 3 ) > utf8_stream_size ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL, "%s: UTF-8 stream too small.", function ); return( -1 ); } utf8_stream[ *utf8_stream_index ] = 0xef; utf8_stream[ *utf8_stream_index + 1 ] = 0xbb; utf8_stream[ *utf8_stream_index + 2 ] = 0xbf; *utf8_stream_index += 3; return( 1 ); } /* Determines the size of an UTF-8 stream from an UTF-8 string * Returns 1 if successful or -1 on error */ int libuna_utf8_stream_size_from_utf8( const libuna_utf8_character_t *utf8_string, size_t utf8_string_size, size_t *utf8_stream_size, libcerror_error_t **error ) { static char *function = "libuna_utf8_stream_size_from_utf8"; libuna_unicode_character_t unicode_character = 0; size_t utf8_string_index = 0; if( utf8_string == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 string.", function ); return( -1 ); } if( utf8_string_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-8 string size value exceeds maximum.", function ); return( -1 ); } if( utf8_stream_size == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream size.", function ); return( -1 ); } /* Add the byte order mark */ *utf8_stream_size = 3; while( utf8_string_index < utf8_string_size ) { /* Convert the UTF-8 character bytes into an Unicode character */ if( libuna_unicode_character_copy_from_utf8( &unicode_character, utf8_string, utf8_string_size, &utf8_string_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_INPUT_FAILED, "%s: unable to copy Unicode character from UTF-8.", function ); return( -1 ); } /* Determine how many UTF-8 character bytes are required */ if( libuna_unicode_character_size_to_utf8( unicode_character, utf8_stream_size, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to determine size of Unicode character in UTF-8.", function ); return( -1 ); } if( unicode_character == 0 ) { break; } } return( 1 ); } /* Copies an UTF-8 stream from an UTF-8 string * Returns 1 if successful or -1 on error */ int libuna_utf8_stream_copy_from_utf8( uint8_t *utf8_stream, size_t utf8_stream_size, const libuna_utf8_character_t *utf8_string, size_t utf8_string_size, libcerror_error_t **error ) { static char *function = "libuna_utf8_stream_copy_from_utf8"; libuna_unicode_character_t unicode_character = 0; size_t utf8_stream_index = 0; size_t utf8_string_index = 0; if( utf8_stream == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream.", function ); return( -1 ); } if( utf8_stream_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-8 stream size value exceeds maximum.", function ); return( -1 ); } if( utf8_string == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 string.", function ); return( -1 ); } if( utf8_string_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-8 string size value exceeds maximum.", function ); return( -1 ); } if( utf8_stream_size < utf8_string_size ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL, "%s: UTF-8 stream is too small.", function ); return( -1 ); } if( libuna_utf8_stream_copy_byte_order_mark( utf8_stream, utf8_stream_size, &utf8_stream_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to copy UTF-8 byte order mark.", function ); return( -1 ); } while( utf8_string_index < utf8_string_size ) { /* Convert the UTF-8 string bytes into an Unicode character */ if( libuna_unicode_character_copy_from_utf8( &unicode_character, utf8_string, utf8_string_size, &utf8_string_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_INPUT_FAILED, "%s: unable to copy Unicode character from UTF-8 string.", function ); return( -1 ); } /* Convert the Unicode character into UTF-8 stream bytes */ if( libuna_unicode_character_copy_to_utf8( unicode_character, utf8_stream, utf8_stream_size, &utf8_stream_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to copy Unicode character to UTF-8 stream.", function ); return( -1 ); } if( unicode_character == 0 ) { break; } } return( 1 ); } /* Determines the size of an UTF-8 stream from an UTF-16 string * Returns 1 if successful or -1 on error */ int libuna_utf8_stream_size_from_utf16( const libuna_utf16_character_t *utf16_string, size_t utf16_string_size, size_t *utf8_stream_size, libcerror_error_t **error ) { static char *function = "libuna_utf8_stream_size_from_utf16"; libuna_unicode_character_t unicode_character = 0; size_t utf16_string_index = 0; if( utf16_string == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-16 string.", function ); return( -1 ); } if( utf16_string_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-16 string size value exceeds maximum.", function ); return( -1 ); } if( utf8_stream_size == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream size.", function ); return( -1 ); } /* Add the byte order mark */ *utf8_stream_size = 3; while( utf16_string_index < utf16_string_size ) { /* Convert the UTF-16 character bytes into an Unicode character */ if( libuna_unicode_character_copy_from_utf16( &unicode_character, utf16_string, utf16_string_size, &utf16_string_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_INPUT_FAILED, "%s: unable to copy Unicode character from UTF-16.", function ); return( -1 ); } /* Determine how many UTF-8 character bytes are required */ if( libuna_unicode_character_size_to_utf8( unicode_character, utf8_stream_size, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to determine size of Unicode character in UTF-8.", function ); return( -1 ); } if( unicode_character == 0 ) { break; } } return( 1 ); } /* Copies an UTF-8 stream from an UTF-16 string * Returns 1 if successful or -1 on error */ int libuna_utf8_stream_copy_from_utf16( uint8_t *utf8_stream, size_t utf8_stream_size, const libuna_utf16_character_t *utf16_string, size_t utf16_string_size, libcerror_error_t **error ) { static char *function = "libuna_utf8_stream_copy_from_utf16"; libuna_unicode_character_t unicode_character = 0; size_t utf16_string_index = 0; size_t utf8_stream_index = 0; if( utf8_stream == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream.", function ); return( -1 ); } if( utf8_stream_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-8 stream size value exceeds maximum.", function ); return( -1 ); } if( utf16_string == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-16 string.", function ); return( -1 ); } if( utf16_string_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-16 string size value exceeds maximum.", function ); return( -1 ); } if( libuna_utf8_stream_copy_byte_order_mark( utf8_stream, utf8_stream_size, &utf8_stream_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to copy UTF-8 byte order mark.", function ); return( -1 ); } while( utf16_string_index < utf16_string_size ) { /* Convert the UTF-16 string bytes into an Unicode character */ if( libuna_unicode_character_copy_from_utf16( &unicode_character, utf16_string, utf16_string_size, &utf16_string_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_INPUT_FAILED, "%s: unable to copy Unicode character from UTF-16 string.", function ); return( -1 ); } /* Convert the Unicode character into UTF-8 stream bytes */ if( libuna_unicode_character_copy_to_utf8( unicode_character, utf8_stream, utf8_stream_size, &utf8_stream_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to copy Unicode character to UTF-8 stream.", function ); return( -1 ); } if( unicode_character == 0 ) { break; } } return( 1 ); } /* Determines the size of an UTF-8 stream from an UTF-32 string * Returns 1 if successful or -1 on error */ int libuna_utf8_stream_size_from_utf32( const libuna_utf32_character_t *utf32_string, size_t utf32_string_size, size_t *utf8_stream_size, libcerror_error_t **error ) { static char *function = "libuna_utf8_stream_size_from_utf32"; libuna_unicode_character_t unicode_character = 0; size_t utf32_string_index = 0; if( utf32_string == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-32 string.", function ); return( -1 ); } if( utf32_string_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-32 string size value exceeds maximum.", function ); return( -1 ); } if( utf8_stream_size == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream size.", function ); return( -1 ); } /* Add the byte order mark */ *utf8_stream_size = 3; while( utf32_string_index < utf32_string_size ) { /* Convert the UTF-32 character bytes into an Unicode character */ if( libuna_unicode_character_copy_from_utf32( &unicode_character, utf32_string, utf32_string_size, &utf32_string_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_INPUT_FAILED, "%s: unable to copy Unicode character from UTF-32.", function ); return( -1 ); } /* Determine how many UTF-8 character bytes are required */ if( libuna_unicode_character_size_to_utf8( unicode_character, utf8_stream_size, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to determine size of Unicode character in UTF-8.", function ); return( -1 ); } if( unicode_character == 0 ) { break; } } return( 1 ); } /* Copies an UTF-8 stream from an UTF-32 string * Returns 1 if successful or -1 on error */ int libuna_utf8_stream_copy_from_utf32( uint8_t *utf8_stream, size_t utf8_stream_size, const libuna_utf32_character_t *utf32_string, size_t utf32_string_size, libcerror_error_t **error ) { static char *function = "libuna_utf8_stream_copy_from_utf32"; libuna_unicode_character_t unicode_character = 0; size_t utf32_string_index = 0; size_t utf8_stream_index = 0; if( utf8_stream == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-8 stream.", function ); return( -1 ); } if( utf8_stream_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-8 stream size value exceeds maximum.", function ); return( -1 ); } if( utf32_string == NULL ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_INVALID_VALUE, "%s: invalid UTF-32 string.", function ); return( -1 ); } if( utf32_string_size > (size_t) SSIZE_MAX ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_ARGUMENTS, LIBCERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM, "%s: invalid UTF-32 string size value exceeds maximum.", function ); return( -1 ); } if( libuna_utf8_stream_copy_byte_order_mark( utf8_stream, utf8_stream_size, &utf8_stream_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to copy UTF-8 byte order mark.", function ); return( -1 ); } while( utf32_string_index < utf32_string_size ) { /* Convert the UTF-32 character bytes into an Unicode character */ if( libuna_unicode_character_copy_from_utf32( &unicode_character, utf32_string, utf32_string_size, &utf32_string_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_INPUT_FAILED, "%s: unable to copy Unicode character from UTF-32.", function ); return( -1 ); } /* Convert the Unicode character into UTF-8 character bytes */ if( libuna_unicode_character_copy_to_utf8( unicode_character, utf8_stream, utf8_stream_size, &utf8_stream_index, error ) != 1 ) { libcerror_error_set( error, LIBCERROR_ERROR_DOMAIN_CONVERSION, LIBCERROR_CONVERSION_ERROR_OUTPUT_FAILED, "%s: unable to copy Unicode character to UTF-8 stream.", function ); return( -1 ); } if( unicode_character == 0 ) { break; } } return( 1 ); }