Page Menu
Home
Phabricator
Search
Configure Global Search
Log In
Files
F7712363
libuna_utf8_stream.c
No One
Temporary
Actions
Download File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Size
15 KB
Referenced Files
None
Subscribers
None
libuna_utf8_stream.c
View Options
/*
* UTF-8 stream functions
*
* Copyright (c) 2008-2011, Joachim Metz <jbmetz@users.sourceforge.net>
*
* Refer to AUTHORS for acknowledgements.
*
* This software is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This software is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this software. If not, see <http://www.gnu.org/licenses/>.
*/
#include <common.h>
#include <memory.h>
#include <types.h>
#include <liberror.h>
#include "libuna_definitions.h"
#include "libuna_inline.h"
#include "libuna_types.h"
#include "libuna_unicode_character.h"
#include "libuna_utf8_stream.h"
/* Copies an UTF-8 stream byte order mark (BOM)
 *
 * Writes the 3 bytes 0xef 0xbb 0xbf into utf8_stream starting at
 * *utf8_stream_index and advances *utf8_stream_index by 3
 *
 * utf8_stream:       the buffer to write to, must not be NULL
 * utf8_stream_size:  the size of the buffer in bytes, at most SSIZE_MAX
 * utf8_stream_index: the position to write at, must not be NULL
 * error:             passed to liberror_error_set on failure
 *
 * Returns 1 if successful or -1 on error
 */
LIBUNA_INLINE \
int libuna_utf8_stream_copy_byte_order_mark(
     uint8_t *utf8_stream,
     size_t utf8_stream_size,
     size_t *utf8_stream_index,
     liberror_error_t **error )
{
	static char *function = "libuna_utf8_stream_copy_byte_order_mark";

	if( utf8_stream == NULL )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream.",
		 function );

		return( -1 );
	}
	if( utf8_stream_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-8 stream size value exceeds maximum.",
		 function );

		return( -1 );
	}
	if( utf8_stream_index == NULL )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream index.",
		 function );

		return( -1 );
	}
	/* The byte order mark needs 3 bytes at the index
	 * The index is compared against ( size - 3 ) instead of adding to the
	 * index, so an index close to SIZE_MAX cannot wrap around and slip
	 * past the bounds check
	 */
	if( ( utf8_stream_size < 3 )
	 || ( *utf8_stream_index > ( utf8_stream_size - 3 ) ) )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
		 "%s: UTF-8 stream too small.",
		 function );

		return( -1 );
	}
	utf8_stream[ *utf8_stream_index     ] = 0xef;
	utf8_stream[ *utf8_stream_index + 1 ] = 0xbb;
	utf8_stream[ *utf8_stream_index + 2 ] = 0xbf;

	*utf8_stream_index += 3;

	return( 1 );
}
/* Calculates the number of bytes an UTF-8 stream needs to hold
 * an UTF-8 string: the string size plus 3 bytes for the byte order mark
 * On success the value is stored in *utf8_stream_size
 * Returns 1 if successful or -1 on error
 */
int libuna_utf8_stream_size_from_utf8(
     const libuna_utf8_character_t *utf8_string,
     size_t utf8_string_size,
     size_t *utf8_stream_size,
     liberror_error_t **error )
{
	static char *function_name = "libuna_utf8_stream_size_from_utf8";
	int result                 = -1;

	if( NULL == utf8_string )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 string.", function_name );
	}
	else if( utf8_string_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-8 string size value exceeds maximum.", function_name );
	}
	else if( NULL == utf8_stream_size )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream size.", function_name );
	}
	else
	{
		/* The string bytes are taken over as-is (the UTF-8 string is
		 * assumed to be sanitized), preceded by the byte order mark
		 */
		*utf8_stream_size = utf8_string_size + 3;

		result = 1;
	}
	return( result );
}
/* Copies an UTF-8 stream from an UTF-8 string
 *
 * Writes the 3 byte UTF-8 byte order mark at the start of utf8_stream,
 * followed by utf8_string_size bytes copied from utf8_string
 *
 * utf8_stream:      the buffer to write to, must not be NULL
 * utf8_stream_size: the size of the buffer in bytes, must be at least
 *                   utf8_string_size + 3 and at most SSIZE_MAX
 * utf8_string:      the string to copy, must not be NULL
 * utf8_string_size: the number of string bytes to copy, at most SSIZE_MAX
 * error:            passed to liberror_error_set on failure
 *
 * Returns 1 if successful or -1 on error
 */
int libuna_utf8_stream_copy_from_utf8(
     uint8_t *utf8_stream,
     size_t utf8_stream_size,
     const libuna_utf8_character_t *utf8_string,
     size_t utf8_string_size,
     liberror_error_t **error )
{
	static char *function       = "libuna_utf8_stream_copy_from_utf8";
	size_t utf8_stream_iterator = 0;

	if( utf8_stream == NULL )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream.",
		 function );

		return( -1 );
	}
	if( utf8_stream_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-8 stream size value exceeds maximum.",
		 function );

		return( -1 );
	}
	if( utf8_string == NULL )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 string.",
		 function );

		return( -1 );
	}
	if( utf8_string_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-8 string size value exceeds maximum.",
		 function );

		return( -1 );
	}
	/* The stream must hold the byte order mark (3 bytes) and the string
	 * Checking against the string size alone would let the copy below
	 * write up to 3 bytes past the end of the stream
	 * The addition cannot wrap: utf8_string_size is at most SSIZE_MAX here
	 */
	if( utf8_stream_size < ( utf8_string_size + 3 ) )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_TOO_SMALL,
		 "%s: UTF-8 stream is too small.",
		 function );

		return( -1 );
	}
	if( libuna_utf8_stream_copy_byte_order_mark(
	     utf8_stream,
	     utf8_stream_size,
	     &utf8_stream_iterator,
	     error ) != 1 )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_CONVERSION,
		 LIBERROR_CONVERSION_ERROR_OUTPUT_FAILED,
		 "%s: unable to copy UTF-8 byte order mark.",
		 function );

		return( -1 );
	}
	/* Assumes the UTF-8 string is sanitized
	 * The bytes are copied directly after the byte order mark
	 */
	if( memory_copy(
	     &utf8_stream[ utf8_stream_iterator ],
	     utf8_string,
	     utf8_string_size ) == NULL )
	{
		liberror_error_set(
		 error,
		 LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: unable to copy UTF-8 string to stream.",
		 function );

		return( -1 );
	}
	return( 1 );
}
/* Calculates the number of bytes an UTF-8 stream needs to hold
 * an UTF-16 string
 * The count starts at 3 for the byte order mark; every Unicode character
 * read from the UTF-16 string is then handed, together with the running
 * count, to libuna_unicode_character_size_to_utf8
 * Returns 1 if successful or -1 on error
 */
int libuna_utf8_stream_size_from_utf16(
     const libuna_utf16_character_t *utf16_string,
     size_t utf16_string_size,
     size_t *utf8_stream_size,
     liberror_error_t **error )
{
	static char *function_name           = "libuna_utf8_stream_size_from_utf16";
	libuna_unicode_character_t character = 0;
	size_t utf16_string_index            = 0;

	if( NULL == utf16_string )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-16 string.", function_name );

		return( -1 );
	}
	if( utf16_string_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-16 string size value exceeds maximum.", function_name );

		return( -1 );
	}
	if( NULL == utf8_stream_size )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream size.", function_name );

		return( -1 );
	}
	/* Reserve room for the byte order mark
	 */
	*utf8_stream_size = 3;

	/* The index is passed by reference to the UTF-16 reader, which is
	 * expected to advance it; the loop itself has no increment step
	 */
	for( utf16_string_index = 0;
	     utf16_string_index < utf16_string_size; )
	{
		/* Read the next Unicode character from the UTF-16 string
		 */
		if( 1 != libuna_unicode_character_copy_from_utf16(
		          &character,
		          utf16_string,
		          utf16_string_size,
		          &utf16_string_index,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_INPUT_FAILED,
			 "%s: unable to copy Unicode character from UTF-16.", function_name );

			return( -1 );
		}
		/* Account for the UTF-8 bytes this character requires
		 */
		if( 1 != libuna_unicode_character_size_to_utf8(
		          character,
		          utf8_stream_size,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_OUTPUT_FAILED,
			 "%s: unable to determine size of Unicode character in UTF-8.", function_name );

			return( -1 );
		}
	}
	return( 1 );
}
/* Converts an UTF-16 string into an UTF-8 stream
 * The byte order mark is written first; after that every Unicode character
 * read from the UTF-16 string is passed to
 * libuna_unicode_character_copy_to_utf8 together with the stream and the
 * current stream position
 * Returns 1 if successful or -1 on error
 */
int libuna_utf8_stream_copy_from_utf16(
     uint8_t *utf8_stream,
     size_t utf8_stream_size,
     const libuna_utf16_character_t *utf16_string,
     size_t utf16_string_size,
     liberror_error_t **error )
{
	static char *function_name           = "libuna_utf8_stream_copy_from_utf16";
	libuna_unicode_character_t character = 0;
	size_t utf16_string_index            = 0;
	size_t utf8_stream_index             = 0;

	if( NULL == utf8_stream )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream.", function_name );

		return( -1 );
	}
	if( utf8_stream_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-8 stream size value exceeds maximum.", function_name );

		return( -1 );
	}
	if( NULL == utf16_string )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-16 string.", function_name );

		return( -1 );
	}
	if( utf16_string_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-16 string size value exceeds maximum.", function_name );

		return( -1 );
	}
	/* The stream starts with the byte order mark
	 */
	if( 1 != libuna_utf8_stream_copy_byte_order_mark(
	          utf8_stream,
	          utf8_stream_size,
	          &utf8_stream_index,
	          error ) )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
		 LIBERROR_CONVERSION_ERROR_OUTPUT_FAILED,
		 "%s: unable to copy UTF-8 byte order mark.", function_name );

		return( -1 );
	}
	/* Both indexes are passed by reference to the helpers, which are
	 * expected to advance them; the loop itself has no increment step
	 */
	for( utf16_string_index = 0;
	     utf16_string_index < utf16_string_size; )
	{
		/* Read the next Unicode character from the UTF-16 string
		 */
		if( 1 != libuna_unicode_character_copy_from_utf16(
		          &character,
		          utf16_string,
		          utf16_string_size,
		          &utf16_string_index,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_INPUT_FAILED,
			 "%s: unable to copy Unicode character from UTF-16 string.", function_name );

			return( -1 );
		}
		/* Write the character to the UTF-8 stream
		 */
		if( 1 != libuna_unicode_character_copy_to_utf8(
		          character,
		          utf8_stream,
		          utf8_stream_size,
		          &utf8_stream_index,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_OUTPUT_FAILED,
			 "%s: unable to copy Unicode character to UTF-8 stream.", function_name );

			return( -1 );
		}
	}
	return( 1 );
}
/* Calculates the number of bytes an UTF-8 stream needs to hold
 * an UTF-32 string
 * The count starts at 3 for the byte order mark; every Unicode character
 * read from the UTF-32 string is then handed, together with the running
 * count, to libuna_unicode_character_size_to_utf8
 * Returns 1 if successful or -1 on error
 */
int libuna_utf8_stream_size_from_utf32(
     const libuna_utf32_character_t *utf32_string,
     size_t utf32_string_size,
     size_t *utf8_stream_size,
     liberror_error_t **error )
{
	static char *function_name           = "libuna_utf8_stream_size_from_utf32";
	libuna_unicode_character_t character = 0;
	size_t utf32_string_index            = 0;

	if( NULL == utf32_string )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-32 string.", function_name );

		return( -1 );
	}
	if( utf32_string_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-32 string size value exceeds maximum.", function_name );

		return( -1 );
	}
	if( NULL == utf8_stream_size )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream size.", function_name );

		return( -1 );
	}
	/* Reserve room for the byte order mark
	 */
	*utf8_stream_size = 3;

	/* The index is passed by reference to the UTF-32 reader, which is
	 * expected to advance it; the loop itself has no increment step
	 */
	for( utf32_string_index = 0;
	     utf32_string_index < utf32_string_size; )
	{
		/* Read the next Unicode character from the UTF-32 string
		 */
		if( 1 != libuna_unicode_character_copy_from_utf32(
		          &character,
		          utf32_string,
		          utf32_string_size,
		          &utf32_string_index,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_INPUT_FAILED,
			 "%s: unable to copy Unicode character from UTF-32.", function_name );

			return( -1 );
		}
		/* Account for the UTF-8 bytes this character requires
		 */
		if( 1 != libuna_unicode_character_size_to_utf8(
		          character,
		          utf8_stream_size,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_OUTPUT_FAILED,
			 "%s: unable to determine size of Unicode character in UTF-8.", function_name );

			return( -1 );
		}
	}
	return( 1 );
}
/* Converts an UTF-32 string into an UTF-8 stream
 * The byte order mark is written first; after that every Unicode character
 * read from the UTF-32 string is passed to
 * libuna_unicode_character_copy_to_utf8 together with the stream and the
 * current stream position
 * Returns 1 if successful or -1 on error
 */
int libuna_utf8_stream_copy_from_utf32(
     uint8_t *utf8_stream,
     size_t utf8_stream_size,
     const libuna_utf32_character_t *utf32_string,
     size_t utf32_string_size,
     liberror_error_t **error )
{
	static char *function_name           = "libuna_utf8_stream_copy_from_utf32";
	libuna_unicode_character_t character = 0;
	size_t utf32_string_index            = 0;
	size_t utf8_stream_index             = 0;

	if( NULL == utf8_stream )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-8 stream.", function_name );

		return( -1 );
	}
	if( utf8_stream_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-8 stream size value exceeds maximum.", function_name );

		return( -1 );
	}
	if( NULL == utf32_string )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_INVALID_VALUE,
		 "%s: invalid UTF-32 string.", function_name );

		return( -1 );
	}
	if( utf32_string_size > (size_t) SSIZE_MAX )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_ARGUMENTS,
		 LIBERROR_ARGUMENT_ERROR_VALUE_EXCEEDS_MAXIMUM,
		 "%s: invalid UTF-32 string size value exceeds maximum.", function_name );

		return( -1 );
	}
	/* The stream starts with the byte order mark
	 */
	if( 1 != libuna_utf8_stream_copy_byte_order_mark(
	          utf8_stream,
	          utf8_stream_size,
	          &utf8_stream_index,
	          error ) )
	{
		liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
		 LIBERROR_CONVERSION_ERROR_OUTPUT_FAILED,
		 "%s: unable to copy UTF-8 byte order mark.", function_name );

		return( -1 );
	}
	/* Both indexes are passed by reference to the helpers, which are
	 * expected to advance them; the loop itself has no increment step
	 */
	for( utf32_string_index = 0;
	     utf32_string_index < utf32_string_size; )
	{
		/* Read the next Unicode character from the UTF-32 string
		 */
		if( 1 != libuna_unicode_character_copy_from_utf32(
		          &character,
		          utf32_string,
		          utf32_string_size,
		          &utf32_string_index,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_INPUT_FAILED,
			 "%s: unable to copy Unicode character from UTF-32.", function_name );

			return( -1 );
		}
		/* Write the character to the UTF-8 stream
		 */
		if( 1 != libuna_unicode_character_copy_to_utf8(
		          character,
		          utf8_stream,
		          utf8_stream_size,
		          &utf8_stream_index,
		          error ) )
		{
			liberror_error_set( error, LIBERROR_ERROR_DOMAIN_CONVERSION,
			 LIBERROR_CONVERSION_ERROR_OUTPUT_FAILED,
			 "%s: unable to copy Unicode character to UTF-8 stream.", function_name );

			return( -1 );
		}
	}
	return( 1 );
}
File Metadata
Details
Attached
Mime Type
text/x-c
Expires
Fri, Nov 21, 3:17 AM (1 d, 10 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
1360804
Default Alt Text
libuna_utf8_stream.c (15 KB)
Attached To
Mode
rXMOUNT xmount
Attached
Detach File
Event Timeline
Log In to Comment