From: Michal Malý Date: Fri, 27 Mar 2015 00:48:54 +0000 (+0100) Subject: - Read strings from data files as UTF-16LE encoded strings (Unix X-Git-Url: https://gitweb.devoid-pointer.net/?a=commitdiff_plain;h=ac324c9ec445487b1db4b89282ab0fecaeb211a4;p=libHPCS.git - Read strings from data files as UTF-16LE encoded strings (Unix implementation) - Use ICU converter to convert from UChars to UTF-8 - Reduce minimum ICU version to 52.0 - Bump SOVERSION --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 4aa0dbb..e7be294 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,7 +17,7 @@ else() add_definitions(-D_HPCS_LITTLE_ENDIAN) endif() -find_package(ICU 54 REQUIRED COMPONENTS uc io) +find_package(ICU 52 REQUIRED COMPONENTS uc io) set(libHPCS_SRCS libhpcs.c) @@ -29,8 +29,8 @@ include_directories( add_library(HPCS SHARED ${libHPCS_SRCS}) target_link_libraries(HPCS PRIVATE ${ICU_LIBRARIES}) set_target_properties(HPCS - PROPERTIES VERSION 2.0 - SOVERSION 2.0) + PROPERTIES VERSION 3.0 + SOVERSION 3.0) if (${BuildTestTool}) set(libHPCS_test_SRCS diff --git a/libhpcs.c b/libhpcs.c index 0ab6775..3fcdf41 100644 --- a/libhpcs.c +++ b/libhpcs.c @@ -804,7 +804,8 @@ static enum HPCS_ParseCode read_string_at_offset(FILE* datafile, const HPCS_offs /* String is stored as native Windows WCHAR */ return __win32_wchar_to_utf8(result, string); #else - #error "Not implemented" + /* Explicitly convert from UTF-16LE (internal WCHAR representation) */ + return __unix_wchar_to_utf8(result, string, str_length * SEGMENT_SIZE); #endif } @@ -913,35 +914,42 @@ static void __unix_hpcs_destroy() static enum HPCS_ParseCode __unix_icu_to_utf8(char** target, const UChar* s) { - UChar32 c; + int32_t utf8_size; + UConverter* cnv; UErrorCode uec = U_ZERO_ERROR; - int32_t utf8_size = 0; - int32_t idx = 0; -#ifndef NDEBUG - int32_t wrt_size; -#define pWrt_size &wrt_size -#else -#define pWrt_size NULL -#endif - do { - U16_NEXT(s, idx, -1, c); - utf8_size += U8_LENGTH(c); - } while (c != 0); + cnv = 
ucnv_open("UTF-8", &uec); + if (U_FAILURE(uec)) { + PR_DEBUGF("Unable to create converter, error: %s\n", u_errorName(uec)); + return PARSE_E_INTERNAL; + } - *target = malloc(utf8_size); - if (*target == NULL) - return PARSE_E_NO_MEM; + utf8_size = ucnv_fromUChars(cnv, NULL, 0, s, -1, &uec); + if (U_FAILURE(uec) && uec != U_BUFFER_OVERFLOW_ERROR) { + ucnv_close(cnv); + PR_DEBUGF("Count ucnv_fromUChars(), error: %s\n", u_errorName(uec)); + return PARSE_E_INTERNAL; + } + uec = U_ZERO_ERROR; - u_strToUTF8(*target, utf8_size, pWrt_size, s, -1, &uec); + if (utf8_size == 0) { + ucnv_close(cnv); + return PARSE_E_CANT_READ; + } - PR_DEBUGF("Memory allocated: %d, Units written: %d, UEC: %x\n", utf8_size, wrt_size, uec); - PR_DEBUGF("Resulting string: %s\n", *target); + *target = malloc(utf8_size + 1); + if (*target == NULL) { + ucnv_close(cnv); + return PARSE_E_NO_MEM; + } + memset(*target, 0, utf8_size + 1); + ucnv_fromUChars(cnv, *target, utf8_size, s, -1, &uec); + ucnv_close(cnv); if (U_FAILURE(uec)) { - PR_DEBUGF("ICU error: %s\n", u_errorName(uec)); free(*target); - return PARSE_E_CANT_READ; + PR_DEBUGF("Convert ucnv_fromUChars(), error: %s\n", u_errorName(uec)); + return PARSE_E_INTERNAL; } return PARSE_OK; @@ -990,6 +998,52 @@ static enum HPCS_ParseCode __unix_parse_native_method_info_line(char** name, cha return PARSE_OK; } + +static enum HPCS_ParseCode __unix_wchar_to_utf8(char** target, const char* bytes, const size_t bytes_count) +{ + int32_t u_size; + UChar* u_str; + UConverter* cnv; + enum HPCS_ParseCode ret; + UErrorCode uec = U_ZERO_ERROR; + + cnv = ucnv_open("UTF-16LE", &uec); + if (U_FAILURE(uec)) { + PR_DEBUGF("Unable to create converter, error: %s\n", u_errorName(uec)); + return PARSE_E_INTERNAL; + } + + u_size = ucnv_toUChars(cnv, NULL, 0, bytes, bytes_count, &uec); + if (U_FAILURE(uec) && uec != U_BUFFER_OVERFLOW_ERROR) { + ucnv_close(cnv); + PR_DEBUGF("Count ucnv_toUchars(), error: %s\n", u_errorName(uec)); + return PARSE_E_INTERNAL; + } + uec = 
U_ZERO_ERROR; + + if (u_size == 0) { + ucnv_close(cnv); + return PARSE_E_CANT_READ; + } + u_str = calloc(u_size + 1, sizeof(UChar)); + if (u_str == NULL) { + ucnv_close(cnv); + return PARSE_E_NO_MEM; + } + memset(u_str, 0, (u_size + 1) * sizeof(UChar)); + + ucnv_toUChars(cnv, u_str, u_size, bytes, bytes_count, &uec); + ucnv_close(cnv); + if (U_FAILURE(uec)) { + free(u_str); + PR_DEBUGF("Convert ucnv_toUchars(), error: %s\n", u_errorName(uec)); + return PARSE_E_INTERNAL; + } + + ret = __unix_icu_to_utf8(target, u_str); + free(u_str); + return ret; +} #endif #ifdef __cplusplus diff --git a/libhpcs_p.h b/libhpcs_p.h index 923e106..c775ad8 100644 --- a/libhpcs_p.h +++ b/libhpcs_p.h @@ -158,6 +158,7 @@ static enum HPCS_ParseCode __unix_icu_to_utf8(char** target, const UChar* s); static HPCS_UFH __unix_open_data_file(const char* filename); static enum HPCS_ParseCode __unix_next_native_line(UFILE* fh, UChar* line, int32_t length); static enum HPCS_ParseCode __unix_parse_native_method_info_line(char** name, char** value, UChar* line); +static enum HPCS_ParseCode __unix_wchar_to_utf8(char** target, const char* bytes, const size_t bytes_count); #define __ICU_INIT_STRING(dst, s) do { \ UChar temp[64]; \