Devoid-pointer.net GitWeb - libHPCS.git/commitdiff
- Read strings from data files as UTF-16LE encoded strings (Unix implementation)
author Michal Malý <madcatxster@devoid-pointer.net>
Fri, 27 Mar 2015 00:48:54 +0000 (01:48 +0100)
committer Michal Malý <madcatxster@devoid-pointer.net>
Fri, 27 Mar 2015 00:48:54 +0000 (01:48 +0100)
- Use ICU converter to convert from UChars to UTF-8
- Reduce minimum ICU version to 52.0
- Bump SOVERSION

CMakeLists.txt
libhpcs.c
libhpcs_p.h

index 4aa0dbba4bb7127ab4c75e05b6bc1a34caab5f58..e7be294e6d46ec52d6160de75990b8f70494bbac 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,7 +17,7 @@ else()
   add_definitions(-D_HPCS_LITTLE_ENDIAN)
 endif()
 
-find_package(ICU 54 REQUIRED COMPONENTS uc io)
+find_package(ICU 52 REQUIRED COMPONENTS uc io)
 
 set(libHPCS_SRCS
     libhpcs.c)
@@ -29,8 +29,8 @@ include_directories(
 add_library(HPCS SHARED ${libHPCS_SRCS})
 target_link_libraries(HPCS PRIVATE ${ICU_LIBRARIES})
 set_target_properties(HPCS
-                     PROPERTIES VERSION 2.0
-                     SOVERSION 2.0)
+                     PROPERTIES VERSION 3.0
+                     SOVERSION 3.0)
 
 if (${BuildTestTool})
   set(libHPCS_test_SRCS
index 0ab677523eaf593910b6946d5fbe074bf0c8728e..3fcdf41060e6b861beb7fdacb2dfb0498a947960 100644 (file)
--- a/libhpcs.c
+++ b/libhpcs.c
@@ -804,7 +804,8 @@ static enum HPCS_ParseCode read_string_at_offset(FILE* datafile, const HPCS_offs
        /* String is stored as native Windows WCHAR */
        return __win32_wchar_to_utf8(result, string);
 #else
-       #error "Not implemented"
+       /* Explicitly convert from UTF-16LE (internal WCHAR representation) */
+       return __unix_wchar_to_utf8(result, string, str_length * SEGMENT_SIZE);
 #endif
 }
 
@@ -913,35 +914,42 @@ static void __unix_hpcs_destroy()
 
 static enum HPCS_ParseCode __unix_icu_to_utf8(char** target, const UChar* s)
 {
-       UChar32 c;
+       int32_t utf8_size;
+       UConverter* cnv;
        UErrorCode uec = U_ZERO_ERROR;
-       int32_t utf8_size = 0;
-       int32_t idx = 0;
-#ifndef NDEBUG
-       int32_t wrt_size;
-#define pWrt_size &wrt_size
-#else
-#define pWrt_size NULL
-#endif
 
-       do {
-               U16_NEXT(s, idx, -1, c);
-               utf8_size += U8_LENGTH(c);
-       } while (c != 0);
+       cnv = ucnv_open("UTF-8", &uec);
+       if (U_FAILURE(uec)) {
+               PR_DEBUGF("Unable to create converter, error: %s\n", u_errorName(uec));
+               return PARSE_E_INTERNAL;
+       }
 
-       *target = malloc(utf8_size);
-       if (*target == NULL)
-               return PARSE_E_NO_MEM;
+       utf8_size = ucnv_fromUChars(cnv, NULL, 0, s, -1, &uec);
+       if (U_FAILURE(uec) && uec != U_BUFFER_OVERFLOW_ERROR) {
+               ucnv_close(cnv);
+               PR_DEBUGF("Count ucnv_fromUChars(), error: %s\n", u_errorName(uec));
+               return PARSE_E_INTERNAL;
+       }
+       uec = U_ZERO_ERROR;
 
-       u_strToUTF8(*target, utf8_size, pWrt_size, s, -1, &uec);
+       if (utf8_size == 0) {
+               ucnv_close(cnv);
+               return PARSE_E_CANT_READ;
+       }
 
-       PR_DEBUGF("Memory allocated: %d, Units written: %d, UEC: %x\n", utf8_size, wrt_size, uec);
-       PR_DEBUGF("Resulting string: %s\n", *target);
+       *target = malloc(utf8_size + 1);
+       if (*target == NULL) {
+               ucnv_close(cnv);
+               return PARSE_E_NO_MEM;
+       }
+       memset(*target, 0, utf8_size + 1);
 
+       ucnv_fromUChars(cnv, *target, utf8_size, s, -1, &uec);
+       ucnv_close(cnv);
        if (U_FAILURE(uec)) {
-               PR_DEBUGF("ICU error: %s\n", u_errorName(uec));
                free(*target);
-               return PARSE_E_CANT_READ;
+               PR_DEBUGF("Convert ucnv_fromUChars(), error: %s\n", u_errorName(uec));
+               return PARSE_E_INTERNAL;
        }
 
        return PARSE_OK;
@@ -990,6 +998,52 @@ static enum HPCS_ParseCode __unix_parse_native_method_info_line(char** name, cha
 
        return PARSE_OK;
 }
+
+static enum HPCS_ParseCode __unix_wchar_to_utf8(char** target, const char* bytes, const size_t bytes_count)
+{
+       int32_t u_size;
+       UChar* u_str;
+       UConverter* cnv;
+       enum HPCS_ParseCode ret;
+       UErrorCode uec = U_ZERO_ERROR;
+
+       cnv = ucnv_open("UTF-16LE", &uec);
+       if (U_FAILURE(uec)) {
+               PR_DEBUGF("Unable to create converter, error: %s\n", u_errorName(uec));
+               return PARSE_E_INTERNAL;
+       }
+
+       u_size = ucnv_toUChars(cnv, NULL, 0, bytes, bytes_count, &uec);
+       if (U_FAILURE(uec) && uec != U_BUFFER_OVERFLOW_ERROR) {
+               ucnv_close(cnv);
+               PR_DEBUGF("Count ucnv_toUchars(), error: %s\n", u_errorName(uec));
+               return PARSE_E_INTERNAL;
+       }
+       uec = U_ZERO_ERROR;
+
+       if (u_size == 0) {
+               ucnv_close(cnv);
+               return PARSE_E_CANT_READ;
+       }
+       u_str = calloc(u_size + 1, sizeof(UChar));
+       if (u_str == NULL) {
+               ucnv_close(cnv);
+               return PARSE_E_NO_MEM;
+       }
+       memset(u_str, 0, (u_size + 1) * sizeof(UChar));
+
+       ucnv_toUChars(cnv, u_str, u_size, bytes, bytes_count, &uec);
+       ucnv_close(cnv);
+       if (U_FAILURE(uec)) {
+               free(u_str);
+               PR_DEBUGF("Convert ucnv_toUchars(), error: %s\n", u_errorName(uec));
+               return PARSE_E_INTERNAL;
+       }
+
+       ret = __unix_icu_to_utf8(target, u_str);
+       free(u_str);
+       return ret;
+}
 #endif
 
 #ifdef __cplusplus
index 923e106522b6c1606fc2517a50af609571937446..c775ad821966636e2d339c34b974ace0c7a96ba4 100644 (file)
--- a/libhpcs_p.h
+++ b/libhpcs_p.h
@@ -158,6 +158,7 @@ static enum HPCS_ParseCode __unix_icu_to_utf8(char** target, const UChar* s);
 static HPCS_UFH __unix_open_data_file(const char* filename);
 static enum HPCS_ParseCode __unix_next_native_line(UFILE* fh, UChar* line, int32_t length);
 static enum HPCS_ParseCode __unix_parse_native_method_info_line(char** name, char** value, UChar* line);
+static enum HPCS_ParseCode __unix_wchar_to_utf8(char** target, const char* bytes, const size_t bytes_count);
 
 #define __ICU_INIT_STRING(dst, s) do { \
        UChar temp[64]; \