From c9ba6488f4c26a7abf213ee92d19b9f7eab13f84 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Michal=20Mal=C3=BD?= Date: Thu, 26 Mar 2015 23:26:39 +0100 Subject: [PATCH] Read strings from data files as UTF-16LE encoded strings --- VS2013/libHPCS/libHPCS.vcxproj | 8 ++++---- libhpcs.c | 32 ++++++++++++++++++++++++-------- test_tool.c | 9 +++++++++ 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/VS2013/libHPCS/libHPCS.vcxproj b/VS2013/libHPCS/libHPCS.vcxproj index 02d7137..39289b8 100644 --- a/VS2013/libHPCS/libHPCS.vcxproj +++ b/VS2013/libHPCS/libHPCS.vcxproj @@ -91,7 +91,7 @@ Windows true Shlwapi.lib;%(AdditionalDependencies) - 2.0 + 3.0 @@ -107,7 +107,7 @@ Windows true Shlwapi.lib;%(AdditionalDependencies) - 2.0 + 3.0 @@ -127,7 +127,7 @@ true true Shlwapi.lib;%(AdditionalDependencies) - 2.0 + 3.0 @@ -147,7 +147,7 @@ true true Shlwapi.lib;%(AdditionalDependencies) - 2.0 + 3.0 diff --git a/libhpcs.c b/libhpcs.c index 33d69ff..0ab6775 100644 --- a/libhpcs.c +++ b/libhpcs.c @@ -476,6 +476,10 @@ out: return ret; } +/* This function assumes that the date information is composed only + of characters from the ISO-8859-1 charset. 
Under this assumption it is + possible to treat UTF-8 strings as single-byte strings with ISO-8859-1 + encoding */ static enum HPCS_ParseCode read_date(FILE* datafile, struct HPCS_Date* date) { char* date_str; @@ -768,7 +772,7 @@ static enum HPCS_ParseCode read_sampling_rate(FILE* datafile, double* sampling_r static enum HPCS_ParseCode read_string_at_offset(FILE* datafile, const HPCS_offset offset, char** const result) { char* string; - uint8_t str_length, idx; + uint8_t str_length; size_t r; fseek(datafile, offset, SEEK_SET); @@ -779,18 +783,29 @@ static enum HPCS_ParseCode read_string_at_offset(FILE* datafile, const HPCS_offs if (r != 1) return PARSE_E_CANT_READ; - string = malloc(str_length + 1); + if (str_length == 0) { + *result = malloc(sizeof(char)); + *result[0] = 0; + return PARSE_OK; + } + + string = calloc(str_length + 1, SEGMENT_SIZE); if (string == NULL) return PARSE_E_NO_MEM; + memset(string, 0, (str_length + 1) * SEGMENT_SIZE); - for (idx = 0; idx < str_length; idx++) { - fread(string+idx, SMALL_SEGMENT_SIZE, 1, datafile); - fseek(datafile, SMALL_SEGMENT_SIZE, SEEK_CUR); + r = fread(string, SEGMENT_SIZE, str_length, datafile); + if (r < str_length) { + free(string); + return PARSE_E_CANT_READ; } - string[str_length] = 0; - *result = string; - return PARSE_OK; +#ifdef _WIN32 + /* String is stored as native Windows WCHAR */ + return __win32_wchar_to_utf8(result, string); +#else + #error "Not implemented" +#endif } /** Platform-specific functions */ @@ -869,6 +884,7 @@ static enum HPCS_ParseCode __win32_wchar_to_utf8(char** target, const WCHAR* s) PR_DEBUGF("Count WideCharToMultiByte() error: 0x%x\n", GetLastError()); return PARSE_E_INTERNAL; } + PR_DEBUGF("mb_size: %d\n", mb_size); *target = malloc(mb_size); if (*target == NULL) return PARSE_E_NO_MEM; diff --git a/test_tool.c b/test_tool.c index 1447961..4c7c172 100644 --- a/test_tool.c +++ b/test_tool.c @@ -22,6 +22,15 @@ int read_data(const char* path) return EXIT_FAILURE; } + printf("Sample info: 
%s\n" + "Operator name: %s\n" + "Method name: %s\n" + "Y units: %s\n", + mdata->sample_info, + mdata->operator_name, + mdata->method_name, + mdata->y_units); + for (di = 0; di < mdata->data_count; di++) printf("Time: %.17lg, Value: %.17lg\n", mdata->data[di].time, mdata->data[di].value); -- 2.43.5