From a8414720e43671e45d9b3c71a0f7833e9c7fad0d Mon Sep 17 00:00:00 2001 From: Evan Miller Date: Sun, 20 Dec 2020 17:56:33 -0500 Subject: [PATCH] Use wcstombs when iconv is not present (#223) Replace the jerry-built UTF-16 => Latin-1 code path with a cross-platform wcstombs solution that emits UTF-8. This adds an element to the end of the MdbHandle struct, but should not break any existing code. A run-time option could be added later to emit other encodings, but people who care about such things can just use the iconv code path. --- configure.ac | 4 ++-- include/mdbtools.h | 9 +++++++++ src/libmdb/data.c | 3 ++- src/libmdb/iconv.c | 42 ++++++++++++++++++++++++++++++++++-------- 4 files changed, 47 insertions(+), 11 deletions(-) diff --git a/configure.ac b/configure.ac index 93bd00c..6fa6e36 100644 --- a/configure.ac +++ b/configure.ac @@ -27,7 +27,7 @@ AC_PROG_YACC dnl Checks for header files. AC_HEADER_STDC -AC_CHECK_HEADERS(fcntl.h limits.h unistd.h) +AC_CHECK_HEADERS(fcntl.h limits.h unistd.h xlocale.h) AC_CHECK_HEADERS(wordexp.h) AC_CHECK_LIB(mswstr, DBLCMapStringW) AC_CHECK_DECLS([program_invocation_short_name], [], [], [[ @@ -36,7 +36,7 @@ AC_CHECK_DECLS([program_invocation_short_name], [], [], [[ dnl Checks for library functions. VL_LIB_READLINE -AC_CHECK_FUNCS(strptime fmemopen gmtime_r) +AC_CHECK_FUNCS(strptime fmemopen gmtime_r wcstombs_l) dnl Checks for typedefs, structures, and compiler characteristics. AC_C_CONST diff --git a/include/mdbtools.h b/include/mdbtools.h index e17e414..8c19783 100644 --- a/include/mdbtools.h +++ b/include/mdbtools.h @@ -39,6 +39,11 @@ #ifdef HAVE_ICONV #include +#else +#ifdef HAVE_XLOCALE_H +#include +#endif +#include #endif #ifdef _WIN32 @@ -291,6 +296,10 @@ typedef struct { #ifdef HAVE_ICONV iconv_t iconv_in; iconv_t iconv_out; +#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + _locale_t locale; +#else + locale_t locale; #endif } MdbHandle; diff --git a/src/libmdb/data.c b/src/libmdb/data.c index b32844b..73db744 100644 --- a/src/libmdb/data.c +++ b/src/libmdb/data.c @@ -16,10 +16,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "mdbtools.h" + #define _XOPEN_SOURCE #include #include -#include "mdbtools.h" #define OFFSET_MASK 0x1fff #define OLE_BUFFER_SIZE (MDB_BIND_SIZE*64) diff --git a/src/libmdb/iconv.c b/src/libmdb/iconv.c index fb6e084..87264f1 100644 --- a/src/libmdb/iconv.c +++ b/src/libmdb/iconv.c @@ -78,14 +78,32 @@ static size_t decompressed2ascii_without_iconv(MdbHandle *mdb, const char *in_pt snprintf(dest, dlen, "%.*s%n", (int)len_in, in_ptr, &count); return count; } - /* rough UCS-2LE to ISO-8859-1 conversion */ - /* wcstombs would be better; see libxls implementation for - * a multi-platform solution */ - unsigned int i; - for (i=0; 2*i+1locale); +#elif defined(HAVE_WCSTOMBS_L) + count = wcstombs_l(dest, w, len_out, mdb->locale); +#else + locale_t oldlocale = uselocale(mdb->locale); + count = wcstombs(dest, w, len_out); + uselocale(oldlocale); +#endif + free(w); + if (count == (size_t)-1) + return 0; + + dest[count] = '\0'; + return count; } #endif @@ -247,6 +265,10 @@ void mdb_iconv_init(MdbHandle *mdb) mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code); mdb->iconv_in = iconv_open(iconv_code, jet3_iconv_code); } +#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + mdb->locale = _create_locale(LC_CTYPE, ".65001"); +#else + mdb->locale = newlocale(LC_CTYPE_MASK, "C.UTF-8", NULL); #endif } void mdb_iconv_close(MdbHandle *mdb) @@ -254,6 +276,10 @@ void mdb_iconv_close(MdbHandle *mdb) #ifdef HAVE_ICONV if (mdb->iconv_out != (iconv_t)-1) iconv_close(mdb->iconv_out); if (mdb->iconv_in != (iconv_t)-1) iconv_close(mdb->iconv_in); +#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS) + if (mdb->locale) _free_locale(mdb->locale); +#else + if (mdb->locale) freelocale(mdb->locale); #endif }