Use wcstombs when iconv is not present (#223)

Replace the jerry-built UTF-16 => Latin-1 code path with a cross-platform wcstombs solution that emits UTF-8.

This adds a `locale` member to the end of the MdbHandle struct (only in builds without iconv). Because the member is appended after the existing fields, it should not break any existing code.

A run-time option could be added later to emit other encodings, but people who care about such things can just use the iconv code path.
This commit is contained in:
Evan Miller 2020-12-20 17:56:33 -05:00 committed by GitHub
parent fb6637c503
commit a8414720e4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 47 additions and 11 deletions

View File

@ -27,7 +27,7 @@ AC_PROG_YACC
dnl Checks for header files.
AC_HEADER_STDC
AC_CHECK_HEADERS(fcntl.h limits.h unistd.h)
AC_CHECK_HEADERS(fcntl.h limits.h unistd.h xlocale.h)
AC_CHECK_HEADERS(wordexp.h)
AC_CHECK_LIB(mswstr, DBLCMapStringW)
AC_CHECK_DECLS([program_invocation_short_name], [], [], [[
@ -36,7 +36,7 @@ AC_CHECK_DECLS([program_invocation_short_name], [], [], [[
dnl Checks for library functions.
VL_LIB_READLINE
AC_CHECK_FUNCS(strptime fmemopen gmtime_r)
AC_CHECK_FUNCS(strptime fmemopen gmtime_r wcstombs_l)
dnl Checks for typedefs, structures, and compiler characteristics.
AC_C_CONST

View File

@ -39,6 +39,11 @@
#ifdef HAVE_ICONV
#include <iconv.h>
#else
#ifdef HAVE_XLOCALE_H
#include <xlocale.h>
#endif
#include <locale.h>
#endif
#ifdef _WIN32
@ -291,6 +296,10 @@ typedef struct {
#ifdef HAVE_ICONV
iconv_t iconv_in;
iconv_t iconv_out;
#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS)
_locale_t locale;
#else
locale_t locale;
#endif
} MdbHandle;

View File

@ -16,10 +16,11 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "mdbtools.h"
#define _XOPEN_SOURCE
#include <time.h>
#include <math.h>
#include "mdbtools.h"
#define OFFSET_MASK 0x1fff
#define OLE_BUFFER_SIZE (MDB_BIND_SIZE*64)

View File

@ -78,14 +78,32 @@ static size_t decompressed2ascii_without_iconv(MdbHandle *mdb, const char *in_pt
snprintf(dest, dlen, "%.*s%n", (int)len_in, in_ptr, &count);
return count;
}
/* rough UCS-2LE to ISO-8859-1 conversion */
/* wcstombs would be better; see libxls implementation for
* a multi-platform solution */
unsigned int i;
for (i=0; 2*i+1<len_in && i<dlen-1; i++)
dest[i] = (in_ptr[2*i+1] == 0) ? in_ptr[2*i] : '?';
dest[i] = '\0';
return i;
size_t i;
size_t count = 0;
size_t len_out = dlen - 1;
wchar_t *w = malloc((len_in/2+1)*sizeof(wchar_t));
for(i=0; i<len_in/2; i++)
{
w[i] = (unsigned char)in_ptr[2*i] + ((unsigned char)in_ptr[2*i+1] << 8);
}
w[len_in/2] = '\0';
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS)
count = _wcstombs_l(dest, w, len_out, mdb->locale);
#elif defined(HAVE_WCSTOMBS_L)
count = wcstombs_l(dest, w, len_out, mdb->locale);
#else
locale_t oldlocale = uselocale(mdb->locale);
count = wcstombs(dest, w, len_out);
uselocale(oldlocale);
#endif
free(w);
if (count == (size_t)-1)
return 0;
dest[count] = '\0';
return count;
}
#endif
@ -247,6 +265,10 @@ void mdb_iconv_init(MdbHandle *mdb)
mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code);
mdb->iconv_in = iconv_open(iconv_code, jet3_iconv_code);
}
#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS)
mdb->locale = _create_locale(LC_CTYPE, ".65001");
#else
mdb->locale = newlocale(LC_CTYPE_MASK, "C.UTF-8", NULL);
#endif
}
void mdb_iconv_close(MdbHandle *mdb)
@ -254,6 +276,10 @@ void mdb_iconv_close(MdbHandle *mdb)
#ifdef HAVE_ICONV
if (mdb->iconv_out != (iconv_t)-1) iconv_close(mdb->iconv_out);
if (mdb->iconv_in != (iconv_t)-1) iconv_close(mdb->iconv_in);
#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) || defined(WINDOWS)
if (mdb->locale) _free_locale(mdb->locale);
#else
if (mdb->locale) freelocale(mdb->locale);
#endif
}