Make command-line arguments locale-aware (#237)

GLib will automatically convert command-line options to UTF-8 provided that setlocale(LC_CTYPE, "") is called first, and the argument type is STRING (but not FILENAME). Update the CLI tools to take advantage of this behavior, and likewise implement it in fakeglib.

GLib does not automatically convert non-option arguments (i.e. everything remaining in argv after option processing), so manually call g_locale_to_utf8 on these arguments when they represent table names. This should fix the CLI tools when processing non-ASCII table names in non-UTF-8 locales. Also update fakeglib to implement a fast and loose version of g_locale_to_utf8, and factor out some of the code page => iconv name logic in iconv.c so it can be used in our fake g_locale_to_utf8. This adds a new symbol mdb_iconv_name_from_code_page that is not advertised in the main header file. I did not want to include mdbtools.h from fakeglib.c, but maybe that's not important.
This commit is contained in:
Evan Miller 2021-01-14 17:34:50 -05:00 committed by GitHub
parent 42431bbba8
commit 0e3a627ee6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 234 additions and 54 deletions

View File

@ -27,11 +27,17 @@ jobs:
- name: SQL tests
run: bash -e -x ./test_sql.sh
- name: ODBC tests
run: env MDBPATH=test/data ./src/odbc/unittest
run: ./src/odbc/unittest
env:
MDBPATH: test/data
- name: pkg-config libmdb test
run: env PKG_CONFIG_PATH=. pkg-config libmdb --exists
run: pkg-config libmdb --exists
env:
PKG_CONFIG_PATH: .
- name: pkg-config libmdbsql test
run: env PKG_CONFIG_PATH=. pkg-config libmdbsql --exists
run: pkg-config libmdbsql --exists
env:
PKG_CONFIG_PATH: .
macos:
runs-on: macos-latest
strategy:
@ -59,11 +65,17 @@ jobs:
- name: SQL tests
run: bash -e -x ./test_sql.sh
- name: ODBC tests
run: env MDBPATH=test/data ./src/odbc/unittest
run: ./src/odbc/unittest
env:
MDBPATH: test/data
- name: pkg-config libmdb test
run: env PKG_CONFIG_PATH=. pkg-config libmdb --exists
run: pkg-config libmdb --exists
env:
PKG_CONFIG_PATH: .
- name: pkg-config libmdbsql test
run: env PKG_CONFIG_PATH=. pkg-config libmdbsql --exists
run: pkg-config libmdbsql --exists
env:
PKG_CONFIG_PATH: .
macos-iodbc:
runs-on: macos-latest
strategy:
@ -88,7 +100,9 @@ jobs:
- name: Make
run: make
- name: ODBC tests
run: env MDBPATH=test/data ./src/odbc/unittest
run: ./src/odbc/unittest
env:
MDBPATH: test/data
windows:
runs-on: windows-latest
env:

View File

@ -84,7 +84,9 @@ typedef struct GError {
typedef enum GOptionArg {
G_OPTION_ARG_NONE,
G_OPTION_ARG_STRING,
G_OPTION_ARG_INT
G_OPTION_ARG_INT,
G_OPTION_ARG_CALLBACK,
G_OPTION_ARG_FILENAME
} GOptionArg;
typedef enum GOptionFlags {
@ -145,10 +147,15 @@ char **g_strsplit(const char *haystack, const char *needle, int max_tokens);
void g_strfreev(char **dir);
char *g_strconcat(const char *first, ...);
char *g_strdup(const char *src);
char *g_strndup(const char *src, size_t len);
char *g_strdup_printf(const char *format, ...);
gchar *g_strdelimit(gchar *string, const gchar *delimiters, gchar new_delimiter);
void g_printerr(const gchar *format, ...);
/* conversion */
gchar *g_locale_to_utf8(const gchar *opsysstring, size_t len,
size_t *bytes_read, size_t *bytes_written, GError **error);
/* GString */
GString *g_string_new(const gchar *init);
GString *g_string_assign(GString *string, const gchar *rval);

View File

@ -30,6 +30,7 @@
#include <unistd.h>
#include <ctype.h>
#include <string.h>
#include <locale.h>
#ifdef HAVE_GLIB
#include <glib.h>
@ -37,14 +38,11 @@
#include <mdbfakeglib.h>
#endif
#ifdef HAVE_ICONV
#if defined(HAVE_ICONV)
#include <iconv.h>
#else
#ifdef HAVE_XLOCALE_H
#elif defined(HAVE_XLOCALE_H)
#include <xlocale.h>
#endif
#include <locale.h>
#endif
#ifdef _WIN32
#include <io.h>

View File

@ -28,6 +28,12 @@
#include <string.h>
#include <getopt.h>
#include <errno.h>
#ifdef HAVE_ICONV
#include <iconv.h>
#endif
/* Linked from libmdb */
const char *mdb_iconv_name_from_code_page(int code_page);
/* string functions */
@ -56,13 +62,7 @@ char **g_strsplit(const char *haystack, const char *needle, int max_tokens) {
int i = 0;
while ((found = strstr(haystack, needle))) {
// Windows lacks strndup
size_t chunk_len = found - haystack;
char *chunk = malloc(chunk_len + 1);
memcpy(chunk, haystack, chunk_len);
chunk[chunk_len] = 0;
ret[i++] = chunk;
ret[i++] = g_strndup(haystack, found - haystack);
haystack = found + strlen(needle);
}
ret[i] = strdup(haystack);
@ -129,6 +129,18 @@ char *g_strdup(const char *input) {
return g_memdup(input, len+1);
}
/*
 * Minimal stand-in for GLib's g_strndup().
 *
 * Returns a newly malloc()ed, NUL-terminated copy of at most `len` bytes of
 * `src`; copying stops early if a NUL byte is reached first.
 *
 * Returns NULL when `src` is NULL or when the allocation fails.
 * The caller owns the returned buffer.
 */
char *g_strndup(const char *src, size_t len) {
    if (!src)
        return NULL;

    /* Measure first so the buffer is sized to what is actually copied.
     * This also avoids computing len+1, which wraps to 0 when a caller
     * passes (size_t)-1 to mean "the whole string". */
    size_t copy_len = 0;
    while (copy_len < len && src[copy_len] != '\0') {
        copy_len++;
    }

    char *result = malloc(copy_len + 1);
    if (!result)
        return NULL;

    memcpy(result, src, copy_len);
    result[copy_len] = '\0';
    return result;
}
char *g_strdup_printf(const char *format, ...) {
char *ret = NULL;
va_list argp;
@ -210,6 +222,61 @@ gchar *g_string_free (GString *string, gboolean free_segment) {
return data;
}
/* conversion */
#ifdef HAVE_ICONV
/* Populate *error (when the caller asked for one) with the generic
 * conversion-failure message. Allocation failures leave *error untouched. */
static void set_conversion_error(GError **error) {
    if (!error)
        return;
    GError *err = calloc(1, sizeof *err);
    if (!err)
        return;
    err->message = malloc(100);
    if (!err->message) {
        free(err);
        return;
    }
    snprintf(err->message, 100, "Invalid byte sequence in conversion input");
    *error = err;
}
#endif

/*
 * Simplified stand-in for GLib's g_locale_to_utf8().
 *
 * opsysstring   - input bytes in the encoding of the current LC_CTYPE locale
 * len           - number of input bytes, or (size_t)-1 if NUL-terminated
 * bytes_read    - if non-NULL, receives the number of input bytes consumed
 * bytes_written - if non-NULL, receives the number of output bytes produced
 *                 (excluding the terminating NUL)
 * error         - if non-NULL, receives a newly allocated GError on
 *                 conversion failure
 *
 * The codeset is taken from whatever follows the first '.' in the LC_CTYPE
 * locale name. A purely numeric codeset is treated as a Windows code page and
 * mapped through mdb_iconv_name_from_code_page(); anything else is handed to
 * iconv_open() verbatim. When no codeset is present, iconv is unavailable, or
 * the codeset is unsupported, the input is copied unchanged.
 *
 * Returns a newly allocated NUL-terminated string owned by the caller, or
 * NULL on failure.
 */
gchar *g_locale_to_utf8(const gchar *opsysstring, size_t len,
        size_t *bytes_read, size_t *bytes_written, GError **error) {
#ifdef HAVE_ICONV
    iconv_t converter = NULL;
    char *locale = setlocale(LC_CTYPE, NULL);
    if (locale) {
        /* Skip the language/territory part; the codeset follows the '.' */
        while (*locale && *locale != '.') {
            locale++;
        }
        if (locale[0] == '.') {
            const char *iconv_name = NULL;
            unsigned short code_page = 0; /* matches the %hu conversion */
            if (sscanf(locale, ".%hu", &code_page) == 1) {
                iconv_name = mdb_iconv_name_from_code_page(code_page);
            } else {
                iconv_name = &locale[1];
            }
            if (iconv_name == NULL || (converter = iconv_open("UTF-8", iconv_name)) == (iconv_t)-1) {
                converter = NULL;
                fprintf(stderr, "Warning: unsupported locale \"%s\". Non-ASCII command-line arguments may work incorrectly.\n", &locale[1]);
            }
        }
    }
    if (converter) {
        /* Resolve the (size_t)-1 sentinel once so that every later length
         * computation uses the real byte count. */
        size_t input_len = len == (size_t)-1 ? strlen(opsysstring) : len;
        size_t input_left = input_len;
        /* UTF-8 needs at most 4 bytes per input byte */
        size_t utf8_len = 4*input_len;
        size_t output_left = utf8_len;
        char *utf8_string = malloc(utf8_len+1);
        if (utf8_string == NULL) {
            iconv_close(converter);
            set_conversion_error(error);
            return NULL;
        }
        char *output = utf8_string;
        char *input = (char *)opsysstring;
        size_t result = iconv(converter, (ICONV_CONST char **)&input, &input_left, &output, &output_left);
        iconv_close(converter);
        if (result == (size_t)-1) {
            free(utf8_string); /* do not leak the partial output */
            set_conversion_error(error);
            return NULL;
        }
        if (bytes_read)
            *bytes_read = input_len - input_left;
        if (bytes_written)
            *bytes_written = utf8_len - output_left;
        *output = '\0';
        return utf8_string;
    }
#endif
    /* No conversion possible: hand back a plain copy of the input. */
    gchar *copy = (len == (size_t)-1)
        ? g_strdup(opsysstring)
        : g_strndup(opsysstring, len);
    if (copy) {
        size_t copied = strlen(copy);
        if (bytes_read)
            *bytes_read = copied;
        if (bytes_written)
            *bytes_written = copied;
    }
    return copy;
}
/* GHashTable */
typedef struct MyNode {
@ -452,9 +519,8 @@ gboolean g_option_context_parse(GOptionContext *context,
GOptionArg arg = context->entries[i].arg;
count++;
len++;
if (arg == G_OPTION_ARG_STRING || arg == G_OPTION_ARG_INT) {
if (arg != G_OPTION_ARG_NONE)
len++;
}
}
struct option *long_opts = calloc(count+1, sizeof(struct option));
char *short_opts = calloc(1, len+1);
@ -463,9 +529,8 @@ gboolean g_option_context_parse(GOptionContext *context,
const GOptionEntry *entry = &context->entries[i];
GOptionArg arg = entry->arg;
short_opts[j++] = entry->short_name;
if (arg == G_OPTION_ARG_STRING || arg == G_OPTION_ARG_INT) {
if (arg != G_OPTION_ARG_NONE)
short_opts[j++] = ':';
}
long_opts[i].name = entry->long_name;
long_opts[i].has_arg = entry->arg == G_OPTION_ARG_NONE ? no_argument : required_argument;
}
@ -509,8 +574,16 @@ gboolean g_option_context_parse(GOptionContext *context,
free(long_opts);
return FALSE;
}
} else if (entry->arg == G_OPTION_ARG_STRING) {
} else if (entry->arg == G_OPTION_ARG_FILENAME) {
*(char **)entry->arg_data = strdup(optarg);
} else if (entry->arg == G_OPTION_ARG_STRING) {
char *result = g_locale_to_utf8(optarg, -1, NULL, NULL, error);
if (result == NULL) {
free(short_opts);
free(long_opts);
return FALSE;
}
*(char **)entry->arg_data = result;
}
}
*argc -= (optind - 1);

View File

@ -19,6 +19,8 @@
#include <errno.h>
#include "mdbtools.h"
const char *mdb_iconv_name_from_code_page(int code_page);
#ifndef MIN
#define MIN(a,b) (a>b ? b : a)
#endif
@ -259,6 +261,63 @@ mdb_target_charset(MdbHandle *mdb)
#endif
}
/* See: https://docs.microsoft.com/en-us/windows/win32/Intl/code-page-identifiers */
/*
 * Map a Windows code page identifier to the corresponding iconv encoding name.
 *
 * code_page - numeric code page identifier (e.g. 1252, 65001)
 *
 * Returns a pointer to a static string naming the encoding, or NULL when the
 * code page is not in the table. The caller must not free the result.
 */
const char *mdb_iconv_name_from_code_page(int code_page) {
    switch (code_page) {
        /* OEM / DOS code pages */
        case 437: return "IBM437";
        case 850: return "IBM850";
        case 852: return "IBM852";
        case 855: return "IBM855";
        case 860: return "IBM860";
        case 861: return "IBM861";
        case 862: return "IBM862";
        case 865: return "IBM865";
        case 866: return "IBM866";
        case 869: return "IBM869";
        /* ANSI / East Asian code pages */
        case 874: return "WINDOWS-874";
        case 932: return "SHIFT-JIS";
        case 936: return "WINDOWS-936";
        case 950: return "BIG-5";
        case 951: return "BIG5-HKSCS";
        case 1200: return "UTF-16LE";
        case 1201: return "UTF-16BE";
        case 1250: return "WINDOWS-1250";
        case 1251: return "WINDOWS-1251";
        case 1252: return "WINDOWS-1252";
        case 1253: return "WINDOWS-1253";
        case 1254: return "WINDOWS-1254";
        case 1255: return "WINDOWS-1255";
        case 1256: return "WINDOWS-1256";
        case 1257: return "WINDOWS-1257";
        case 1258: return "WINDOWS-1258";
        case 1361: return "CP1361";
        case 12000: return "UTF-32LE";
        case 12001: return "UTF-32BE";
        case 20866: return "KOI8-R";
        case 20932: return "EUC-JP";
        case 21866: return "KOI8-U";
        /* ISO 8859 family */
        case 28591: return "ISO-8859-1";
        case 28592: return "ISO-8859-2";
        case 28593: return "ISO-8859-3";
        case 28594: return "ISO-8859-4";
        case 28595: return "ISO-8859-5";
        case 28596: return "ISO-8859-6";
        case 28597: return "ISO-8859-7";
        case 28598: return "ISO-8859-8";
        case 28599: return "ISO-8859-9";
        case 28603: return "ISO-8859-13";
        case 28605: return "ISO-8859-15";
        /* 28503/28505 are not Microsoft identifiers (the documented values
         * are 28603/28605 above); they are still accepted so that callers
         * relying on the historical keys keep working. */
        case 28503: return "ISO-8859-13";
        case 28505: return "ISO-8859-15";
        /* EUC and Unicode transformation formats */
        case 51932: return "EUC-JP";
        case 51936: return "EUC-CN";
        case 51949: return "EUC-KR";
        case 65000: return "UTF-7";
        case 65001: return "UTF-8";
        default: return NULL;
    }
}
void mdb_iconv_init(MdbHandle *mdb)
{
const char *iconv_code;
@ -280,23 +339,10 @@ void mdb_iconv_init(MdbHandle *mdb)
/* Use code page embedded in the database */
/* Note that individual columns can override this value,
* but per-column code pages are not supported by libmdb */
switch (mdb->f->code_page) {
case 874: jet3_iconv_code="WINDOWS-874"; break;
case 932: jet3_iconv_code="SHIFT-JIS"; break;
case 936: jet3_iconv_code="WINDOWS-936"; break;
case 950: jet3_iconv_code="BIG-5"; break;
case 951: jet3_iconv_code="BIG5-HKSCS"; break;
case 1250: jet3_iconv_code="WINDOWS-1250"; break;
case 1251: jet3_iconv_code="WINDOWS-1251"; break;
case 1252: jet3_iconv_code="WINDOWS-1252"; break;
case 1253: jet3_iconv_code="WINDOWS-1253"; break;
case 1254: jet3_iconv_code="WINDOWS-1254"; break;
case 1255: jet3_iconv_code="WINDOWS-1255"; break;
case 1256: jet3_iconv_code="WINDOWS-1256"; break;
case 1257: jet3_iconv_code="WINDOWS-1257"; break;
case 1258: jet3_iconv_code="WINDOWS-1258"; break;
default: jet3_iconv_code="CP1252"; break;
}
jet3_iconv_code = mdb_iconv_name_from_code_page(mdb->f->code_page);
}
if (!jet3_iconv_code) {
jet3_iconv_code = "CP1252";
}
mdb->iconv_out = iconv_open(jet3_iconv_code, iconv_code);

View File

@ -25,6 +25,9 @@ int main(int argc, char **argv) {
MdbCatalogEntry *entry;
MdbTableDef *table;
int found = 0;
char *locale = NULL;
char *table_name = NULL;
GError *error = NULL;
if (argc < 3) {
fprintf(stderr, "Usage: %s <file> <table>\n", argv[0]);
@ -39,9 +42,16 @@ int main(int argc, char **argv) {
if (!mdb_read_catalog(mdb, MDB_TABLE)) {
return 1;
}
locale = setlocale(LC_CTYPE, "");
table_name = g_locale_to_utf8(argv[2], -1, NULL, NULL, &error);
setlocale(LC_CTYPE, locale);
if (!table_name) {
fprintf(stderr, "Error converting table argument: %s\n", error->message);
return 1;
}
for (i = 0; i < mdb->num_catalog; i++) {
entry = g_ptr_array_index(mdb->catalog, i);
if (entry->object_type == MDB_TABLE && !g_ascii_strcasecmp(entry->object_name, argv[2])) {
if (entry->object_type == MDB_TABLE && !g_ascii_strcasecmp(entry->object_name, table_name)) {
table = mdb_read_table(entry);
fprintf(stdout, "%d\n", table->num_rows);
found = 1;
@ -51,7 +61,7 @@ int main(int argc, char **argv) {
// check was found:
if (!found) {
fprintf(stderr, "No table named %s found (among %d tables in file).\n", argv[2], mdb->num_catalog);
fprintf(stderr, "No table named %s found (among %d tables in file).\n", table_name, mdb->num_catalog);
return 1;
}

View File

@ -113,6 +113,8 @@ main(int argc, char **argv)
char *value;
size_t length;
int ret;
char *table_name = NULL;
char *locale = NULL;
GOptionEntry entries[] = {
{"date-format", 'D', 0, G_OPTION_ARG_STRING, &shortdate_fmt, "Set the date format (see strftime(3) for details)", "format"},
@ -126,20 +128,28 @@ main(int argc, char **argv)
opt_context = g_option_context_new("<file> <table> - export data from Access file to JSON");
g_option_context_add_main_entries(opt_context, entries, NULL /*i18n*/);
locale = setlocale(LC_CTYPE, "");
if (!g_option_context_parse (opt_context, &argc, &argv, &error))
{
fprintf(stderr, "option parsing failed: %s\n", error->message);
fputs(g_option_context_get_help(opt_context, TRUE, NULL), stderr);
exit (1);
}
if (argc != 3) {
fputs("Wrong number of arguments.\n\n", stderr);
fputs(g_option_context_get_help(opt_context, TRUE, NULL), stderr);
exit(1);
}
table_name = g_locale_to_utf8(argv[2], -1, NULL, NULL, &error);
if (!table_name) {
fprintf(stderr, "argument parsing failed: %s\n", error->message);
exit(1);
}
setlocale(LC_CTYPE, locale);
if (!(mdb = mdb_open(argv[1], MDB_NOFLAGS))) {
g_free(table_name);
exit(1);
}
@ -151,9 +161,9 @@ main(int argc, char **argv)
mdb_set_bind_size(mdb, EXPORT_BIND_SIZE);
table = mdb_read_table_by_name(mdb, argv[2], MDB_TABLE);
table = mdb_read_table_by_name(mdb, table_name, MDB_TABLE);
if (!table) {
fprintf(stderr, "Error: Table %s does not exist in this database.\n", argv[argc-1]);
fprintf(stderr, "Error: Table %s does not exist in this database.\n", table_name);
mdb_close(mdb);
exit(1);
}
@ -170,6 +180,7 @@ main(int argc, char **argv)
ret = mdb_bind_column(table, i+1, bound_values[i], &bound_lens[i]);
if (ret == -1) {
fprintf(stderr, "Failed to bind column %d\n", i + 1);
mdb_close(mdb);
exit(1);
}
}
@ -207,6 +218,7 @@ main(int argc, char **argv)
g_free(bound_values);
g_free(bound_lens);
mdb_free_tabledef(table);
g_free(table_name);
mdb_close(mdb);
return 0;

View File

@ -25,6 +25,8 @@ main(int argc, char **argv)
{
MdbHandle *mdb;
MdbTableDef *table;
char *table_name = NULL;
char *locale = NULL;
char *name;
gchar *propColName;
void *buf;
@ -46,8 +48,16 @@ main(int argc, char **argv)
return 1;
}
locale = setlocale(LC_CTYPE, "");
table_name = g_locale_to_utf8(argv[2], -1, NULL, NULL, NULL);
setlocale(LC_CTYPE, locale);
if (!table_name) {
mdb_close(mdb);
return 1;
}
table = mdb_read_table_by_name(mdb, "MSysObjects", MDB_ANY);
if (!table) {
g_free(table_name);
mdb_close(mdb);
return 1;
}
@ -62,13 +72,14 @@ main(int argc, char **argv)
g_free(name);
g_free(buf);
mdb_free_tabledef(table);
g_free(table_name);
mdb_close(mdb);
printf("Column %s not found in MSysObjects!\n", argv[3]);
return 1;
}
while(mdb_fetch_row(table)) {
if (!strcmp(name, argv[2])) {
if (!strcmp(name, table_name)) {
found = 1;
break;
}
@ -89,6 +100,12 @@ main(int argc, char **argv)
g_free(buf);
mdb_free_tabledef(table);
mdb_close(mdb);
g_free(table_name);
if (!found) {
printf("Object %s not found in database file!\n", argv[2]);
return 1;
}
return 0;
}

View File

@ -56,6 +56,7 @@ main (int argc, char **argv)
};
GError *error = NULL;
GOptionContext *opt_context;
char *old_locale = setlocale(LC_CTYPE, "");
opt_context = g_option_context_new("<file> [<backend>] - Dump schema");
g_option_context_add_main_entries(opt_context, entries, NULL /*i18n*/);
@ -67,6 +68,8 @@ main (int argc, char **argv)
exit (1);
}
setlocale(LC_CTYPE, old_locale);
if (argc < 2 || argc > 3) {
fputs("Wrong number of arguments.\n\n", stderr);
fputs(g_option_context_get_help(opt_context, TRUE, NULL), stderr);

View File

@ -340,8 +340,8 @@ main(int argc, char **argv)
{ "no-pretty-print", 'P', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &pretty_print, "Don't pretty print", NULL},
{ "no-header", 'H', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &headers, "Don't print header", NULL},
{ "no-footer", 'F', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &footers, "Don't print footer", NULL},
{ "input", 'i', 0, G_OPTION_ARG_STRING, &filename_in, "Read SQL from specified file", "file"},
{ "output", 'o', 0, G_OPTION_ARG_STRING, &filename_out, "Write result to specified file", "file"},
{ "input", 'i', 0, G_OPTION_ARG_FILENAME, &filename_in, "Read SQL from specified file", "file"},
{ "output", 'o', 0, G_OPTION_ARG_FILENAME, &filename_out, "Write result to specified file", "file"},
{ NULL },
};
GError *error = NULL;

View File

@ -16,7 +16,6 @@
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <locale.h>
#include "mdbtools.h"
#include "mdbver.h"
#include "mdbprivate.h"

View File

@ -3,13 +3,14 @@
# Simple test script; run after performing
# git clone https://github.com/mdbtools/mdbtestdata.git test
./src/util/mdb-json test/data/ASampleDatabase.accdb "Asset Items"
./src/util/mdb-json test/data/nwind.mdb "Customers"
./src/util/mdb-json test/data/nwind.mdb "Umsätze"
./src/util/mdb-count test/data/ASampleDatabase.accdb "Asset Items"
./src/util/mdb-count test/data/nwind.mdb "Customers"
./src/util/mdb-count test/data/nwind.mdb "Umsätze"
./src/util/mdb-prop test/data/ASampleDatabase.accdb "Asset Items"
./src/util/mdb-prop test/data/nwind.mdb "Customers"
./src/util/mdb-prop test/data/nwind.mdb "Umsätze"
./src/util/mdb-schema test/data/ASampleDatabase.accdb
./src/util/mdb-schema test/data/nwind.mdb
./src/util/mdb-schema test/data/nwind.mdb -T "Umsätze" postgres
./src/util/mdb-tables test/data/ASampleDatabase.accdb
./src/util/mdb-tables test/data/nwind.mdb
./src/util/mdb-ver test/data/ASampleDatabase.accdb