Merge branch 'master' of github.com:dragonfly-science/mdbtools into merge-108

Merge in pull request #108 with a few changes:

* Use the newer mdb_print_col function

* Redefine the last argument of mdb_print_col to be a flags argument

* Rename and redefine the BINEXPORT enums. While technically public,
  these were never intended as a public API.

* Name the command line option --escape-c
This commit is contained in:
Evan Miller
2020-12-19 13:21:32 -05:00
4 changed files with 74 additions and 43 deletions

View File

@@ -20,6 +20,8 @@ OPTIONS
-Q, --no-quote Don't wrap text-like fields (text, memo, date) in quotes. If not specified text fields will be surrounded by " (double quote) characters. -Q, --no-quote Don't wrap text-like fields (text, memo, date) in quotes. If not specified text fields will be surrounded by " (double quote) characters.
-q, --quote char Use char to wrap text-like fields. Default is " (double quote). -q, --quote char Use char to wrap text-like fields. Default is " (double quote).
-X, --escape char Use char to escape quoted characters within a field. Default is doubling. -X, --escape char Use char to escape quoted characters within a field. Default is doubling.
-e, --escape-c Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.
Use char to escape quoted characters within a field. Default is doubling.
-I, --insert backend INSERT statements (instead of CSV). You must specify which SQL backend dialect to use. Allowed values are: access, sybase, oracle, postgres, mysql and sqlite. -I, --insert backend INSERT statements (instead of CSV). You must specify which SQL backend dialect to use. Allowed values are: access, sybase, oracle, postgres, mysql and sqlite.
-N, --namespace prefix Prefix identifiers with prefix. -N, --namespace prefix Prefix identifiers with prefix.
-S, --batch-size int Size of insert batches on supported platforms. -S, --batch-size int Size of insert batches on supported platforms.

View File

@@ -185,10 +185,11 @@ enum {
/* csv export binary options */ /* csv export binary options */
enum { enum {
MDB_BINEXPORT_STRIP, MDB_EXPORT_BINARY_STRIP = (1 << 0),
MDB_BINEXPORT_RAW, MDB_EXPORT_BINARY_RAW = (1 << 1),
MDB_BINEXPORT_OCTAL, MDB_EXPORT_BINARY_OCTAL = (1 << 2),
MDB_BINEXPORT_HEXADECIMAL MDB_EXPORT_BINARY_HEXADECIMAL = (1 << 3),
MDB_EXPORT_ESCAPE_CONTROL_CHARS = (1 << 4)
}; };
#define IS_JET4(mdb) (mdb->f->jet_version==MDB_VER_JET4) /* obsolete */ #define IS_JET4(mdb) (mdb->f->jet_version==MDB_VER_JET4) /* obsolete */
@@ -546,7 +547,7 @@ void mdb_register_backend(MdbHandle *mdb, char *backend_name, guint32 capabiliti
gchar* (*quote_schema_name)(const gchar*, const gchar*)); gchar* (*quote_schema_name)(const gchar*, const gchar*));
int mdb_set_default_backend(MdbHandle *mdb, const char *backend_name); int mdb_set_default_backend(MdbHandle *mdb, const char *backend_name);
void mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace, guint32 export_options); void mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace, guint32 export_options);
void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode); void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int flags);
/* sargs.c */ /* sargs.c */
int mdb_test_sargs(MdbTableDef *table, MdbField *fields, int num_fields); int mdb_test_sargs(MdbTableDef *table, MdbField *fields, int num_fields);

View File

@@ -968,48 +968,66 @@ mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace
#define is_quote_type(x) (is_binary_type(x) || x==MDB_TEXT || x==MDB_MEMO || x==MDB_DATETIME) #define is_quote_type(x) (is_binary_type(x) || x==MDB_TEXT || x==MDB_MEMO || x==MDB_DATETIME)
//#define DONT_ESCAPE_ESCAPE //#define DONT_ESCAPE_ESCAPE
void void
mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode) mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len,
char *quote_char, char *escape_char, int flags)
/* quote_text: Don't quote if 0. /* quote_text: Don't quote if 0.
*/ */
{ {
size_t quote_len = strlen(quote_char); /* multibyte */ size_t quote_len = strlen(quote_char); /* multibyte */
size_t orig_escape_len = escape_char ? strlen(escape_char) : 0; size_t orig_escape_len = escape_char ? strlen(escape_char) : 0;
int quoting = quote_text && is_quote_type(col_type);
int bin_mode = (flags & 0x0F);
int escape_cr_lf = !!(flags & MDB_EXPORT_ESCAPE_CONTROL_CHARS);
/* double the quote char if no escape char passed */ /* double the quote char if no escape char passed */
if (!escape_char) if (!escape_char)
escape_char = quote_char; escape_char = quote_char;
if (quote_text && is_quote_type(col_type)) { if (quoting)
fputs(quote_char, outfile); fputs(quote_char, outfile);
while (1) {
if (is_binary_type(col_type)) {
if (bin_mode == MDB_BINEXPORT_STRIP)
break;
if (!bin_len--)
break;
} else /* use \0 sentry */
if (!*col_val)
break;
int is_binary_hex_col = is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL; while (1) {
if (is_binary_type(col_type)) {
if (bin_mode == MDB_EXPORT_BINARY_STRIP)
break;
if (!bin_len--)
break;
} else /* use \0 sentry */
if (!*col_val)
break;
if (quote_len && !strncmp(col_val, quote_char, quote_len) && !is_binary_hex_col) { if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_OCTAL) {
fprintf(outfile, "%s%s", escape_char, quote_char); fprintf(outfile, "\\%03o", *(unsigned char*)col_val++);
col_val += quote_len; } else if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_HEXADECIMAL) {
fprintf(outfile, "%02X", *(unsigned char*)col_val++);
} else if (quoting && quote_len && !strncmp(col_val, quote_char, quote_len)) {
fprintf(outfile, "%s%s", escape_char, quote_char);
col_val += quote_len;
#ifndef DONT_ESCAPE_ESCAPE #ifndef DONT_ESCAPE_ESCAPE
} else if (orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len) && !is_binary_hex_col) { } else if (quoting && orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len)) {
fprintf(outfile, "%s%s", escape_char, escape_char); fprintf(outfile, "%s%s", escape_char, escape_char);
col_val += orig_escape_len; col_val += orig_escape_len;
#endif #endif
} else if (is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_OCTAL) { } else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\r') {
fprintf(outfile, "\\%03o", *(unsigned char*)col_val++); col_val++;
} else if (is_binary_hex_col) { putc('\\', outfile);
fprintf(outfile, "%02X", *(unsigned char*)col_val++); putc('r', outfile);
} else } else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\n') {
putc(*col_val++, outfile); col_val++;
} putc('\\', outfile);
putc('n', outfile);
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\t') {
col_val++;
putc('\\', outfile);
putc('t', outfile);
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\\') {
col_val++;
putc('\\', outfile);
putc('\\', outfile);
} else
putc(*col_val++, outfile);
}
if (quoting)
fputs(quote_char, outfile); fputs(quote_char, outfile);
} else
fputs(col_val, outfile);
} }

View File

@@ -42,13 +42,14 @@ main(int argc, char **argv)
int quote_text = 1; int quote_text = 1;
int boolean_words = 0; int boolean_words = 0;
int batch_size = 1000; int batch_size = 1000;
int escape_cr_lf = 0;
char *insert_dialect = NULL; char *insert_dialect = NULL;
char *shortdate_fmt = NULL; char *shortdate_fmt = NULL;
char *date_fmt = NULL; char *date_fmt = NULL;
char *namespace = NULL; char *namespace = NULL;
char *str_bin_mode = NULL; char *str_bin_mode = NULL;
char *null_text = NULL; char *null_text = NULL;
int bin_mode = MDB_BINEXPORT_RAW; int export_flags = 0;
char *value; char *value;
size_t length; size_t length;
int ret; int ret;
@@ -60,6 +61,7 @@ main(int argc, char **argv)
{"no-quote", 'Q', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &quote_text, "Don't wrap text-like fields in quotes.", NULL}, {"no-quote", 'Q', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &quote_text, "Don't wrap text-like fields in quotes.", NULL},
{"quote", 'q', 0, G_OPTION_ARG_STRING, &quote_char, "Use <char> to wrap text-like fields. Default is double quote.", "char"}, {"quote", 'q', 0, G_OPTION_ARG_STRING, &quote_char, "Use <char> to wrap text-like fields. Default is double quote.", "char"},
{"escape", 'X', 0, G_OPTION_ARG_STRING, &escape_char, "Use <char> to escape quoted characters within a field. Default is doubling.", "format"}, {"escape", 'X', 0, G_OPTION_ARG_STRING, &escape_char, "Use <char> to escape quoted characters within a field. Default is doubling.", "format"},
{"escape-c", 'e', 0, G_OPTION_ARG_NONE, &escape_cr_lf, "Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.", NULL},
{"insert", 'I', 0, G_OPTION_ARG_STRING, &insert_dialect, "INSERT statements (instead of CSV)", "backend"}, {"insert", 'I', 0, G_OPTION_ARG_STRING, &insert_dialect, "INSERT statements (instead of CSV)", "backend"},
{"namespace", 'N', 0, G_OPTION_ARG_STRING, &namespace, "Prefix identifiers with namespace", "namespace"}, {"namespace", 'N', 0, G_OPTION_ARG_STRING, &namespace, "Prefix identifiers with namespace", "namespace"},
{"batch-size", 'S', 0, G_OPTION_ARG_INT, &batch_size, "Size of insert batches on supported platforms.", "int"}, {"batch-size", 'S', 0, G_OPTION_ARG_INT, &batch_size, "Size of insert batches on supported platforms.", "int"},
@@ -120,17 +122,23 @@ main(int argc, char **argv)
if (str_bin_mode) { if (str_bin_mode) {
if (!strcmp(str_bin_mode, "strip")) if (!strcmp(str_bin_mode, "strip"))
bin_mode = MDB_BINEXPORT_STRIP; export_flags |= MDB_EXPORT_BINARY_STRIP;
else if (!strcmp(str_bin_mode, "raw")) else if (!strcmp(str_bin_mode, "raw"))
bin_mode = MDB_BINEXPORT_RAW; export_flags |= MDB_EXPORT_BINARY_RAW;
else if (!strcmp(str_bin_mode, "octal")) else if (!strcmp(str_bin_mode, "octal"))
bin_mode = MDB_BINEXPORT_OCTAL; export_flags |= MDB_EXPORT_BINARY_OCTAL;
else if (!strcmp(str_bin_mode, "hex")) else if (!strcmp(str_bin_mode, "hex"))
bin_mode = MDB_BINEXPORT_HEXADECIMAL; export_flags |= MDB_EXPORT_BINARY_HEXADECIMAL;
else { else {
fputs("Invalid binary mode\n", stderr); fputs("Invalid binary mode\n", stderr);
exit(1); exit(1);
} }
} else {
export_flags |= MDB_EXPORT_BINARY_RAW;
}
if (escape_cr_lf) {
export_flags |= MDB_EXPORT_ESCAPE_CONTROL_CHARS;
} }
/* Open file */ /* Open file */
@@ -229,7 +237,7 @@ main(int argc, char **argv)
value = bound_values[i]; value = bound_values[i];
length = bound_lens[i]; length = bound_lens[i];
} }
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
if (col->col_type == MDB_OLE) if (col->col_type == MDB_OLE)
free(value); free(value);
} }
@@ -282,21 +290,23 @@ main(int argc, char **argv)
length = bound_lens[i]; length = bound_lens[i];
} }
/* Correctly handle insertion of binary blobs into SQLite using the string literal notation of X'1234ABCD...' */ /* Correctly handle insertion of binary blobs into SQLite using the string literal notation of X'1234ABCD...' */
if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) { if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type)
&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
char *quote_char_binary_sqlite = (char *) g_strdup("'"); char *quote_char_binary_sqlite = (char *) g_strdup("'");
fputs("X", outfile); fputs("X", outfile);
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, export_flags);
g_free (quote_char_binary_sqlite); g_free (quote_char_binary_sqlite);
/* Correctly handle insertion of binary blobs into PostgreSQL using the notation of decode('1234ABCD...', 'hex') */ /* Correctly handle insertion of binary blobs into PostgreSQL using the notation of decode('1234ABCD...', 'hex') */
} else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) { } else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type)
&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
char *quote_char_binary_postgres = (char *) g_strdup("'"); char *quote_char_binary_postgres = (char *) g_strdup("'");
fputs("decode(", outfile); fputs("decode(", outfile);
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, export_flags);
fputs(", 'hex')", outfile); fputs(", 'hex')", outfile);
g_free (quote_char_binary_postgres); g_free (quote_char_binary_postgres);
/* No special treatment for other backends or when hexadecimal notation hasn't been selected with the -b hex command line option */ /* No special treatment for other backends or when hexadecimal notation hasn't been selected with the -b hex command line option */
} else { } else {
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode); mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
} }
if (col->col_type == MDB_OLE) if (col->col_type == MDB_OLE)
free(value); free(value);