diff --git a/doc/mdb-export.txt b/doc/mdb-export.txt
index a46502a..f9193cc 100644
--- a/doc/mdb-export.txt
+++ b/doc/mdb-export.txt
@@ -20,6 +20,7 @@ OPTIONS
-  -Q, --no-quote         Don't wrap text-like fields (text, memo, date) in quotes. If not specified text fiels will be surrounded by " (double quote) characters.
+  -Q, --no-quote         Don't wrap text-like fields (text, memo, date) in quotes. If not specified text fields will be surrounded by " (double quote) characters.
   -q, --quote char       Use char to wrap text-like fields. Default is " (double quote).
   -X, --escape char      Use char to escape quoted characters within a field. Default is doubling.
+  -e, --escape-c         Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.
   -I, --insert backend   INSERT statements (instead of CSV). You must specify which SQL backend dialect to use. Allowed values are: access, sybase, oracle, postgres, mysql and sqlite.
   -N, --namespace prefix Prefix identifiers with prefix.
   -S, --batch-size int   Size of insert batches on supported platforms.
diff --git a/include/mdbtools.h b/include/mdbtools.h
index e17e414..5f6e17b 100644
--- a/include/mdbtools.h
+++ b/include/mdbtools.h
@@ -185,10 +185,11 @@ enum {
 
 /* csv export binary options */
 enum {
-	MDB_BINEXPORT_STRIP,
-	MDB_BINEXPORT_RAW,
-	MDB_BINEXPORT_OCTAL,
-	MDB_BINEXPORT_HEXADECIMAL
+	MDB_EXPORT_BINARY_STRIP = (1 << 0),
+	MDB_EXPORT_BINARY_RAW = (1 << 1),
+	MDB_EXPORT_BINARY_OCTAL = (1 << 2),
+	MDB_EXPORT_BINARY_HEXADECIMAL = (1 << 3),
+	MDB_EXPORT_ESCAPE_CONTROL_CHARS = (1 << 4)
 };
 
 #define IS_JET4(mdb) (mdb->f->jet_version==MDB_VER_JET4) /* obsolete */
@@ -546,7 +547,7 @@ void mdb_register_backend(MdbHandle *mdb, char *backend_name, guint32 capabiliti
 		gchar* (*quote_schema_name)(const gchar*, const gchar*));
 int mdb_set_default_backend(MdbHandle *mdb, const char *backend_name);
 void mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace, guint32 export_options);
-void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode);
+void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int flags);
 
 /* sargs.c */
 int mdb_test_sargs(MdbTableDef *table, MdbField *fields, int num_fields);
diff --git a/src/libmdb/backend.c b/src/libmdb/backend.c
index a3292d0..0f20150 100644
--- a/src/libmdb/backend.c
+++ b/src/libmdb/backend.c
@@ -968,48 +968,67 @@ mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace
 #define is_quote_type(x) (is_binary_type(x) || x==MDB_TEXT || x==MDB_MEMO || x==MDB_DATETIME)
 //#define DONT_ESCAPE_ESCAPE
 void
-mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode)
+mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len,
+	char *quote_char, char *escape_char, int flags)
 /* quote_text: Don't quote if 0.
  */
 {
 	size_t quote_len = strlen(quote_char); /* multibyte */
 
 	size_t orig_escape_len = escape_char ? strlen(escape_char) : 0;
+	int quoting = quote_text && is_quote_type(col_type);
+	/* the low four bits of flags hold the MDB_EXPORT_BINARY_* mode; bit 4 is the escape flag */
+	int bin_mode = (flags & 0x0F);
+	int escape_cr_lf = !!(flags & MDB_EXPORT_ESCAPE_CONTROL_CHARS);
 
 	/* double the quote char if no escape char passed */
 	if (!escape_char)
 		escape_char = quote_char;
 
-	if (quote_text && is_quote_type(col_type)) {
+	if (quoting)
 		fputs(quote_char, outfile);
-		while (1) {
-			if (is_binary_type(col_type)) {
-				if (bin_mode == MDB_BINEXPORT_STRIP)
-					break;
-				if (!bin_len--)
-					break;
-			} else /* use \0 sentry */
-				if (!*col_val)
-					break;
 
-			int is_binary_hex_col = is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL;
+	while (1) {
+		if (is_binary_type(col_type)) {
+			if (bin_mode == MDB_EXPORT_BINARY_STRIP)
+				break;
+			if (!bin_len--)
+				break;
+		} else /* use \0 sentry */
+			if (!*col_val)
+				break;
 
-			if (quote_len && !strncmp(col_val, quote_char, quote_len) && !is_binary_hex_col) {
-				fprintf(outfile, "%s%s", escape_char, quote_char);
-				col_val += quote_len;
+		if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_OCTAL) {
+			fprintf(outfile, "\\%03o", *(unsigned char*)col_val++);
+		} else if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_HEXADECIMAL) {
+			fprintf(outfile, "%02X", *(unsigned char*)col_val++);
+		} else if (quoting && quote_len && !strncmp(col_val, quote_char, quote_len)) {
+			fprintf(outfile, "%s%s", escape_char, quote_char);
+			col_val += quote_len;
 #ifndef DONT_ESCAPE_ESCAPE
-			} else if (orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len) && !is_binary_hex_col) {
-				fprintf(outfile, "%s%s", escape_char, escape_char);
-				col_val += orig_escape_len;
+		} else if (quoting && orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len)) {
+			fprintf(outfile, "%s%s", escape_char, escape_char);
+			col_val += orig_escape_len;
 #endif
-			} else if (is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_OCTAL) {
-				fprintf(outfile, "\\%03o", *(unsigned char*)col_val++);
-			} else if (is_binary_hex_col) {
-				fprintf(outfile, "%02X", *(unsigned char*)col_val++);
-			} else
-				putc(*col_val++, outfile);
-		}
+		} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\r') {
+			col_val++;
+			putc('\\', outfile);
+			putc('r', outfile);
+		} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\n') {
+			col_val++;
+			putc('\\', outfile);
+			putc('n', outfile);
+		} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\t') {
+			col_val++;
+			putc('\\', outfile);
+			putc('t', outfile);
+		} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\\') {
+			col_val++;
+			putc('\\', outfile);
+			putc('\\', outfile);
+		} else
+			putc(*col_val++, outfile);
+	}
+	if (quoting)
 		fputs(quote_char, outfile);
-	} else
-		fputs(col_val, outfile);
 }
diff --git a/src/util/mdb-export.c b/src/util/mdb-export.c
index a230dc8..83a91c7 100755
--- a/src/util/mdb-export.c
+++ b/src/util/mdb-export.c
@@ -42,13 +42,14 @@ main(int argc, char **argv)
 	int quote_text = 1;
 	int boolean_words = 0;
 	int batch_size = 1000;
+	int escape_cr_lf = 0;
 	char *insert_dialect = NULL;
 	char *shortdate_fmt = NULL;
 	char *date_fmt = NULL;
 	char *namespace = NULL;
 	char *str_bin_mode = NULL;
 	char *null_text = NULL;
-	int bin_mode = MDB_BINEXPORT_RAW;
+	int export_flags = 0;
 	char *value;
 	size_t length;
 	int ret;
@@ -60,6 +61,7 @@ main(int argc, char **argv)
 		{"no-quote", 'Q', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &quote_text, "Don't wrap text-like fields in quotes.", NULL},
 		{"quote", 'q', 0, G_OPTION_ARG_STRING, &quote_char, "Use <char> to wrap text-like fields. Default is double quote.", "char"},
 		{"escape", 'X', 0, G_OPTION_ARG_STRING, &escape_char, "Use <char> to escape quoted characters within a field. Default is doubling.", "format"},
+		{"escape-c", 'e', 0, G_OPTION_ARG_NONE, &escape_cr_lf, "Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.", NULL},
 		{"insert", 'I', 0, G_OPTION_ARG_STRING, &insert_dialect, "INSERT statements (instead of CSV)", "backend"},
 		{"namespace", 'N', 0, G_OPTION_ARG_STRING, &namespace, "Prefix identifiers with namespace", "namespace"},
 		{"batch-size", 'S', 0, G_OPTION_ARG_INT, &batch_size, "Size of insert batches on supported platforms.", "int"},
@@ -120,17 +122,23 @@ main(int argc, char **argv)
 
 	if (str_bin_mode) {
 		if (!strcmp(str_bin_mode, "strip"))
-			bin_mode = MDB_BINEXPORT_STRIP;
+			export_flags |= MDB_EXPORT_BINARY_STRIP;
 		else if (!strcmp(str_bin_mode, "raw"))
-			bin_mode = MDB_BINEXPORT_RAW;
+			export_flags |= MDB_EXPORT_BINARY_RAW;
 		else if (!strcmp(str_bin_mode, "octal"))
-			bin_mode = MDB_BINEXPORT_OCTAL;
+			export_flags |= MDB_EXPORT_BINARY_OCTAL;
 		else if (!strcmp(str_bin_mode, "hex"))
-			bin_mode = MDB_BINEXPORT_HEXADECIMAL;
+			export_flags |= MDB_EXPORT_BINARY_HEXADECIMAL;
 		else {
 			fputs("Invalid binary mode\n", stderr);
 			exit(1);
 		}
+	} else {
+		export_flags |= MDB_EXPORT_BINARY_RAW;
+	}
+
+	if (escape_cr_lf) {
+		export_flags |= MDB_EXPORT_ESCAPE_CONTROL_CHARS;
 	}
 
 	/* Open file */
@@ -229,7 +237,7 @@ main(int argc, char **argv)
 					value = bound_values[i];
 					length = bound_lens[i];
 				}
-				mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode);
+				mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
 				if (col->col_type == MDB_OLE)
 					free(value);
 			}
@@ -282,21 +290,23 @@ main(int argc, char **argv)
 						length = bound_lens[i];
 					}
 					/* Correctly handle insertion of binary blobs into SQLite using the string literal notation of X'1234ABCD...' */
-					if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) {
+					if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type)
+							&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
 						char *quote_char_binary_sqlite = (char *) g_strdup("'");
 						fputs("X", outfile);
-						mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, bin_mode);
+						mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, export_flags);
 						g_free (quote_char_binary_sqlite);
 					/* Correctly handle insertion of binary blobs into PostgreSQL using the notation of decode('1234ABCD...', 'hex') */
-					} else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) {
+					} else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type)
+							&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
 						char *quote_char_binary_postgres = (char *) g_strdup("'");
 						fputs("decode(", outfile);
-						mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, bin_mode);
+						mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, export_flags);
 						fputs(", 'hex')", outfile);
 						g_free (quote_char_binary_postgres);
 					/* No special treatment for other backends or when hexadecimal notation hasn't been selected with the -b hex command line option */
 					} else {
-						mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode);
+						mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
 					}
 					if (col->col_type == MDB_OLE)
 						free(value);