mirror of
https://github.com/mdbtools/mdbtools.git
synced 2025-04-05 17:31:11 +08:00
Merge pull request #222 from evanmiller/merge-108
Revise and merge in #108 (new --escape-invisible flag to mdb-export)
This commit is contained in:
commit
0ad6fb33f6
@ -2,7 +2,7 @@ NAME
|
||||
mdb-export - Export data in an MDB database table to CSV format.
|
||||
|
||||
SYNOPSIS
|
||||
mdb-export [--no-header] [--delimiter delim] [--row-delimiter delim] [[--no-quote] | [--quote char [--escape char]]] [--date-format fmt] [--datetime-format fmt] [--bin strip|raw|octal|hex] [--boolean-words] database table
|
||||
mdb-export [--no-header] [--delimiter delim] [--row-delimiter delim] [[--no-quote] | [--quote char [--escape char]]] [--escape-invisible] [--date-format fmt] [--datetime-format fmt] [--bin strip|raw|octal|hex] [--boolean-words] database table
|
||||
mdb-export --insert backend [--namespace prefix] [--batch-size int] database table
|
||||
mdb-export -h|--help
|
||||
|
||||
@ -20,6 +20,7 @@ OPTIONS
|
||||
-Q, --no-quote Don't wrap text-like fields (text, memo, date) in quotes. If not specified, text fields will be surrounded by " (double quote) characters.
|
||||
-q, --quote char Use char to wrap text-like fields. Default is " (double quote).
|
||||
-X, --escape char Use char to escape quoted characters within a field. Default is doubling.
|
||||
-e, --escape-invisible Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.
|
||||
-I, --insert backend INSERT statements (instead of CSV). You must specify which SQL backend dialect to use. Allowed values are: access, sybase, oracle, postgres, mysql and sqlite.
|
||||
-N, --namespace prefix Prefix identifiers with prefix.
|
||||
-S, --batch-size int Size of insert batches on supported platforms.
|
||||
|
@ -190,10 +190,11 @@ enum {
|
||||
|
||||
/* csv export binary options */
|
||||
enum {
|
||||
MDB_BINEXPORT_STRIP,
|
||||
MDB_BINEXPORT_RAW,
|
||||
MDB_BINEXPORT_OCTAL,
|
||||
MDB_BINEXPORT_HEXADECIMAL
|
||||
MDB_EXPORT_BINARY_STRIP = (1 << 0),
|
||||
MDB_EXPORT_BINARY_RAW = (1 << 1),
|
||||
MDB_EXPORT_BINARY_OCTAL = (1 << 2),
|
||||
MDB_EXPORT_BINARY_HEXADECIMAL = (1 << 3),
|
||||
MDB_EXPORT_ESCAPE_CONTROL_CHARS = (1 << 4)
|
||||
};
|
||||
|
||||
#define IS_JET4(mdb) (mdb->f->jet_version==MDB_VER_JET4) /* obsolete */
|
||||
@ -557,7 +558,7 @@ void mdb_register_backend(MdbHandle *mdb, char *backend_name, guint32 capabiliti
|
||||
gchar* (*quote_schema_name)(const gchar*, const gchar*));
|
||||
int mdb_set_default_backend(MdbHandle *mdb, const char *backend_name);
|
||||
void mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace, guint32 export_options);
|
||||
void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode);
|
||||
void mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int flags);
|
||||
|
||||
/* sargs.c */
|
||||
int mdb_test_sargs(MdbTableDef *table, MdbField *fields, int num_fields);
|
||||
|
@ -1004,48 +1004,66 @@ mdb_print_schema(MdbHandle *mdb, FILE *outfile, char *tabname, char *dbnamespace
|
||||
#define is_quote_type(x) (is_binary_type(x) || x==MDB_TEXT || x==MDB_MEMO || x==MDB_DATETIME)
|
||||
//#define DONT_ESCAPE_ESCAPE
|
||||
void
|
||||
mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len, char *quote_char, char *escape_char, int bin_mode)
|
||||
mdb_print_col(FILE *outfile, gchar *col_val, int quote_text, int col_type, int bin_len,
|
||||
char *quote_char, char *escape_char, int flags)
|
||||
/* quote_text: Don't quote if 0.
|
||||
*/
|
||||
{
|
||||
size_t quote_len = strlen(quote_char); /* multibyte */
|
||||
|
||||
size_t orig_escape_len = escape_char ? strlen(escape_char) : 0;
|
||||
int quoting = quote_text && is_quote_type(col_type);
|
||||
int bin_mode = (flags & 0x0F);
|
||||
int escape_cr_lf = !!(flags & MDB_EXPORT_ESCAPE_CONTROL_CHARS);
|
||||
|
||||
/* double the quote char if no escape char passed */
|
||||
if (!escape_char)
|
||||
escape_char = quote_char;
|
||||
|
||||
if (quote_text && is_quote_type(col_type)) {
|
||||
if (quoting)
|
||||
fputs(quote_char, outfile);
|
||||
while (1) {
|
||||
if (is_binary_type(col_type)) {
|
||||
if (bin_mode == MDB_BINEXPORT_STRIP)
|
||||
break;
|
||||
if (!bin_len--)
|
||||
break;
|
||||
} else /* use \0 sentry */
|
||||
if (!*col_val)
|
||||
break;
|
||||
|
||||
int is_binary_hex_col = is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL;
|
||||
while (1) {
|
||||
if (is_binary_type(col_type)) {
|
||||
if (bin_mode == MDB_EXPORT_BINARY_STRIP)
|
||||
break;
|
||||
if (!bin_len--)
|
||||
break;
|
||||
} else /* use \0 sentry */
|
||||
if (!*col_val)
|
||||
break;
|
||||
|
||||
if (quote_len && !strncmp(col_val, quote_char, quote_len) && !is_binary_hex_col) {
|
||||
fprintf(outfile, "%s%s", escape_char, quote_char);
|
||||
col_val += quote_len;
|
||||
if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_OCTAL) {
|
||||
fprintf(outfile, "\\%03o", *(unsigned char*)col_val++);
|
||||
} else if (is_binary_type(col_type) && bin_mode == MDB_EXPORT_BINARY_HEXADECIMAL) {
|
||||
fprintf(outfile, "%02X", *(unsigned char*)col_val++);
|
||||
} else if (quoting && quote_len && !strncmp(col_val, quote_char, quote_len)) {
|
||||
fprintf(outfile, "%s%s", escape_char, quote_char);
|
||||
col_val += quote_len;
|
||||
#ifndef DONT_ESCAPE_ESCAPE
|
||||
} else if (orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len) && !is_binary_hex_col) {
|
||||
fprintf(outfile, "%s%s", escape_char, escape_char);
|
||||
col_val += orig_escape_len;
|
||||
} else if (quoting && orig_escape_len && !strncmp(col_val, escape_char, orig_escape_len)) {
|
||||
fprintf(outfile, "%s%s", escape_char, escape_char);
|
||||
col_val += orig_escape_len;
|
||||
#endif
|
||||
} else if (is_binary_type(col_type) && bin_mode == MDB_BINEXPORT_OCTAL) {
|
||||
fprintf(outfile, "\\%03o", *(unsigned char*)col_val++);
|
||||
} else if (is_binary_hex_col) {
|
||||
fprintf(outfile, "%02X", *(unsigned char*)col_val++);
|
||||
} else
|
||||
putc(*col_val++, outfile);
|
||||
}
|
||||
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\r') {
|
||||
col_val++;
|
||||
putc('\\', outfile);
|
||||
putc('r', outfile);
|
||||
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\n') {
|
||||
col_val++;
|
||||
putc('\\', outfile);
|
||||
putc('n', outfile);
|
||||
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\t') {
|
||||
col_val++;
|
||||
putc('\\', outfile);
|
||||
putc('t', outfile);
|
||||
} else if (escape_cr_lf && is_quote_type(col_type) && *col_val=='\\') {
|
||||
col_val++;
|
||||
putc('\\', outfile);
|
||||
putc('\\', outfile);
|
||||
} else
|
||||
putc(*col_val++, outfile);
|
||||
}
|
||||
if (quoting)
|
||||
fputs(quote_char, outfile);
|
||||
} else
|
||||
fputs(col_val, outfile);
|
||||
}
|
||||
|
@ -42,13 +42,14 @@ main(int argc, char **argv)
|
||||
int quote_text = 1;
|
||||
int boolean_words = 0;
|
||||
int batch_size = 1000;
|
||||
int escape_cr_lf = 0;
|
||||
char *insert_dialect = NULL;
|
||||
char *shortdate_fmt = NULL;
|
||||
char *date_fmt = NULL;
|
||||
char *namespace = NULL;
|
||||
char *str_bin_mode = NULL;
|
||||
char *null_text = NULL;
|
||||
int bin_mode = MDB_BINEXPORT_RAW;
|
||||
int export_flags = 0;
|
||||
char *value;
|
||||
size_t length;
|
||||
int ret;
|
||||
@ -60,6 +61,7 @@ main(int argc, char **argv)
|
||||
{"no-quote", 'Q', G_OPTION_FLAG_REVERSE, G_OPTION_ARG_NONE, &quote_text, "Don't wrap text-like fields in quotes.", NULL},
|
||||
{"quote", 'q', 0, G_OPTION_ARG_STRING, &quote_char, "Use <char> to wrap text-like fields. Default is double quote.", "char"},
|
||||
{"escape", 'X', 0, G_OPTION_ARG_STRING, &escape_char, "Use <char> to escape quoted characters within a field. Default is doubling.", "format"},
|
||||
{"escape-invisible", 'e', 0, G_OPTION_ARG_NONE, &escape_cr_lf, "Use C-style escaping for return (\\r), tab (\\t), line-feed (\\n), and back-slash (\\\\) characters. Default is to leave as they are.", NULL},
|
||||
{"insert", 'I', 0, G_OPTION_ARG_STRING, &insert_dialect, "INSERT statements (instead of CSV)", "backend"},
|
||||
{"namespace", 'N', 0, G_OPTION_ARG_STRING, &namespace, "Prefix identifiers with namespace", "namespace"},
|
||||
{"batch-size", 'S', 0, G_OPTION_ARG_INT, &batch_size, "Size of insert batches on supported platforms.", "int"},
|
||||
@ -120,17 +122,23 @@ main(int argc, char **argv)
|
||||
|
||||
if (str_bin_mode) {
|
||||
if (!strcmp(str_bin_mode, "strip"))
|
||||
bin_mode = MDB_BINEXPORT_STRIP;
|
||||
export_flags |= MDB_EXPORT_BINARY_STRIP;
|
||||
else if (!strcmp(str_bin_mode, "raw"))
|
||||
bin_mode = MDB_BINEXPORT_RAW;
|
||||
export_flags |= MDB_EXPORT_BINARY_RAW;
|
||||
else if (!strcmp(str_bin_mode, "octal"))
|
||||
bin_mode = MDB_BINEXPORT_OCTAL;
|
||||
export_flags |= MDB_EXPORT_BINARY_OCTAL;
|
||||
else if (!strcmp(str_bin_mode, "hex"))
|
||||
bin_mode = MDB_BINEXPORT_HEXADECIMAL;
|
||||
export_flags |= MDB_EXPORT_BINARY_HEXADECIMAL;
|
||||
else {
|
||||
fputs("Invalid binary mode\n", stderr);
|
||||
exit(1);
|
||||
}
|
||||
} else {
|
||||
export_flags |= MDB_EXPORT_BINARY_RAW;
|
||||
}
|
||||
|
||||
if (escape_cr_lf) {
|
||||
export_flags |= MDB_EXPORT_ESCAPE_CONTROL_CHARS;
|
||||
}
|
||||
|
||||
/* Open file */
|
||||
@ -229,7 +237,7 @@ main(int argc, char **argv)
|
||||
value = bound_values[i];
|
||||
length = bound_lens[i];
|
||||
}
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode);
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
|
||||
if (col->col_type == MDB_OLE)
|
||||
free(value);
|
||||
}
|
||||
@ -282,21 +290,23 @@ main(int argc, char **argv)
|
||||
length = bound_lens[i];
|
||||
}
|
||||
/* Correctly handle insertion of binary blobs into SQLite using the string literal notation of X'1234ABCD...' */
|
||||
if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) {
|
||||
if (!strcmp(mdb->backend_name, "sqlite") && is_binary_type(col->col_type)
|
||||
&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
|
||||
char *quote_char_binary_sqlite = (char *) g_strdup("'");
|
||||
fputs("X", outfile);
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, bin_mode);
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_sqlite, escape_char, export_flags);
|
||||
g_free (quote_char_binary_sqlite);
|
||||
/* Correctly handle insertion of binary blobs into PostgreSQL using the notation of decode('1234ABCD...', 'hex') */
|
||||
} else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type) && bin_mode == MDB_BINEXPORT_HEXADECIMAL) {
|
||||
} else if (!strcmp(mdb->backend_name, "postgres") && is_binary_type(col->col_type)
|
||||
&& (export_flags & MDB_EXPORT_BINARY_HEXADECIMAL)) {
|
||||
char *quote_char_binary_postgres = (char *) g_strdup("'");
|
||||
fputs("decode(", outfile);
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, bin_mode);
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char_binary_postgres, escape_char, export_flags);
|
||||
fputs(", 'hex')", outfile);
|
||||
g_free (quote_char_binary_postgres);
|
||||
/* No special treatment for other backends or when hexadecimal notation hasn't been selected with the -b hex command line option */
|
||||
} else {
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, bin_mode);
|
||||
mdb_print_col(outfile, value, quote_text, col->col_type, length, quote_char, escape_char, export_flags);
|
||||
}
|
||||
if (col->col_type == MDB_OLE)
|
||||
free(value);
|
||||
|
Loading…
Reference in New Issue
Block a user