2000-03-13 02:21:17 +08:00
/* MDB Tools - A library for reading MS Access database file
* Copyright ( C ) 2000 Brian Bruns
*
2011-08-29 07:53:29 +08:00
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 2 of the License , or
* ( at your option ) any later version .
2000-03-13 02:21:17 +08:00
*
2011-08-29 07:53:29 +08:00
* This program is distributed in the hope that it will be useful ,
2000-03-13 02:21:17 +08:00
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
2011-08-29 07:53:29 +08:00
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
2000-03-13 02:21:17 +08:00
*
2011-08-29 07:53:29 +08:00
* You should have received a copy of the GNU General Public License along
* with this program ; if not , write to the Free Software Foundation , Inc . ,
* 51 Franklin Street , Fifth Floor , Boston , MA 02110 - 1301 USA .
2000-03-13 02:21:17 +08:00
*/
2011-08-29 07:53:29 +08:00
2000-03-13 02:21:17 +08:00
# include "mdbtools.h"
2020-09-03 02:30:07 +08:00
# define EXPORT_BIND_SIZE 200000
2003-04-30 01:55:09 +08:00
2012-06-29 08:28:51 +08:00
/*
 * Non-zero when x equals one of MDB_OLE, MDB_BINARY or MDB_REPID.
 * The argument is parenthesized so that expressions such as `a ? b : c`
 * expand correctly; note that x is still evaluated up to three times, so
 * it must not have side effects.
 */
#define is_binary_type(x) ((x) == MDB_OLE || (x) == MDB_BINARY || (x) == MDB_REPID)
2004-02-09 11:52:25 +08:00
2005-09-08 07:27:43 +08:00
static char * escapes ( char * s ) ;
2004-02-09 11:52:25 +08:00
2003-01-21 00:04:24 +08:00
int
2000-03-13 02:21:17 +08:00
main ( int argc , char * * argv )
{
2013-01-14 04:20:00 +08:00
unsigned int i ;
2004-01-06 08:42:07 +08:00
MdbHandle * mdb ;
MdbTableDef * table ;
MdbColumn * col ;
2005-03-14 05:29:17 +08:00
char * * bound_values ;
2018-02-09 06:15:29 +08:00
int * bound_lens ;
2013-01-14 04:20:00 +08:00
FILE * outfile = stdout ;
2004-05-30 15:19:22 +08:00
char * delimiter = NULL ;
char * row_delimiter = NULL ;
2005-09-08 07:27:43 +08:00
char * quote_char = NULL ;
char * escape_char = NULL ;
2014-12-29 20:10:01 +08:00
int header_row = 1 ;
int quote_text = 1 ;
2016-04-07 04:45:06 +08:00
int boolean_words = 0 ;
2018-02-09 06:15:29 +08:00
int batch_size = 1000 ;
2017-01-26 12:26:01 +08:00
int escape_cr_lf = 0 ;
2010-06-21 04:27:42 +08:00
char * insert_dialect = NULL ;
2020-09-03 10:14:57 +08:00
char * shortdate_fmt = NULL ;
2014-12-29 20:10:01 +08:00
char * date_fmt = NULL ;
2012-01-30 18:41:04 +08:00
char * namespace = NULL ;
2014-12-29 20:10:01 +08:00
char * str_bin_mode = NULL ;
2016-08-02 00:11:21 +08:00
char * null_text = NULL ;
2020-12-20 02:21:32 +08:00
int export_flags = 0 ;
2011-02-17 07:57:40 +08:00
char * value ;
size_t length ;
2020-10-31 17:46:00 +08:00
int ret ;
2000-03-13 02:21:17 +08:00
2014-12-29 20:10:01 +08:00
GOptionEntry entries [ ] = {
2018-02-09 06:15:29 +08:00
{ " no-header " , ' H ' , G_OPTION_FLAG_REVERSE , G_OPTION_ARG_NONE , & header_row , " Suppress header row. " , NULL } ,
{ " delimiter " , ' d ' , 0 , G_OPTION_ARG_STRING , & delimiter , " Specify an alternative column delimiter. Default is comma. " , " char " } ,
{ " row-delimiter " , ' R ' , 0 , G_OPTION_ARG_STRING , & row_delimiter , " Specify a row delimiter " , " char " } ,
2020-10-25 18:24:28 +08:00
{ " no-quote " , ' Q ' , G_OPTION_FLAG_REVERSE , G_OPTION_ARG_NONE , & quote_text , " Don't wrap text-like fields in quotes. " , NULL } ,
2018-02-09 06:15:29 +08:00
{ " quote " , ' q ' , 0 , G_OPTION_ARG_STRING , & quote_char , " Use <char> to wrap text-like fields. Default is double quote. " , " char " } ,
2020-10-25 18:24:28 +08:00
{ " escape " , ' X ' , 0 , G_OPTION_ARG_STRING , & escape_char , " Use <char> to escape quoted characters within a field. Default is doubling. " , " format " } ,
2020-12-21 22:30:02 +08:00
{ " escape-invisible " , ' e ' , 0 , G_OPTION_ARG_NONE , & escape_cr_lf , " Use C-style escaping for return ( \\ r), tab ( \\ t), line-feed ( \\ n), and back-slash ( \\ \\ ) characters. Default is to leave as they are. " , NULL } ,
2020-10-25 18:34:53 +08:00
{ " insert " , ' I ' , 0 , G_OPTION_ARG_STRING , & insert_dialect , " INSERT statements (instead of CSV) " , " backend " } ,
2020-10-25 18:24:28 +08:00
{ " namespace " , ' N ' , 0 , G_OPTION_ARG_STRING , & namespace , " Prefix identifiers with namespace " , " namespace " } ,
{ " batch-size " , ' S ' , 0 , G_OPTION_ARG_INT , & batch_size , " Size of insert batches on supported platforms. " , " int " } ,
2020-09-03 10:14:57 +08:00
{ " date-format " , ' D ' , 0 , G_OPTION_ARG_STRING , & shortdate_fmt , " Set the date format (see strftime(3) for details) " , " format " } ,
{ " datetime-format " , ' T ' , 0 , G_OPTION_ARG_STRING , & date_fmt , " Set the date/time format (see strftime(3) for details) " , " format " } ,
2018-02-09 06:15:29 +08:00
{ " null " , ' 0 ' , 0 , G_OPTION_ARG_STRING , & null_text , " Use <char> to represent a NULL value " , " char " } ,
2020-09-03 09:33:49 +08:00
{ " bin " , ' b ' , 0 , G_OPTION_ARG_STRING , & str_bin_mode , " Binary export mode " , " strip|raw|octal|hex " } ,
2018-02-09 06:15:29 +08:00
{ " boolean-words " , ' B ' , 0 , G_OPTION_ARG_NONE , & boolean_words , " Use TRUE/FALSE in Boolean fields (default is 0/1) " , NULL } ,
{ NULL } ,
2014-12-29 20:10:01 +08:00
} ;
GError * error = NULL ;
GOptionContext * opt_context ;
opt_context = g_option_context_new ( " <file> <table> - export data from MDB file " ) ;
g_option_context_add_main_entries ( opt_context , entries , NULL /*i18n*/ ) ;
// g_option_context_set_strict_posix(opt_context, TRUE); /* options first, requires glib 2.44 */
if ( ! g_option_context_parse ( opt_context , & argc , & argv , & error ) )
{
fprintf ( stderr , " option parsing failed: %s \n " , error - > message ) ;
fputs ( g_option_context_get_help ( opt_context , TRUE , NULL ) , stderr ) ;
exit ( 1 ) ;
2004-05-30 15:19:22 +08:00
}
2014-12-29 20:10:01 +08:00
if ( argc ! = 3 ) {
fputs ( " Wrong number of arguments. \n \n " , stderr ) ;
fputs ( g_option_context_get_help ( opt_context , TRUE , NULL ) , stderr ) ;
2000-10-14 05:33:04 +08:00
exit ( 1 ) ;
}
2000-04-03 01:08:30 +08:00
2014-12-29 20:10:01 +08:00
/* Process options */
if ( quote_char )
quote_char = escapes ( quote_char ) ;
2020-08-06 06:27:48 +08:00
else if ( insert_dialect & & ! strcmp ( insert_dialect , " postgres " ) )
quote_char = g_strdup ( " ' " ) ;
else
2014-12-29 20:10:01 +08:00
quote_char = g_strdup ( " \" " ) ;
if ( delimiter )
delimiter = escapes ( delimiter ) ;
else
delimiter = g_strdup ( " , " ) ;
if ( row_delimiter )
row_delimiter = escapes ( row_delimiter ) ;
else
row_delimiter = g_strdup ( " \n " ) ;
if ( escape_char )
escape_char = escapes ( escape_char ) ;
if ( insert_dialect )
header_row = 0 ;
2016-08-02 00:11:21 +08:00
if ( null_text )
null_text = escapes ( null_text ) ;
else
null_text = g_strdup ( " " ) ;
2014-12-29 20:10:01 +08:00
if ( str_bin_mode ) {
if ( ! strcmp ( str_bin_mode , " strip " ) )
2020-12-20 02:21:32 +08:00
export_flags | = MDB_EXPORT_BINARY_STRIP ;
2014-12-29 20:10:01 +08:00
else if ( ! strcmp ( str_bin_mode , " raw " ) )
2020-12-20 02:21:32 +08:00
export_flags | = MDB_EXPORT_BINARY_RAW ;
2014-12-29 20:10:01 +08:00
else if ( ! strcmp ( str_bin_mode , " octal " ) )
2020-12-20 02:21:32 +08:00
export_flags | = MDB_EXPORT_BINARY_OCTAL ;
2020-09-02 21:53:50 +08:00
else if ( ! strcmp ( str_bin_mode , " hex " ) )
2020-12-20 02:21:32 +08:00
export_flags | = MDB_EXPORT_BINARY_HEXADECIMAL ;
2014-12-29 20:10:01 +08:00
else {
fputs ( " Invalid binary mode \n " , stderr ) ;
exit ( 1 ) ;
}
2020-12-20 02:21:32 +08:00
} else {
export_flags | = MDB_EXPORT_BINARY_RAW ;
}
if ( escape_cr_lf ) {
export_flags | = MDB_EXPORT_ESCAPE_CONTROL_CHARS ;
2014-12-29 20:10:01 +08:00
}
/* Open file */
if ( ! ( mdb = mdb_open ( argv [ 1 ] , MDB_NOFLAGS ) ) ) {
/* Don't bother clean up memory before exit */
2000-10-14 05:33:04 +08:00
exit ( 1 ) ;
}
2004-07-02 20:29:09 +08:00
2020-08-20 09:26:06 +08:00
if ( date_fmt )
mdb_set_date_fmt ( mdb , date_fmt ) ;
2020-09-03 10:14:57 +08:00
if ( shortdate_fmt )
mdb_set_shortdate_fmt ( mdb , shortdate_fmt ) ;
2020-08-20 09:26:06 +08:00
if ( boolean_words )
mdb_set_boolean_fmt_words ( mdb ) ;
2020-09-03 02:30:07 +08:00
mdb_set_bind_size ( mdb , EXPORT_BIND_SIZE ) ;
2010-06-21 04:27:42 +08:00
if ( insert_dialect )
if ( ! mdb_set_default_backend ( mdb , insert_dialect ) ) {
2014-12-29 20:10:01 +08:00
fputs ( " Invalid backend type \n " , stderr ) ;
/* Don't bother clean up memory before exit */
2010-06-21 04:27:42 +08:00
exit ( 1 ) ;
}
2014-12-29 20:10:01 +08:00
table = mdb_read_table_by_name ( mdb , argv [ 2 ] , MDB_TABLE ) ;
2004-07-02 20:29:09 +08:00
if ( ! table ) {
2014-12-29 20:10:01 +08:00
fprintf ( stderr , " Error: Table %s does not exist in this database. \n " , argv [ 2 ] ) ;
/* Don't bother clean up memory before exit */
2005-09-08 07:27:43 +08:00
exit ( 1 ) ;
2004-07-02 20:29:09 +08:00
}
2013-01-14 04:20:00 +08:00
/* read table */
2004-07-02 20:29:09 +08:00
mdb_read_columns ( table ) ;
mdb_rewind_table ( table ) ;
2018-02-09 06:15:29 +08:00
2005-03-14 05:29:17 +08:00
bound_values = ( char * * ) g_malloc ( table - > num_cols * sizeof ( char * ) ) ;
bound_lens = ( int * ) g_malloc ( table - > num_cols * sizeof ( int ) ) ;
2018-02-09 06:15:29 +08:00
for ( i = 0 ; i < table - > num_cols ; i + + ) {
2013-01-14 04:20:00 +08:00
/* bind columns */
2020-09-03 02:30:07 +08:00
bound_values [ i ] = ( char * ) g_malloc0 ( EXPORT_BIND_SIZE ) ;
2020-10-31 17:46:00 +08:00
ret = mdb_bind_column ( table , i + 1 , bound_values [ i ] , & bound_lens [ i ] ) ;
if ( ret = = - 1 ) {
fprintf ( stderr , " Failed to bind column %d \n " , i + 1 ) ;
exit ( 1 ) ;
}
2004-07-02 20:29:09 +08:00
}
if ( header_row ) {
2018-02-09 06:15:29 +08:00
for ( i = 0 ; i < table - > num_cols ; i + + ) {
col = g_ptr_array_index ( table - > columns , i ) ;
2013-01-14 04:20:00 +08:00
if ( i )
fputs ( delimiter , outfile ) ;
fputs ( col - > name , outfile ) ;
2004-07-02 20:29:09 +08:00
}
2013-01-14 04:20:00 +08:00
fputs ( row_delimiter , outfile ) ;
2004-01-11 05:46:14 +08:00
}
2000-03-13 02:21:17 +08:00
2018-02-09 06:15:29 +08:00
// TODO refactor this into functions
if ( mdb - > default_backend - > capabilities & MDB_SHEXP_BULK_INSERT ) {
//for efficiency do multi row insert on engines that support this
2020-08-10 04:15:20 +08:00
int counter = 0 ;
2018-02-09 06:15:29 +08:00
while ( mdb_fetch_row ( table ) ) {
if ( counter % batch_size = = 0 ) {
counter = 0 ; // reset to 0, prevent overflow on extremely large data sets.
char * quoted_name ;
quoted_name = mdb - > default_backend - > quote_schema_name ( namespace , argv [ 2 ] ) ;
fprintf ( outfile , " INSERT INTO %s ( " , quoted_name ) ;
2010-06-21 04:27:42 +08:00
free ( quoted_name ) ;
2018-02-09 06:15:29 +08:00
for ( i = 0 ; i < table - > num_cols ; i + + ) {
if ( i > 0 ) fputs ( " , " , outfile ) ;
col = g_ptr_array_index ( table - > columns , i ) ;
quoted_name = mdb - > default_backend - > quote_schema_name ( NULL , col - > name ) ;
fputs ( quoted_name , outfile ) ;
free ( quoted_name ) ;
}
fputs ( " ) VALUES " , outfile ) ;
} else {
fputs ( " , " , outfile ) ;
}
fputs ( " ( " , outfile ) ;
for ( i = 0 ; i < table - > num_cols ; i + + ) {
if ( i > 0 )
fputs ( delimiter , outfile ) ;
col = g_ptr_array_index ( table - > columns , i ) ;
if ( ! bound_lens [ i ] ) {
/* Don't quote NULLs */
if ( insert_dialect )
fputs ( " NULL " , outfile ) ;
else
fputs ( null_text , outfile ) ;
} else {
if ( col - > col_type = = MDB_OLE ) {
value = mdb_ole_read_full ( mdb , col , & length ) ;
} else {
value = bound_values [ i ] ;
length = bound_lens [ i ] ;
}
2020-12-20 02:21:32 +08:00
mdb_print_col ( outfile , value , quote_text , col - > col_type , length , quote_char , escape_char , export_flags ) ;
2018-02-09 06:15:29 +08:00
if ( col - > col_type = = MDB_OLE )
free ( value ) ;
}
}
fputs ( " ) " , outfile ) ;
if ( counter % batch_size = = batch_size - 1 ) {
fputs ( " ; " , outfile ) ;
fputs ( row_delimiter , outfile ) ;
}
counter + + ;
2004-07-02 20:29:09 +08:00
}
2018-02-09 06:15:29 +08:00
if ( counter % batch_size ! = 0 ) {
//if our last row did not land on closing tag, close the stement here
fputs ( " ; " , outfile ) ;
fputs ( row_delimiter , outfile ) ;
}
} else {
while ( mdb_fetch_row ( table ) ) {
2000-03-13 02:21:17 +08:00
2018-02-09 06:15:29 +08:00
if ( insert_dialect ) {
char * quoted_name ;
quoted_name = mdb - > default_backend - > quote_schema_name ( namespace , argv [ 2 ] ) ;
fprintf ( outfile , " INSERT INTO %s ( " , quoted_name ) ;
free ( quoted_name ) ;
for ( i = 0 ; i < table - > num_cols ; i + + ) {
if ( i > 0 ) fputs ( " , " , outfile ) ;
col = g_ptr_array_index ( table - > columns , i ) ;
quoted_name = mdb - > default_backend - > quote_schema_name ( NULL , col - > name ) ;
fputs ( quoted_name , outfile ) ;
free ( quoted_name ) ;
}
fputs ( " ) VALUES ( " , outfile ) ;
}
for ( i = 0 ; i < table - > num_cols ; i + + ) {
if ( i > 0 )
fputs ( delimiter , outfile ) ;
col = g_ptr_array_index ( table - > columns , i ) ;
if ( ! bound_lens [ i ] ) {
/* Don't quote NULLs */
if ( insert_dialect )
fputs ( " NULL " , outfile ) ;
else
fputs ( null_text , outfile ) ;
2011-02-17 07:57:40 +08:00
} else {
2018-02-09 06:15:29 +08:00
if ( col - > col_type = = MDB_OLE ) {
value = mdb_ole_read_full ( mdb , col , & length ) ;
} else {
value = bound_values [ i ] ;
length = bound_lens [ i ] ;
}
2020-09-02 21:53:50 +08:00
/* Correctly handle insertion of binary blobs into SQLite using the string literal notation of X'1234ABCD...' */
2020-12-20 02:21:32 +08:00
if ( ! strcmp ( mdb - > backend_name , " sqlite " ) & & is_binary_type ( col - > col_type )
& & ( export_flags & MDB_EXPORT_BINARY_HEXADECIMAL ) ) {
2020-09-02 21:53:50 +08:00
char * quote_char_binary_sqlite = ( char * ) g_strdup ( " ' " ) ;
fputs ( " X " , outfile ) ;
2020-12-20 02:21:32 +08:00
mdb_print_col ( outfile , value , quote_text , col - > col_type , length , quote_char_binary_sqlite , escape_char , export_flags ) ;
2020-09-02 21:53:50 +08:00
g_free ( quote_char_binary_sqlite ) ;
/* Correctly handle insertion of binary blobs into PostgreSQL using the notation of decode('1234ABCD...', 'hex') */
2020-12-20 02:21:32 +08:00
} else if ( ! strcmp ( mdb - > backend_name , " postgres " ) & & is_binary_type ( col - > col_type )
& & ( export_flags & MDB_EXPORT_BINARY_HEXADECIMAL ) ) {
2020-09-02 21:53:50 +08:00
char * quote_char_binary_postgres = ( char * ) g_strdup ( " ' " ) ;
fputs ( " decode( " , outfile ) ;
2020-12-20 02:21:32 +08:00
mdb_print_col ( outfile , value , quote_text , col - > col_type , length , quote_char_binary_postgres , escape_char , export_flags ) ;
2020-09-02 21:53:50 +08:00
fputs ( " , 'hex') " , outfile ) ;
g_free ( quote_char_binary_postgres ) ;
/* No special treatment for other backends or when hexadecimal notation hasn't been selected with the -b hex command line option */
} else {
2020-12-20 02:21:32 +08:00
mdb_print_col ( outfile , value , quote_text , col - > col_type , length , quote_char , escape_char , export_flags ) ;
2020-09-02 21:53:50 +08:00
}
2018-02-09 06:15:29 +08:00
if ( col - > col_type = = MDB_OLE )
free ( value ) ;
2011-02-17 07:57:40 +08:00
}
2000-03-13 02:21:17 +08:00
}
2018-02-09 06:15:29 +08:00
if ( insert_dialect ) fputs ( " ); " , outfile ) ;
fputs ( row_delimiter , outfile ) ;
2000-03-13 02:21:17 +08:00
}
2004-07-02 20:29:09 +08:00
}
2018-02-09 06:15:29 +08:00
2013-01-14 04:20:00 +08:00
/* free the memory used to bind */
for ( i = 0 ; i < table - > num_cols ; i + + ) {
g_free ( bound_values [ i ] ) ;
2000-03-13 02:21:17 +08:00
}
2005-03-14 05:29:17 +08:00
g_free ( bound_values ) ;
g_free ( bound_lens ) ;
2004-07-02 20:29:09 +08:00
mdb_free_tabledef ( table ) ;
2000-03-13 02:21:17 +08:00
2003-01-29 07:51:06 +08:00
mdb_close ( mdb ) ;
2014-12-29 20:10:01 +08:00
g_option_context_free ( opt_context ) ;
// g_free ignores NULL
g_free ( quote_char ) ;
g_free ( delimiter ) ;
g_free ( row_delimiter ) ;
g_free ( insert_dialect ) ;
g_free ( date_fmt ) ;
g_free ( escape_char ) ;
g_free ( namespace ) ;
g_free ( str_bin_mode ) ;
2011-02-17 07:57:40 +08:00
return 0 ;
2000-03-13 02:21:17 +08:00
}
2005-09-08 07:27:43 +08:00
static char * escapes ( char * s )
{
char * d = ( char * ) g_strdup ( s ) ;
char * t = d ;
2014-12-29 20:10:01 +08:00
char * orig = s ;
2005-09-08 07:27:43 +08:00
unsigned char encode = 0 ;
for ( ; * s ; s + + ) {
if ( encode ) {
switch ( * s ) {
case ' n ' : * t + + = ' \n ' ; break ;
case ' t ' : * t + + = ' \t ' ; break ;
case ' r ' : * t + + = ' \r ' ; break ;
default : * t + + = ' \\ ' ; * t + + = * s ; break ;
2018-02-09 06:15:29 +08:00
}
2005-09-08 07:27:43 +08:00
encode = 0 ;
} else if ( * s = = ' \\ ' ) {
encode = 1 ;
} else {
* t + + = * s ;
}
}
* t = ' \0 ' ;
2014-12-29 20:10:01 +08:00
g_free ( orig ) ;
2005-09-08 07:27:43 +08:00
return d ;
}