unicode compression for Access 2000

This commit is contained in:
brianb 2002-03-17 02:40:16 +00:00
parent 2f3d61c133
commit d13908dcf3
6 changed files with 64 additions and 9 deletions

View File

@ -1,3 +1,10 @@
Sat Mar 16 21:37:01 EST 2002 Brian Bruns <camber@ais.org>
* include/mdbtools.h: added oam usage map fields to TableDef
* src/extras/mdb-dump.c: added JET4 support
* src/libmdb/data.c: added code to check for unicode compression
* src/libmdb/mem.c(mdb_free_tabledef): clean up for oam fields
2002-03-15 Brian Bruns <camber@ais.org>
* src/libmdb/money.c: Two's complement fix from

25
HACKING
View File

@ -11,8 +11,9 @@ Pages
-----
MDB files are a set of pages. These pages are 2K (2048 bytes) in size, so in a
hex dump of the data they start on adreeses like xxx000 and xxx800. Acess 2000
has increased the page size to 4K.
hex dump of the data they start on addresses like xxx000 and xxx800. Access
2000 has increased the page size to 4K and thus pages would appear on hex
addresses ending in xxx000.
Each page is known by a page_id of 3 bytes (max value is 0x07FFFF).
The start address of a page is at page_id * 0x800.
@ -51,7 +52,7 @@ The second byte is always 0x01 as far as I can tell.
At some point in the file the page layout is apparently abandoned though the
very last 2K in the file again looks like a valid page. The purpose of this
non-paged region is so far unknown. Could be a corrupt db as well.
non-paged region is so far unknown. Could be a corrupt db as well. My current thinking is that this area is unallocated pages based on the GAM (global allocation map stored on page 0x01).
Bytes after the first and second seemed to depend on the type of page, although bytes 4-7 seem to indicate a page type of some sort. 02 00 00 00 is found on all catalog pages.
@ -279,7 +280,7 @@ Notes for offset_row:
(These flags are delflag and lookupflag in source code)
Each data row looks like this:
Each data row looks like this (JET3):
+------+---------+----------------------------------------------------------+
| data | length | name | description |
@ -299,6 +300,22 @@ Note: For boolean fixed columns, the values are in null_table[]:
0 indicates a false value
1 indicates a true value
In Access 2000 (JET4) data rows look like this:
+------+---------+----------------------------------------------------------+
| data | length | name | description |
+------+---------+----------------------------------------------------------+
| ???? | 2 bytes | num_cols | Number of columns stored in this row |
| ???? | n bytes | | Fixed length columns |
| ???? | n bytes | | Variable length columns |
| ???? | 2 bytes | fixed_len | length of data from beginning of record |
| ???? | n bytes | var_table[] | offset from start of row for each variable |
| | | | length column. (2 bytes per var column) |
| ???? | 2 bytes | var_len | number of variable length columns |
| ???? | n bytes | null_table[]| Null indicator. size is 1 byte per 8 cols. |
| | | | 0 indicates a null value. |
+------+---------+----------------------------------------------------------+
Note: it is possible for the offset to the beginning of a variable length
column to require more than one byte (if the sum of the lengths of columns is
greater than 255). I have no idea how this is represented in the data as I

View File

@ -152,6 +152,11 @@ typedef struct {
int cur_phys_pg;
int cur_row;
int noskip_del; /* don't skip deleted rows */
/* object allocation map */
int map_base_pg;
int map_sz;
unsigned char *usage_map;
/* */
} MdbTableDef;
typedef struct {

View File

@ -10,6 +10,11 @@
#include <limits.h>
#include <assert.h>
#include <ctype.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <ctype.h>
#include <string.h>
int main(int argc, char **argv)
{
@ -20,8 +25,9 @@ int main(int argc, char **argv)
int length;
int pg=0;
char addr[10];
int jet4 = 0;
if (argc < 2) {
if (argc < 1) {
fprintf(stderr, "Usage: mdb-dump <filename>\n\n");
exit(1);
}
@ -29,9 +35,15 @@ int main(int argc, char **argv)
fprintf(stderr, "Couldn't open file %s\n", argv[1]);
exit(1);
}
fseek(in,0x14,SEEK_SET);
fread(data,1,1,in);
if (data[0]==0x01) {
jet4 = 1;
}
fseek(in,0,SEEK_SET);
while (length = fread(data,1,16,in)) {
sprintf(addr, "%06x", i);
if (!strcmp(&addr[3],"000") ) { //|| ! strcmp(&addr[3],"800")) {
if (!strcmp(&addr[3],"000") || (!jet4 && !strcmp(&addr[3],"800"))) {
fprintf(stdout,"-- Page 0x%04x (%d) --\n", pg, pg);
pg++;
}

View File

@ -433,9 +433,22 @@ int i;
return "";
}
if (mdb->jet_version==MDB_VER_JET4) {
for (i=0;i<size;i+=2)
text[i/2] = mdb->pg_buf[start + i];
text[size/2]='\0';
/*
for (i=0;i<size;i++) {
fprintf(stdout, "%c %02x ", mdb->pg_buf[start+i], mdb->pg_buf[start+i]);
}
fprintf(stdout, "\n");
*/
if (mdb->pg_buf[start]==0xff &&
mdb->pg_buf[start+1]==0xfe) {
strncpy(text, &mdb->pg_buf[start+2], size-2);
text[size-2]='\0';
} else {
/* convert unicode to ascii, rather sloppily */
for (i=0;i<size;i+=2)
text[i/2] = mdb->pg_buf[start + i];
text[size/2]='\0';
}
} else {
strncpy(text, &mdb->pg_buf[start], size);
text[size]='\0';

View File

@ -73,6 +73,7 @@ MdbTableDef *table;
}
/*
 * Release a table definition together with the usage map buffer it holds.
 *
 * table: the definition to free; NULL is accepted and ignored.
 *
 * The NULL check must come before any member access: the previous version
 * read table->usage_map first, so its later "if (table)" guard could never
 * protect against a NULL argument.
 */
void mdb_free_tabledef(MdbTableDef *table)
{
if (!table) return;
/* free(NULL) is a no-op, so usage_map needs no guard of its own */
free(table->usage_map);
free(table);
}
mdb_append_column(GPtrArray *columns, MdbColumn *in_col)