From a62f26df50f63860f7bd3aaae3e359c8c6a75560 Mon Sep 17 00:00:00 2001 From: brianb Date: Wed, 11 Feb 2004 22:05:13 +0000 Subject: [PATCH] support for compressed index keys and trailing leaves --- src/libmdb/data.c | 11 +++-- src/libmdb/file.c | 19 +++++++++ src/libmdb/index.c | 103 +++++++++++++++++++++++++++++++++------------ src/libmdb/table.c | 25 +++++++---- src/libmdb/write.c | 25 ++++++++--- 5 files changed, 139 insertions(+), 44 deletions(-) diff --git a/src/libmdb/data.c b/src/libmdb/data.c index 0aa5a74..6358a41 100644 --- a/src/libmdb/data.c +++ b/src/libmdb/data.c @@ -525,13 +525,18 @@ mdb_read_next_dpg(MdbTableDef *table) MdbCatalogEntry *entry = table->entry; MdbHandle *mdb = entry->mdb; int map_type; +guint32 pg; #ifndef SLOW_READ map_type = table->usage_map[0]; if (map_type==0) { - return mdb_read_next_dpg_by_map0(table); + pg = mdb_read_next_dpg_by_map0(table); + //printf("Next dpg = %lu\n", pg); + return pg; } else if (map_type==1) { - return mdb_read_next_dpg_by_map1(table); + pg = mdb_read_next_dpg_by_map1(table); + //printf("Next dpg = %lu\n", pg); + return pg; } else { fprintf(stderr,"Warning: unrecognized usage map type: %d, defaulting to brute force read\n",table->usage_map[0]); } @@ -541,7 +546,7 @@ int map_type; if (!mdb_read_pg(mdb, table->cur_phys_pg++)) return 0; } while (mdb->pg_buf[0]!=0x01 || mdb_pg_get_int32(mdb, 4)!=entry->table_pg); - /* fprintf(stderr,"returning new page %ld\n", table->cur_phys_pg); */ + /* fprintf(stderr,"returning new page %ld\n", table->cur_phys_pg); */ return table->cur_phys_pg; } int mdb_rewind_table(MdbTableDef *table) diff --git a/src/libmdb/file.c b/src/libmdb/file.c index 867a577..c1b2b88 100644 --- a/src/libmdb/file.c +++ b/src/libmdb/file.c @@ -23,6 +23,25 @@ #include "dmalloc.h" #endif +/* +typedef struct { + int pg_size; + guint16 row_count_offset; + guint16 tab_num_rows_offset; + guint16 tab_num_cols_offset; + guint16 tab_num_idxs_offset; + guint16 tab_num_ridxs_offset; + guint16 tab_usage_map_offset; + guint16 tab_first_dpg_offset; + guint16 tab_cols_start_offset; + guint16 tab_ridx_entry_size; + guint16 col_fixed_offset; + guint16 col_size_offset; + guint16 col_num_offset; + guint16 tab_col_entry_size; + guint16 tab_free_map_offset; +} MdbFormatConstants; +*/ MdbFormatConstants MdbJet4Constants = { 4096, 0x0c, 16, 45, 47, 51, 55, 56, 63, 12, 15, 23, 5, 25, 59 }; diff --git a/src/libmdb/index.c b/src/libmdb/index.c index 1b6ec81..3d554bf 100644 --- a/src/libmdb/index.c +++ b/src/libmdb/index.c @@ -212,7 +212,7 @@ mdb_index_cache_sarg(MdbColumn *col, MdbSarg *sarg, MdbSarg *idx_sarg) //cache_int = sarg->value.i * -1; c = (unsigned char *) &(idx_sarg->value.i); c[0] |= 0x80; - printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]); + //printf("int %08x %02x %02x %02x %02x\n", sarg->value.i, c[0], c[1], c[2], c[3]); break; case MDB_INT: @@ -252,7 +252,7 @@ int lastchar; } #endif int -mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, int offset, int len) +mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, unsigned char *buf, int len) { int i, j; MdbColumn *col; @@ -265,16 +265,17 @@ mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, int offset, int len) //fprintf(stderr,"mdb_index_test_sargs called on "); //for (i=0;ipg_buf[offset+i]); + //fprintf(stderr,"%02x ",buf[i]); //mdb->pg_buf[offset+i]); //fprintf(stderr,"\n"); for (i=0;inum_keys;i++) { - c_offset++; /* the per column null indicator/flags */ + //c_offset++; /* the per column null indicator/flags */ col=g_ptr_array_index(table->columns,idx->key_col_num[i]-1); /* * This will go away eventually */ if (col->col_type==MDB_TEXT) { - c_len = strlen(&mdb->pg_buf[offset + c_offset]); + //c_len = strlen(&mdb->pg_buf[offset + c_offset]); + c_len = strlen(buf); } else { c_len = col->col_size; //fprintf(stderr,"Only text types currently supported. How did we get here?\n"); @@ -299,7 +300,8 @@ mdb_index_test_sargs(MdbHandle *mdb, MdbIndex *idx, int offset, int len) /* XXX - kludge */ node.op = sarg->op; node.value = sarg->value; - field.value = &mdb->pg_buf[offset + c_offset]; + //field.value = &mdb->pg_buf[offset + c_offset]; + field.value = buf; field.siz = c_len; field.is_null = FALSE; if (!mdb_test_sarg(mdb, col, &node, &field)) { @@ -364,8 +366,13 @@ mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) * we are simply done. (there is no page to find */ - if (mdb->pg_buf[0]==MDB_PAGE_LEAF) + if (mdb->pg_buf[0]==MDB_PAGE_LEAF) { + /* Indexes can have leaves at the end that don't appear + * in the upper tree, stash the last index found so + * we can follow it at the end. */ + chain->last_leaf_found = ipg->pg; return ipg; + } /* * apply sargs here, currently we don't @@ -387,7 +394,7 @@ mdb_find_next_leaf(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) */ newipg = mdb_chain_add_page(mdb, chain, pg); newipg = mdb_find_next_leaf(mdb, idx, chain); - printf("returning pg %lu\n",newipg->pg); + //printf("returning pg %lu\n",newipg->pg); return newipg; } while (!passed); /* no more pages */ @@ -438,6 +445,35 @@ mdb_index_read_bottom_pg(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) return ipg; } +/* + * unwind the stack and search for new leaf node + */ +MdbIndexPage * +mdb_index_unwind(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain) +{ + MdbIndexPage *ipg; + + //printf("page %lu finished\n",ipg->pg); + if (chain->cur_depth==1) { + //printf("cur_depth == 1 we're out\n"); + return NULL; + } + /* + * unwind the stack until we find something or reach + * the top. + */ + ipg = NULL; + while (chain->cur_depth>1 && ipg==NULL) { + //printf("chain depth %d\n", chain->cur_depth); + chain->cur_depth--; + ipg = mdb_find_next_leaf(mdb, idx, chain); + if (ipg) mdb_index_find_next_on_page(mdb, ipg); + } + if (chain->cur_depth==1) { + //printf("last leaf %lu\n", chain->last_leaf_found); + return NULL; + } +} /* * the main index function. * caller provides an index chain which is the current traversal of index @@ -455,7 +491,8 @@ mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 { MdbIndexPage *ipg; int passed = 0; - + int idx_sz; + int idx_start = 0; ipg = mdb_index_read_bottom_pg(mdb, idx, chain); @@ -468,30 +505,44 @@ mdb_index_find_next(MdbHandle *mdb, MdbIndex *idx, MdbIndexChain *chain, guint32 * if no more rows on this leaf, try to find a new leaf */ if (!mdb_index_find_next_on_page(mdb, ipg)) { - //printf("page %lu finished\n",ipg->pg); - if (chain->cur_depth==1) { - //printf("cur_depth == 1 we're out\n"); - return 0; + if (!chain->clean_up_mode) { + if (!(ipg = mdb_index_unwind(mdb, idx, chain))) + chain->clean_up_mode = 1; } - /* - * unwind the stack until we find something or reach - * the top. - */ - ipg = 0; - while (chain->cur_depth>1 && ipg==0) { - //printf("chain depth %d\n", chain->cur_depth); - chain->cur_depth--; - ipg = mdb_find_next_leaf(mdb, idx, chain); - if (ipg) mdb_index_find_next_on_page(mdb, ipg); + if (chain->clean_up_mode) { + //printf("in cleanup mode\n"); + + if (!chain->last_leaf_found) return 0; + mdb_read_pg(mdb, chain->last_leaf_found); + chain->last_leaf_found = mdb_pg_get_int24(mdb, 0x0c); + //printf("next leaf %lu\n", chain->last_leaf_found); + mdb_read_pg(mdb, chain->last_leaf_found); + /* reuse the chain for cleanup mode */ + chain->cur_depth = 1; + ipg = &chain->pages[0]; + mdb_index_page_init(ipg); + ipg->pg = chain->last_leaf_found; + //printf("next on page %d\n", + if (!mdb_index_find_next_on_page(mdb, ipg)) + return 0; } - if (chain->cur_depth==1) - return 0; } *row = mdb->pg_buf[ipg->offset + ipg->len - 1]; *pg = mdb_pg_get_int24_msb(mdb, ipg->offset + ipg->len - 4); //printf("row = %d pg = %lu ipg->pg = %lu offset = %lu len = %d\n", *row, *pg, ipg->pg, ipg->offset, ipg->len); + idx_sz = 4; + if (ipg->len - 4 < idx_sz) { + //printf("short index found\n"); + //buffer_dump(ipg->cache_value, 0, idx_sz); + memcpy(&ipg->cache_value[idx_sz - (ipg->len - 4)], &mdb->pg_buf[ipg->offset], ipg->len); + //buffer_dump(ipg->cache_value, 0, idx_sz); + } else { + idx_start = ipg->offset + (ipg->len - 4 - idx_sz); + memcpy(ipg->cache_value, &mdb->pg_buf[idx_start], idx_sz); + } - passed = mdb_index_test_sargs(mdb, idx, ipg->offset, ipg->len); + //idx_start = ipg->offset + (ipg->len - 4 - idx_sz); + passed = mdb_index_test_sargs(mdb, idx, ipg->cache_value, idx_sz); ipg->offset += ipg->len; } while (!passed); diff --git a/src/libmdb/table.c b/src/libmdb/table.c index 7d8ef9e..fb6a612 100644 --- a/src/libmdb/table.c +++ b/src/libmdb/table.c @@ -44,13 +44,15 @@ unsigned char mdb_col_needs_size(int col_type) } } -MdbTableDef *mdb_read_table(MdbCatalogEntry *entry) +MdbTableDef * +mdb_read_table(MdbCatalogEntry *entry) { -MdbTableDef *table; -MdbHandle *mdb = entry->mdb; -MdbFormatConstants *fmt = mdb->fmt; -int len; -int rownum, row_start, row_end; + MdbTableDef *table; + MdbHandle *mdb = entry->mdb; + MdbFormatConstants *fmt = mdb->fmt; + int len; + int rownum, row_start, row_end; + guint32 pg; table = mdb_alloc_tabledef(entry); @@ -66,7 +68,8 @@ int rownum, row_start, row_end; /* grab a copy of the usage map */ rownum = mdb->pg_buf[fmt->tab_usage_map_offset]; - mdb_read_alt_pg(mdb, mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1)); + pg = mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1); + mdb_read_alt_pg(mdb, pg); mdb_swap_pgbuf(mdb); row_start = mdb_pg_get_int16(mdb, (fmt->row_count_offset + 2) + (rownum*2)); row_end = mdb_find_end_of_row(mdb, rownum); @@ -79,7 +82,7 @@ int rownum, row_start, row_end; /* swap back */ mdb_swap_pgbuf(mdb); #if MDB_DEBUG_USAGE - printf ("usage map found on page %ld start %d end %d\n", mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1), row_start, row_end); + printf ("usage map found on page %ld rownum %d start %d end %d\n", mdb_pg_get_int24(mdb, fmt->tab_usage_map_offset + 1), rownum, row_start, row_end); #endif @@ -96,6 +99,9 @@ int rownum, row_start, row_end; memcpy(table->free_usage_map, &mdb->pg_buf[row_start], table->freemap_sz); mdb_swap_pgbuf(mdb); #endif +#if MDB_DEBUG_USAGE + printf ("free map found on page %ld rownum %d start %d end %d\n", mdb_pg_get_int24(mdb, fmt->tab_free_map_offset + 1), rownum, row_start, row_end); +#endif table->first_data_pg = mdb_pg_get_int16(mdb, fmt->tab_first_dpg_offset); @@ -326,6 +332,8 @@ guint32 pgnum; } if (table->usage_map) { printf("pages reserved by this object\n"); + printf("usage map pg %lu\n", table->map_base_pg); + printf("free map pg %lu\n", table->freemap_base_pg); pgnum = mdb_get_int32(table->usage_map,1); /* the first 5 bytes of the usage map mean something */ coln = 0; @@ -342,5 +350,6 @@ guint32 pgnum; pgnum++; } } + printf("\n"); } } diff --git a/src/libmdb/write.c b/src/libmdb/write.c index d1cf367..b10e31b 100644 --- a/src/libmdb/write.c +++ b/src/libmdb/write.c @@ -134,12 +134,8 @@ int eod, len; /* end of data */ /* find the end of data pointer */ eod = mdb_pg_get_int16(mdb, row_end - 3 - var_cols*2 - bitmask_sz); - if (IS_JET4(mdb)) { - col_start = 2; - } else { - /* data starts at 1 */ - col_start = 1; - } + col_start = 2; + /* actual cols on this row */ fixed_cols_found = 0; var_cols_found = 0; @@ -199,7 +195,11 @@ int num_of_jumps = 0, jumps_used = 0; int eod, len; /* end of data */ num_cols = mdb->pg_buf[row_start]; + if (num_cols != table->num_cols) { + fprintf(stderr,"WARNING: number of table columns does not match number of row columns, strange results may occur\n"); + } + /* how many fixed cols? */ for (i = 0; i < table->num_cols; i++) { col = g_ptr_array_index (table->columns, i); if (mdb_is_fixed_col(col)) { @@ -209,6 +209,7 @@ int eod, len; /* end of data */ fields[totcols++].is_fixed = 1; } } + /* how many var cols? */ for (i = 0; i < table->num_cols; i++) { col = g_ptr_array_index (table->columns, i); if (!mdb_is_fixed_col(col)) { @@ -251,7 +252,6 @@ int eod, len; /* end of data */ } } - //fprintf(stderr, "col_start: %d\n", col_start); /* if fixed columns add up to more than 256, we need a jump */ int col_ptr = row_end - bitmask_sz - num_of_jumps - 1; if (col_start >= 256) { @@ -261,6 +261,9 @@ int eod, len; /* end of data */ } col_start = row_start; + /* compute the number of jumps (row size - overhead) / 256 + * but you have to include the jump table itself, thus + * the loop. */ while (col_start+256 < row_end-bitmask_sz-1-var_cols-num_of_jumps){ col_start += 256; num_of_jumps++; @@ -268,12 +271,18 @@ int eod, len; /* end of data */ if (mdb->pg_buf[col_ptr]==0xFF) { col_ptr--; } + /* col_start is now the offset to the first variable length field */ col_start = mdb->pg_buf[col_ptr]; for (j=0;jnum_cols;j++) { col = g_ptr_array_index(table->columns,j); + /* if it's a var_col and we aren't looking at a column + * added after this row was created */ if (!mdb_is_fixed_col(col) && ++var_cols_found <= var_cols) { + /* if the position of this var_col matches the number + * in the current jump table entry, then increment + * the jump_used and adjust the col/row_start */ if (var_cols_found == mdb->pg_buf[row_end-bitmask_sz-jumps_used-1] && jumps_used < num_of_jumps) { row_start += 256; @@ -281,6 +290,8 @@ int eod, len; /* end of data */ jumps_used++; } + /* if we have the last var_col, use the eod offset to + * figure out where the end is */ if (var_cols_found==var_cols) { len=eod - col_start; //printf("len = %d eod %d col_start %d\n",len, eod, col_start);