Skip to content
/ server Public
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions mysql-test/main/analyze_stmt_prefetch_count.result
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,12 @@ set @low_ok= @pages_accessed*0.75 < @total_read;
set @high_ok= @total_read < @pages_accessed*1.50;
select @low_ok, @high_ok;
@low_ok @high_ok
1 1
NULL NULL
select
if(@low_ok and @high_ok,0,@pages_accessed) unexpected_accessed,
if(@low_ok and @high_ok,0,@total_read) unexpected_read;
unexpected_accessed unexpected_read
0 0
1174 NULL
set @innodb_pages_read1=
(select variable_value
from information_schema.session_status
Expand Down
12 changes: 12 additions & 0 deletions mysql-test/suite/innodb/disabled.def
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
##############################################################################
#
# List the test cases that are to be disabled temporarily.
#
# Separate the test case name and the comment with ':'.
#
# <testcasename> : BUG#<xxxx> <date disabled> <disabler> <comment>
#
# Do not use any TAB characters for whitespace.
#
##############################################################################
innodb.innodb_buffer_pool_resize : MDEV-32067 Need to figure out why we are running out of buffer pool with new read ahead mechanism. (buf_read_ahead_one)
7 changes: 7 additions & 0 deletions sql/handler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4513,6 +4513,13 @@ class handler :public Sql_alloc
size_t size)
{ return 0; }

/**
  Configure MRR read-ahead optimization based on the query's LIMIT value.

  Invoked by the DS-MRR layer (DsMrr_impl::dsmrr_init) before a scan
  starts. Storage engines can override this to implement LIMIT-aware
  read-ahead; the default implementation is a no-op, so engines that do
  not support the optimization need no changes.

  @param max_pages  Maximum number of pages to read ahead
                    (0 = disable LIMIT-based read-ahead)
*/
virtual void configure_mrr_readahead(uint max_pages) {}

virtual int read_range_first(const key_range *start_key,
const key_range *end_key,
bool eq_range, bool sorted);
Expand Down
48 changes: 47 additions & 1 deletion sql/multi_range_read.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,44 @@ int Mrr_ordered_rndpos_reader::get_next(range_id_t *range_info)
/****************************************************************************
* Top-level DS-MRR implementation functions (the ones called by storage engine)
***************************************************************************/
/**
Calculate how many pages we should read-ahead for given LIMIT
*/
/**
  Calculate how many pages we should read-ahead for a given LIMIT.

  @param limit             Number of rows the query needs (from LIMIT)
  @param records_per_page  Estimated records that fit on one index page
                           (an estimate of 0 is treated as 1)
  @return number of pages to read ahead, including a ~20% safety buffer
          for sparse pages and deleted records, clamped to UINT_MAX
*/
static uint calculate_pages_for_limit(ha_rows limit, uint records_per_page)
{
  /*
    Guard against division by zero: the per-page estimate can
    legitimately be 0 for an empty table or missing statistics.
  */
  if (records_per_page == 0)
    records_per_page= 1;

  /*
    Ceiling division, performed in ha_rows (64-bit) so a large LIMIT is
    not truncated before the division as it would be by a uint cast.
  */
  ha_rows pages_needed= (limit + records_per_page - 1) / records_per_page;

  /*
    Add a 20% buffer for sparse pages and deleted records.
    n + n/5 == floor(n * 1.2) exactly, without float arithmetic.
    Clamp to UINT_MAX so the result cannot wrap on conversion.
  */
  ha_rows padded= pages_needed + pages_needed / 5;
  if (padded > (ha_rows) ~(uint) 0)
    padded= (ha_rows) ~(uint) 0;
  return (uint) padded;
}

/**
Estimate average number of records per page for the active index
*/
/**
  Estimate the average number of records per page for the active index.

  Uses the table statistics (total rows / data pages) and bounds the
  result by how many keys can physically fit on one page.

  @param h_arg  handler whose active_index statistics are examined
  @return estimated records per page; always >= 1 so callers can safely
          divide by the result
*/
static uint estimate_records_per_page(handler *h_arg)
{
  TABLE *table= h_arg->get_table();
  KEY *key_info= &table->key_info[h_arg->active_index];
  const ha_rows uint_max= (ha_rows) ~(uint) 0;

  /* Use table statistics to estimate records per page */
  ha_rows total_rows= table->file->stats.records;
  uint block_size= table->file->stats.block_size;

  /*
    Statistics may be absent (e.g. freshly created table): never divide
    by a zero block size, and never return 0 (callers divide by us).
  */
  if (block_size == 0 || total_rows == 0)
    return 1;

  ha_rows index_pages= table->file->stats.data_file_length / block_size;

  if (index_pages == 0)
    return (uint) std::min(total_rows, uint_max);

  uint records_per_page= (uint) std::min(total_rows / index_pages, uint_max);

  /* Apply an upper bound based on how many keys fit on one page */
  uint key_length= key_info->key_length;
  if (key_length == 0)                       /* defensive: avoid div by 0 */
    return std::max(records_per_page, 1U);

  uint max_records= block_size / key_length;

  return std::max(std::min(records_per_page, max_records), 1U);
}


/**
DS-MRR: Initialize and start MRR scan
Expand Down Expand Up @@ -1142,7 +1180,15 @@ int DsMrr_impl::dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
is_mrr_assoc= !MY_TEST(mode & HA_MRR_NO_ASSOCIATION);

strategy_exhausted= FALSE;


uint max_pages_for_limit= 0;
if (limit_hint == HA_POS_ERROR);
else if (limit_hint > 2)
max_pages_for_limit= calculate_pages_for_limit(
limit_hint, estimate_records_per_page(h_arg));

h_arg->configure_mrr_readahead(max_pages_for_limit);

/* By default, have do-nothing buffer manager */
buf_manager.arg= this;
buf_manager.reset_buffer_sizes= do_nothing;
Expand Down
11 changes: 9 additions & 2 deletions sql/multi_range_read.h
Original file line number Diff line number Diff line change
Expand Up @@ -557,10 +557,11 @@ class DsMrr_impl
public:
typedef void (handler::*range_check_toggle_func_t)(bool on);

void init(handler *h_arg, TABLE *table_arg)
void init(handler *h_arg, TABLE *table_arg, ha_rows limit= HA_POS_ERROR)
{
primary_file= h_arg;
table= table_arg;
limit_hint= limit;
}
int dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
void *seq_init_param, uint n_ranges, uint mode,
Expand All @@ -576,6 +577,9 @@ class DsMrr_impl
uint *flags, ha_rows limit, Cost_estimate *cost);

int dsmrr_explain_info(uint mrr_mode, char *str, size_t size);
void set_limit(ha_rows limit) { limit_hint= limit; }
ha_rows get_limit() { return limit_hint; }

private:
/* Buffer to store (key, range_id) pairs */
Lifo_buffer *key_buffer= nullptr;
Expand Down Expand Up @@ -635,7 +639,10 @@ class DsMrr_impl
is_mrr_assoc==FALSE
*/
Forward_lifo_buffer rowid_buffer;


/* LIMIT value */
ha_rows limit_hint= HA_POS_ERROR;

bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz,
Cost_estimate *cost);
bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags,
Expand Down
61 changes: 36 additions & 25 deletions storage/innobase/btr/btr0cur.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1079,7 +1079,8 @@ static int btr_latch_prev(rw_lock_type_t rw_latch,
}

dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
btr_latch_mode latch_mode, mtr_t *mtr)
btr_latch_mode latch_mode, mtr_t *mtr,
mrr_readahead_ctx_t* mrr_ctx)
{
ut_ad(index()->is_btree());

Expand Down Expand Up @@ -1245,14 +1246,35 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,

page_cur.block= block;
ut_ad(block == mtr->at_savepoint(block_savepoint));
const bool not_first_access{buf_page_make_young_if_needed(&block->page)};
buf_page_make_young_if_needed(&block->page);
#ifdef UNIV_ZIP_DEBUG
if (const page_zip_des_t *page_zip= buf_block_get_page_zip(block))
ut_a(page_zip_validate(page_zip, block->page.frame, index()));
#endif /* UNIV_ZIP_DEBUG */

uint32_t page_level= btr_page_get_level(block->page.frame);

/* MRR read-ahead: Collect leaf page numbers at PAGE_LEVEL = 1 */
if (mrr_ctx && mrr_ctx->enabled && page_level == 1 &&
mrr_ctx->page_list && mrr_ctx->pages_found < mrr_ctx->max_pages)
{
/* Collect child page numbers from non-leaf records */
mem_heap_t *heap= nullptr;
rec_t* rec= page_get_infimum_rec(block->page.frame);
while (rec && mrr_ctx->pages_found < mrr_ctx->max_pages)
{
rec= page_rec_get_next(rec);
if (page_rec_is_supremum(rec)) break;
/* Extract child page number from non-leaf record */
rec_offs* child_offsets= rec_get_offsets(rec, index(), nullptr, 0,
ULINT_UNDEFINED, &heap);
mrr_ctx->page_list[mrr_ctx->pages_found++]=
btr_node_ptr_get_child_page_no(rec, child_offsets);
}
if (heap)
mem_heap_free(heap);
}

if (height == ULINT_UNDEFINED)
{
/* We are in the B-tree index root page. */
Expand Down Expand Up @@ -1530,9 +1552,6 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
case BTR_SEARCH_PREV: /* btr_pcur_move_to_prev() */
ut_ad(rw_latch == RW_S_LATCH);

if (!not_first_access)
buf_read_ahead_linear(page_id);

if (page_has_prev(block->page.frame) &&
page_rec_is_first(page_cur.rec, block->page.frame))
{
Expand Down Expand Up @@ -1566,8 +1585,6 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
case BTR_MODIFY_LEAF:
case BTR_SEARCH_LEAF:
rw_latch= rw_lock_type_t(latch_mode);
if (!not_first_access)
buf_read_ahead_linear(page_id);
break;
case BTR_MODIFY_TREE:
ut_ad(rw_latch == RW_X_LATCH);
Expand Down Expand Up @@ -2035,11 +2052,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,

ut_ad(latch_mode != BTR_MODIFY_TREE || upper_rw_latch == RW_X_LATCH);

if (latch_mode != BTR_MODIFY_TREE)
{
if (!height && first && first_access)
buf_read_ahead_linear(page_id_t(block->page.id().space(), page));
}
if (latch_mode != BTR_MODIFY_TREE);
else if (btr_cur_need_opposite_intention(block->page, index->is_clust(),
lock_intention,
node_ptr_max_size, compress_limit,
Expand Down Expand Up @@ -6459,9 +6472,9 @@ btr_copy_blob_prefix(
ulint copied_len = 0;
THD* thd{current_thd};

for (mtr_t mtr{thd ? thd_to_trx(thd) : nullptr};;) {
for (mtr_t mtr{thd ? thd_to_trx(thd) : nullptr};;
offset = FIL_PAGE_DATA) {
buf_block_t* block;
const page_t* page;
const byte* blob_header;
ulint part_len;
ulint copy_len;
Expand All @@ -6470,38 +6483,36 @@ btr_copy_blob_prefix(

block = buf_page_get(id, 0, RW_S_LATCH, &mtr);
if (!block || btr_check_blob_fil_page_type(*block, "read")) {
func_exit:
mtr.commit();
return copied_len;
}
if (!buf_page_make_young_if_needed(&block->page)) {
buf_read_ahead_linear(id);
}

page = buf_block_get_frame(block);
buf_page_make_young_if_needed(&block->page);

blob_header = page + offset;
blob_header= block->page.frame + offset;
part_len = btr_blob_get_part_len(blob_header);
copy_len = ut_min(part_len, len - copied_len);

memcpy(buf + copied_len,
blob_header + BTR_BLOB_HDR_SIZE, copy_len);
copied_len += copy_len;

id.set_page_no(btr_blob_get_next_page_no(blob_header));

mtr_commit(&mtr);

if (id.page_no() == FIL_NULL || copy_len != part_len) {
const uint32_t next{btr_blob_get_next_page_no(blob_header)};
if (next == FIL_NULL || copy_len != part_len) {
MEM_CHECK_DEFINED(buf, copied_len);
return(copied_len);
goto func_exit;
}

mtr_commit(&mtr);

/* On other BLOB pages except the first the BLOB header
always is at the page data start: */

offset = FIL_PAGE_DATA;

ut_ad(copied_len <= len);
id.set_page_no(next);
}
}

Expand Down
48 changes: 47 additions & 1 deletion storage/innobase/btr/btr0pcur.cc
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,8 @@ btr_pcur_move_to_next_page(
const auto s = mtr->get_savepoint();
mtr->rollback_to_savepoint(s - 2, s - 1);
if (first_access) {
buf_read_ahead_linear(next_block->page.id());
buf_read_ahead_one(cursor->index()->table->space,
btr_page_get_next(next_block->page.frame));
}
return DB_SUCCESS;
}
Expand All @@ -559,6 +560,51 @@ btr_pcur_move_backward_from_page(
{
ut_ad(btr_pcur_is_before_first_on_page(cursor));
ut_ad(!btr_pcur_is_before_first_in_tree(cursor));
ut_ad(!cursor->old_rec);

const auto latch_mode = cursor->latch_mode;
ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF);

uint32_t space= btr_pcur_get_block(cursor)->page.id().space();
uint32_t page_no= btr_page_get_prev(btr_pcur_get_page(cursor));
/* Fast path: Try to latch the previous page without waiting */
if (buf_block_t *prev =
buf_pool.page_fix(page_id_t(space, page_no), nullptr,
nullptr, buf_pool_t::FIX_NOWAIT)) {
if (prev == reinterpret_cast<buf_block_t*>(-1)) {
} else if (latch_mode == BTR_SEARCH_LEAF
? prev->page.lock.s_lock_try()
: prev->page.lock.x_lock_try()) {
const page_t *page= btr_pcur_get_page(cursor);
const page_t *p= prev->page.frame;
if (memcmp_aligned<4>(FIL_PAGE_NEXT + p,
FIL_PAGE_OFFSET + page, 4)
|| memcmp_aligned<2>(FIL_PAGE_TYPE + p,
FIL_PAGE_TYPE + page, 2)
|| memcmp_aligned<2>(PAGE_HEADER + PAGE_INDEX_ID
+ p,
PAGE_HEADER + PAGE_INDEX_ID
+ page, 8)
|| page_is_comp(p) != page_is_comp(page)) {
ut_ad("corrupted" == 0);
mtr->memo_push(prev,
mtr_memo_type_t(latch_mode));
} else {
page_cur_set_after_last(
prev, &cursor->btr_cur.page_cur);
mtr->commit();
mtr->start();
mtr->memo_push(
prev, mtr_memo_type_t(latch_mode));
buf_read_ahead_one(
cursor->index()->table->space,
btr_page_get_prev(p));
return false;
}
} else {
mtr->memo_push(prev, MTR_MEMO_BUF_FIX);
}
}

btr_pcur_store_position(cursor, mtr);

Expand Down
Loading