Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
69bc2d8
[fix](inverted index) resolve variant sub-column indexes for score query
airborne12 May 3, 2026
1cc5c93
[fix](inverted index) address variant score review comments
airborne12 May 3, 2026
d483432
[fix](inverted index) handle variant score parent index fallback
airborne12 May 3, 2026
21c9e64
[fix](inverted index) minimize variant util changes
airborne12 May 3, 2026
69b5dc4
[fix](inverted index) collapse variant score fallback and drop typed_…
airborne12 May 4, 2026
948049c
[refactor](inverted index) extract resolve_subcolumn_indexes_inherita…
airborne12 May 4, 2026
01eb802
[test](inverted index) regression for variant score with field_patter…
airborne12 May 4, 2026
e12ab7f
[fix](inverted index) resolve score stats for materialized variant paths
airborne12 May 4, 2026
452a20e
[fix](inverted index) format variant column reader
airborne12 May 4, 2026
a1730c2
[fix](inverted index) restore find_subcolumn_tablet_indexes to master
airborne12 May 5, 2026
efdde31
[fix](inverted index) restrict collector fallback to schema-only fiel…
airborne12 May 5, 2026
3d90512
[refactor](inverted index) drop unused resolve_subcolumn_indexes_inhe…
airborne12 May 5, 2026
6c4fff4
[test](inverted index) cover all branches of MatchPredicateCollector:…
airborne12 May 5, 2026
00738dd
[fix](inverted index) clone parent plain index for VARIANT placeholde…
airborne12 May 5, 2026
2c16093
[chore](format) clang-format pass on PR #62992 changes
airborne12 May 5, 2026
910092c
[test](inverted index) add direct UTs for PredicateCollector helpers
airborne12 May 5, 2026
ca04990
[test](inverted index) call MatchPredicateCollector::collect() direct…
airborne12 May 6, 2026
3060511
[chore](format) clang-format pass on collector branch tests
airborne12 May 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 54 additions & 3 deletions be/src/storage/predicate_collector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@

#include <glog/logging.h>

#include <vector>

#include "exec/common/variant_util.h"
#include "exprs/vexpr.h"
#include "exprs/vexpr_context.h"
#include "exprs/vliteral.h"
Expand Down Expand Up @@ -91,7 +94,49 @@ Status MatchPredicateCollector::collect(RuntimeState* state, const TabletSchemaS
}

const auto& column = tablet_schema->column(col_idx);
auto index_metas = tablet_schema->inverted_indexs(sd->col_unique_id(), column.suffix_path());
auto index_metas = tablet_schema->inverted_indexs(column);
std::vector<std::shared_ptr<const TabletIndex>> owned_index_metas;
std::string index_suffix_path = column.suffix_path();

// Schema-only fallback for variant sub-columns. Collector runs at tablet
// level without segment context, so we cannot do nested-group inference
// or inherit_index runtime-type dispatch. Two paths cover what is
// resolvable from schema alone:
// 1. field_pattern templates (MATCH_NAME / MATCH_NAME_GLOB) via
// generate_sub_column_info.
// 2. Plain parent inverted index when the schema column is the dynamic
// path's VARIANT placeholder produced by _init_variant_columns. In
// that state inverted_indexs(column) misses because
// _path_set_info_map.subcolumn_indexes is only populated for typed
// paths / field_pattern outputs, not for plain parent indexes added
// by ALTER. Clone the parent's non-field-pattern indexes with the
// variant path as suffix so segment-side BM25 statistics can be
// collected.
if (index_metas.empty() && column.is_extracted_column()) {
TabletSchema::SubColumnInfo sub_column_info;
const std::string relative_path = column.path_info_ptr()->copy_pop_front().get_path();
if (variant_util::generate_sub_column_info(*tablet_schema, column.parent_unique_id(),
relative_path, &sub_column_info) &&
!sub_column_info.indexes.empty()) {
index_suffix_path = sub_column_info.column.suffix_path();
for (auto& idx : sub_column_info.indexes) {
index_metas.push_back(idx.get());
owned_index_metas.emplace_back(std::move(idx));
}
} else if (column.is_variant_type()) {
const auto parent_indexes = tablet_schema->inverted_indexs(column.parent_unique_id());
for (const auto* index : parent_indexes) {
if (!index->field_pattern().empty()) {
continue;
}
auto index_ptr = std::make_shared<TabletIndex>(*index);
index_ptr->set_escaped_escaped_index_suffix_path(
column.path_info_ptr()->get_path());
index_metas.push_back(index_ptr.get());
owned_index_metas.emplace_back(std::move(index_ptr));
}
}
}

#ifndef BE_TEST
if (index_metas.empty()) {
Expand All @@ -117,14 +162,20 @@ Status MatchPredicateCollector::collect(RuntimeState* state, const TabletSchemaS
index_meta->properties());

std::string field_name =
build_field_name(index_meta->col_unique_ids()[0], column.suffix_path());
build_field_name(index_meta->col_unique_ids()[0], index_suffix_path);
std::wstring ws_field_name = StringHelper::to_wstring(field_name);

auto iter = collect_infos->find(ws_field_name);
if (iter == collect_infos->end()) {
CollectInfo collect_info;
collect_info.term_infos.insert(term_infos.begin(), term_infos.end());
collect_info.index_meta = index_meta;
for (const auto& owned_index_meta : owned_index_metas) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if (column.is_extracted_column()) continue;

if (owned_index_meta.get() == index_meta) {
collect_info.owned_index_meta = owned_index_meta;
break;
}
}
(*collect_infos)[ws_field_name] = std::move(collect_info);
} else {
iter->second.term_infos.insert(term_infos.begin(), term_infos.end());
Expand Down Expand Up @@ -260,4 +311,4 @@ SearchPredicateCollector::ClauseTypeCategory SearchPredicateCollector::get_claus
}
}

} // namespace doris
} // namespace doris
1 change: 1 addition & 0 deletions be/src/storage/predicate_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ struct TermInfoComparer {

// Per-field record accumulated by the predicate collectors; CollectInfoMap
// stores one of these per wide-string field name.
struct CollectInfo {
// Terms gathered for this field. Stored in a std::set, so entries that
// TermInfoComparer treats as equivalent are kept only once.
std::set<segment_v2::TermInfo, TermInfoComparer> term_infos;
// Shared ownership of the index metadata, populated only when index_meta
// points at a TabletIndex object the collector created itself; empty otherwise.
// NOTE(review): presumably this exists so index_meta stays valid after the
// collector's local storage for such objects is gone — confirm against callers.
std::shared_ptr<const TabletIndex> owned_index_meta;
// Non-owning pointer to the index metadata used for this field; nullptr until
// a collector assigns it.
const TabletIndex* index_meta = nullptr;
};
using CollectInfoMap = std::unordered_map<std::wstring, CollectInfo>;
Expand Down
Loading
Loading