Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1643,6 +1643,8 @@ DEFINE_mBool(enable_parquet_page_index, "true");

DEFINE_mBool(ignore_not_found_file_in_external_table, "true");

DEFINE_mBool(ignore_not_found_segment, "true");

DEFINE_mBool(enable_hdfs_mem_limiter, "true");

DEFINE_mInt16(topn_agg_limit_multiplier, "2");
Expand Down
5 changes: 5 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1704,6 +1704,11 @@ DECLARE_mBool(enable_parquet_page_index);
// Default is true, if set to false, the not found file will result in query failure.
DECLARE_mBool(ignore_not_found_file_in_external_table);

// Whether to ignore not found segment files in native olap tables.
// Default is true. When a segment file is missing (e.g., removed by gc or external cause),
// the query/load will skip the missing segment instead of failing with IO error.
DECLARE_mBool(ignore_not_found_segment);
Comment on lines +1707 to +1710
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new config name ignore_not_found_segment is misleading because the behavior (and comment) also includes IO_ERROR, not just missing segments. Since this is a newly introduced config, consider renaming it to something like ignore_segment_io_errors (or otherwise aligning the name strictly to NOT_FOUND-only behavior) to avoid operator confusion when toggling at runtime.

Suggested change
// Whether to ignore IO errors (NOT_FOUND, EIO) when loading segment files in native olap tables.
// Default is true. When a segment file is missing or has IO errors,
// the query/load will skip the failing segment instead of reporting error to users.
DECLARE_mBool(ignore_not_found_segment);
// Whether to ignore IO errors (NOT_FOUND, EIO) when loading segment files in native OLAP tables.
// Default is true. When a segment file is missing or has IO errors,
// the query/load will skip the failing segment instead of reporting error to users.
DECLARE_mBool(ignore_segment_io_errors);

Copilot uses AI. Check for mistakes.

DECLARE_mBool(enable_hdfs_mem_limiter);

// Define how many percent data in hashtable bigger than limit
Expand Down
12 changes: 11 additions & 1 deletion be/src/storage/rowset/beta_rowset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,14 @@ Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end,
int64_t seg_id = seg_id_begin;
while (seg_id < seg_id_end) {
std::shared_ptr<segment_v2::Segment> segment;
RETURN_IF_ERROR(load_segment(seg_id, nullptr, &segment));
auto st = load_segment(seg_id, nullptr, &segment);
if (st.is<ErrorCode::NOT_FOUND>() && config::ignore_not_found_segment) {
LOG(WARNING) << "segment not found, skip it. rowset_id=" << rowset_id()
<< ", seg_id=" << seg_id;
seg_id++;
continue;
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like SegmentLoader, BetaRowset::load_segments() now logs a WARNING per missing segment. If a rowset has many missing segments (or the scan is retried), this can generate a large volume of logs. Consider rate limiting and/or logging a summary once per rowset (e.g., number of skipped segments) to reduce operational noise.

Copilot uses AI. Check for mistakes.
}
RETURN_IF_ERROR(st);
segments->push_back(std::move(segment));
seg_id++;
}
Expand All @@ -260,6 +267,9 @@ Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end,

Status BetaRowset::load_segment(int64_t seg_id, OlapReaderStatistics* stats,
segment_v2::SegmentSharedPtr* segment) {
DBUG_EXECUTE_IF("BetaRowset::load_segment.return_not_found", {
return Status::Error<ErrorCode::NOT_FOUND>("injected segment not found, seg_id={}", seg_id);
});
auto fs = _rowset_meta->fs();
if (!fs) {
return Status::Error<INIT_FAILED>("get fs failed");
Expand Down
11 changes: 9 additions & 2 deletions be/src/storage/segment/lazy_init_segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,15 @@ Status LazyInitSegmentIterator::init(const StorageReadOptions& opts) {
std::shared_ptr<Segment> segment;
{
SegmentCacheHandle segment_cache_handle;
RETURN_IF_ERROR(SegmentLoader::instance()->load_segment(
_rowset, _segment_id, &segment_cache_handle, _should_use_cache, false, opts.stats));
auto st = SegmentLoader::instance()->load_segment(
_rowset, _segment_id, &segment_cache_handle, _should_use_cache, false, opts.stats);
if (st.is<ErrorCode::NOT_FOUND>() && config::ignore_not_found_segment) {
LOG(WARNING) << "segment not found, skip it. rowset_id=" << _rowset->rowset_id()
<< ", seg_id=" << _segment_id;
// _inner_iterator remains nullptr, next_batch() will return EOF
return Status::OK();
}
RETURN_IF_ERROR(st);
Comment on lines +44 to +53
Copy link

Copilot AI Mar 31, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The ignore+log logic for segment load failures is now duplicated across SegmentLoader::load_segments, LazyInitSegmentIterator::init, and BetaRowset::load_segments. To avoid future drift (e.g., one path handling a new error code differently), consider extracting a small helper (e.g., should_ignore_segment_load_error(Status)) or a shared utility for the condition + standardized log message.

Copilot uses AI. Check for mistakes.
const auto& tmp_segments = segment_cache_handle.get_segments();
segment = tmp_segments[0];
}
Expand Down
7 changes: 6 additions & 1 deletion be/src/storage/segment/lazy_init_segment_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ class LazyInitSegmentIterator : public RowwiseIterator {
Status next_batch(Block* block) override {
if (UNLIKELY(_need_lazy_init)) {
RETURN_IF_ERROR(init(_read_options));
DCHECK(_inner_iterator != nullptr);
}
if (_inner_iterator == nullptr) {
return Status::EndOfFile("segment not found, skipped");
}

return _inner_iterator->next_batch(block);
Expand All @@ -49,6 +51,9 @@ class LazyInitSegmentIterator : public RowwiseIterator {
const Schema& schema() const override { return *_schema; }

Status current_block_row_locations(std::vector<RowLocation>* locations) override {
if (_inner_iterator == nullptr) {
return Status::EndOfFile("no segment loaded");
}
return _inner_iterator->current_block_row_locations(locations);
}

Expand Down
10 changes: 8 additions & 2 deletions be/src/storage/segment/segment_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,14 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset,
return Status::OK();
}
for (int64_t i = 0; i < rowset->num_segments(); i++) {
RETURN_IF_ERROR(load_segment(rowset, i, cache_handle, use_cache, need_load_pk_index_and_bf,
index_load_stats));
auto st = load_segment(rowset, i, cache_handle, use_cache, need_load_pk_index_and_bf,
index_load_stats);
if (st.is<ErrorCode::NOT_FOUND>() && config::ignore_not_found_segment) {
LOG(WARNING) << "segment not found, skip it. rowset_id=" << rowset->rowset_id()
<< ", seg_id=" << i;
continue;
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The warning log inside the segment-load loop can spam logs when multiple segments are missing (and load_segments() is called frequently during query execution). Consider rate limiting (e.g., LOG_EVERY_N / LOG_EVERY_N_SECONDS) and/or aggregating counts per rowset to reduce operational noise while still preserving debuggability.

Copilot uses AI. Check for mistakes.
}
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SegmentLoader::load_segments() now skips NOT_FOUND segments and still returns OK, but many call sites implicitly assume cache_handle->get_segments() is indexed by seg_id and has size()==rowset->num_segments(). For example, BaseTablet::lookup_row_key() indexes segments[id] where id is a seg_id; with skipped entries this can become out-of-bounds or dereference the wrong segment in release builds. To make skipping safe, either (a) preserve the seg_id->index contract by resizing the segment vector to num_segments and storing loaded segments at segments[seg_id] (leaving nullptr for missing) and update callers to handle nullptr, or (b) change the API to return a mapping and update all callers to lookup by Segment::id() instead of positional indexing.

Suggested change
if (st.is<ErrorCode::NOT_FOUND>() && config::ignore_not_found_segment) {
LOG(WARNING) << "segment not found, skip it. rowset_id=" << rowset->rowset_id()
<< ", seg_id=" << i;
continue;
}

Copilot uses AI. Check for mistakes.
RETURN_IF_ERROR(st);
}
cache_handle->set_inited();
return Status::OK();
Expand Down
244 changes: 244 additions & 0 deletions be/test/storage/segment/ignore_not_found_segment_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include <memory>

#include "common/config.h"
#include "common/status.h"
#include "json2pb/json_to_pb.h"
#include "runtime/exec_env.h"
#include "storage/rowset/beta_rowset.h"
#include "storage/segment/lazy_init_segment_iterator.h"
#include "storage/segment/segment_loader.h"
#include "util/debug_points.h"

namespace doris {

class IgnoreNotFoundSegmentTest : public testing::Test {
protected:
void SetUp() override {
_saved_ignore = config::ignore_not_found_segment;
_saved_debug_points = config::enable_debug_points;
config::enable_debug_points = true;

// Set up a SegmentLoader for LazyInitSegmentIterator tests
_saved_segment_loader = ExecEnv::GetInstance()->segment_loader();
_segment_loader = new SegmentLoader(1024 * 1024, 100);
ExecEnv::GetInstance()->set_segment_loader(_segment_loader);
}

void TearDown() override {
DebugPoints::instance()->remove("BetaRowset::load_segment.return_not_found");
config::ignore_not_found_segment = _saved_ignore;
config::enable_debug_points = _saved_debug_points;

ExecEnv::GetInstance()->set_segment_loader(_saved_segment_loader);
delete _segment_loader;
_segment_loader = nullptr;
Comment on lines +46 to +54
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test fixture uses DebugPoints::instance()->clear() in TearDown(), which removes all debug points for the entire process. This can create test-order coupling if other tests in the same binary rely on debug points. Prefer removing only the points added by this test (e.g., remove("BetaRowset::load_segment.return_not_found")) or using an RAII helper that adds/removes a named debug point and restores config::enable_debug_points.

Copilot uses AI. Check for mistakes.
}

BetaRowsetSharedPtr create_rowset(int num_segments) {
auto schema = std::make_shared<TabletSchema>();
TabletColumn col;
col.set_name("c1");
col.set_unique_id(0);
col.set_type(FieldType::OLAP_FIELD_TYPE_INT);
col.set_length(4);
col.set_is_key(true);
col.set_is_nullable(false);
schema->append_column(col);
schema->_keys_type = DUP_KEYS;

auto rsm = std::make_shared<RowsetMeta>();
std::string json = R"({
"rowset_id": 540081,
"tablet_id": 10001,
"partition_id": 10000,
"tablet_schema_hash": 567997577,
"rowset_type": "BETA_ROWSET",
"rowset_state": "VISIBLE",
"empty": false
})";
RowsetMetaPB pb;
EXPECT_TRUE(json2pb::JsonToProtoMessage(json, &pb));
pb.set_start_version(0);
pb.set_end_version(1);
pb.set_num_segments(num_segments);
rsm->init_from_pb(pb);
Comment on lines +79 to +84
Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The return value from json2pb::JsonToProtoMessage() is ignored here. Other tests in the repo treat this as a bool and assert success; if the JSON format changes, silently proceeding can make failures harder to diagnose. Please capture the return value and ASSERT_TRUE/EXPECT_TRUE it (or use a helper that returns Status).

Copilot uses AI. Check for mistakes.
rsm->set_tablet_schema(schema);

return std::make_shared<BetaRowset>(schema, rsm, "");
}

bool _saved_ignore = true;
bool _saved_debug_points = false;
SegmentLoader* _saved_segment_loader = nullptr;
SegmentLoader* _segment_loader = nullptr;
};

// Test: BetaRowset::load_segments skips NOT_FOUND segments when config enabled
TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsSkipsNotFound) {
config::ignore_not_found_segment = true;
auto rowset = create_rowset(3);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

std::vector<segment_v2::SegmentSharedPtr> segments;
auto st = rowset->load_segments(&segments);
// All segments are "not found" but should be skipped, resulting in OK with empty segments
ASSERT_TRUE(st.ok()) << st;
ASSERT_EQ(0, segments.size());
}

// Test: BetaRowset::load_segments fails on NOT_FOUND when config disabled
TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsFailsWhenConfigDisabled) {
config::ignore_not_found_segment = false;
auto rowset = create_rowset(3);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

std::vector<segment_v2::SegmentSharedPtr> segments;
auto st = rowset->load_segments(&segments);
ASSERT_TRUE(st.is<ErrorCode::NOT_FOUND>()) << st;
ASSERT_EQ(0, segments.size());
}

// Test: BetaRowset::load_segments with range skips NOT_FOUND
TEST_F(IgnoreNotFoundSegmentTest, BetaRowsetLoadSegmentsRangeSkipsNotFound) {
config::ignore_not_found_segment = true;
auto rowset = create_rowset(5);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

std::vector<segment_v2::SegmentSharedPtr> segments;
auto st = rowset->load_segments(1, 4, &segments);
ASSERT_TRUE(st.ok()) << st;
ASSERT_EQ(0, segments.size());
}

// Test: SegmentLoader::load_segments skips NOT_FOUND segments when config enabled
TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderLoadSegmentsSkipsNotFound) {
config::ignore_not_found_segment = true;
auto rowset = create_rowset(3);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

// Create a SegmentLoader with a small cache
SegmentLoader loader(1024 * 1024, 100);
SegmentCacheHandle handle;
auto st = loader.load_segments(rowset, &handle, false);
ASSERT_TRUE(st.ok()) << st;
ASSERT_EQ(0, handle.get_segments().size());
ASSERT_TRUE(handle.is_inited());
}

Copy link

Copilot AI Mar 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The new UTs validate that SegmentLoader::load_segments() returns OK when segments are missing, but they don't cover any real caller that relies on SegmentCacheHandle::get_segments() being indexable by seg_id (e.g., code paths that do segments[seg_id]). Given the change in semantics, please add a regression test exercising such a caller (or explicitly validate the seg_id->vector-index contract you intend to provide).

Suggested change
// Regression test: validate that SegmentCacheHandle::get_segments() can be indexed by seg_id
// after a successful SegmentLoader::load_segments() call. This mimics real callers that do
// `segments[seg_id]` and relies on the seg_id -> vector-index contract.
TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderSegmentsIndexableBySegId) {
config::ignore_not_found_segment = true;
auto rowset = create_rowset(3);
// Do not inject NOT_FOUND for this test; we want all segments to load successfully
SegmentLoader loader(1024 * 1024, 100);
SegmentCacheHandle handle;
auto st = loader.load_segments(rowset, &handle, false);
ASSERT_TRUE(st.ok()) << st;
ASSERT_TRUE(handle.is_inited());
const auto& segments = handle.get_segments();
// Expect that we can index by seg_id in [0, 3)
ASSERT_GE(segments.size(), 3);
for (int seg_id = 0; seg_id < 3; ++seg_id) {
// Real callers rely on segments[seg_id] being valid for each seg_id
ASSERT_NE(nullptr, segments[seg_id]);
}
}

Copilot uses AI. Check for mistakes.
// Test: SegmentLoader::load_segments fails when config disabled
TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderLoadSegmentsFailsWhenConfigDisabled) {
config::ignore_not_found_segment = false;
auto rowset = create_rowset(3);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

SegmentLoader loader(1024 * 1024, 100);
SegmentCacheHandle handle;
auto st = loader.load_segments(rowset, &handle, false);
ASSERT_TRUE(st.is<ErrorCode::NOT_FOUND>()) << st;
}

// Test: SegmentLoader::load_segment (single) returns NOT_FOUND directly
// (single-segment load does not skip; it's the caller's responsibility)
TEST_F(IgnoreNotFoundSegmentTest, SegmentLoaderLoadSingleSegmentReturnsNotFound) {
auto rowset = create_rowset(1);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

SegmentLoader loader(1024 * 1024, 100);
SegmentCacheHandle handle;
auto st = loader.load_segment(rowset, 0, &handle, false);
ASSERT_TRUE(st.is<ErrorCode::NOT_FOUND>()) << st;
}

// Test: LazyInitSegmentIterator returns EOF when segment not found
// (explicit init path - simulates VUnionIterator calling init() before next_batch())
TEST_F(IgnoreNotFoundSegmentTest, LazyInitIteratorReturnsEofOnNotFound) {
config::ignore_not_found_segment = true;
auto rowset = create_rowset(1);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

auto schema = std::make_shared<Schema>(rowset->tablet_schema());
StorageReadOptions opts;
opts.tablet_schema = rowset->tablet_schema();

auto iter = std::make_unique<segment_v2::LazyInitSegmentIterator>(
rowset, 0, false, schema, opts);

// Explicit init should succeed (segment skipped, inner iterator is null)
auto st = iter->init(opts);
ASSERT_TRUE(st.ok()) << st;

// next_batch should return EOF since inner iterator is null.
// This is the critical path: _need_lazy_init is already false because init() was called,
// so the null check must be outside the UNLIKELY branch.
Block block;
st = iter->next_batch(&block);
ASSERT_TRUE(st.is<ErrorCode::END_OF_FILE>()) << st;
}

// Test: LazyInitSegmentIterator fails when config disabled
TEST_F(IgnoreNotFoundSegmentTest, LazyInitIteratorFailsWhenConfigDisabled) {
config::ignore_not_found_segment = false;
auto rowset = create_rowset(1);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

auto schema = std::make_shared<Schema>(rowset->tablet_schema());
StorageReadOptions opts;
opts.tablet_schema = rowset->tablet_schema();

auto iter = std::make_unique<segment_v2::LazyInitSegmentIterator>(
rowset, 0, false, schema, opts);

// init should fail with NOT_FOUND
auto st = iter->init(opts);
ASSERT_TRUE(st.is<ErrorCode::NOT_FOUND>()) << st;
}

// Test: LazyInitSegmentIterator next_batch path with lazy init (not pre-inited)
// (lazy path - simulates the case where next_batch() triggers init internally)
TEST_F(IgnoreNotFoundSegmentTest, LazyInitIteratorNextBatchLazyPath) {
config::ignore_not_found_segment = true;
auto rowset = create_rowset(1);

DebugPoints::instance()->add("BetaRowset::load_segment.return_not_found");

auto schema = std::make_shared<Schema>(rowset->tablet_schema());
StorageReadOptions opts;
opts.tablet_schema = rowset->tablet_schema();

auto iter = std::make_unique<segment_v2::LazyInitSegmentIterator>(
rowset, 0, false, schema, opts);

// Don't call init() explicitly - let next_batch trigger lazy init
Block block;
auto st = iter->next_batch(&block);
ASSERT_TRUE(st.is<ErrorCode::END_OF_FILE>()) << st;
}

} // namespace doris
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !baseline --
6

-- !ignore_enabled --
0

-- !recovery --
6

Loading
Loading