Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,8 @@ private void updateStatInfo(List<Long> dbIds) {
long tabletCount = 0L;
long partitionCount = 0L;
long tableCount = 0L;
long autoPartitionNearLimitCount = 0L;
long dynamicPartitionNearLimitCount = 0L;
List<OlapTable.Statistics> newCloudTableStatsList = new ArrayList<>();
for (Long dbId : dbIds) {
Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId);
Expand Down Expand Up @@ -333,7 +335,24 @@ private void updateStatInfo(List<Long> dbIds) {
OlapTable.Statistics tableStats;
try {
List<Partition> allPartitions = olapTable.getAllPartitions();
// Use getPartitionNum() (excludes temp partitions) for limit check,
// consistent with how partition limits are enforced elsewhere.
int nonTempPartitionNum = olapTable.getPartitionNum();
partitionCount += allPartitions.size();
// Check if this table's partition count is near the limit (>80%)
if (olapTable.getPartitionInfo().enableAutomaticPartition()) {
int limit = Config.max_auto_partition_num;
if (nonTempPartitionNum > limit * 8L / 10) {
autoPartitionNearLimitCount++;
}
}
if (olapTable.dynamicPartitionExists()
&& olapTable.getTableProperty().getDynamicPartitionProperty().getEnable()) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same semantic mismatch as in TabletStatMgr.java — the dynamic partition near-limit check should compare the configured span (end - start) against max_dynamic_partition_num, not the total partition count. See the detailed comment on the TabletStatMgr.java counterpart.

int limit = Config.max_dynamic_partition_num;
if (nonTempPartitionNum > limit * 8L / 10) {
dynamicPartitionNearLimitCount++;
}
}
for (Partition partition : allPartitions) {
long partitionDataSize = 0L;
for (MaterializedIndex index : partition.getMaterializedIndices(IndexExtState.VISIBLE)) {
Expand Down Expand Up @@ -449,6 +468,9 @@ private void updateStatInfo(List<Long> dbIds) {
long avgTabletSize = totalTableSize / Math.max(1, tabletCount);
MetricRepo.GAUGE_AVG_TABLET_SIZE_BYTES.setValue(avgTabletSize);

MetricRepo.GAUGE_AUTO_PARTITION_NEAR_LIMIT.setValue(autoPartitionNearLimitCount);
MetricRepo.GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT.setValue(dynamicPartitionNearLimitCount);

LOG.info("OlapTable num=" + tableCount
+ ", partition num=" + partitionCount + ", tablet num=" + tabletCount
+ ", max tablet byte size=" + maxTabletSize.second
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,8 @@ protected void runAfterCatalogReady() {
long tabletCount = 0L;
long partitionCount = 0L;
long tableCount = 0L;
long autoPartitionNearLimitCount = 0L;
long dynamicPartitionNearLimitCount = 0L;
List<Long> dbIds = Env.getCurrentInternalCatalog().getDbIds();
for (Long dbId : dbIds) {
Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId);
Expand Down Expand Up @@ -162,7 +164,24 @@ protected void runAfterCatalogReady() {
}
try {
List<Partition> allPartitions = olapTable.getAllPartitions();
// Use getPartitionNum() (excludes temp partitions) for limit check,
// consistent with how partition limits are enforced elsewhere.
int nonTempPartitionNum = olapTable.getPartitionNum();
partitionCount += allPartitions.size();
// Check if this table's partition count is near the limit (>80%)
if (olapTable.getPartitionInfo().enableAutomaticPartition()) {
int limit = Config.max_auto_partition_num;
if (nonTempPartitionNum > limit * 8L / 10) {
autoPartitionNearLimitCount++;
}
}
if (olapTable.dynamicPartitionExists()
&& olapTable.getTableProperty().getDynamicPartitionProperty().getEnable()) {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Semantic mismatch for dynamic partition near-limit check.

The original metric in DynamicPartitionUtil compared expectCreatePartitionNum (which is end - start, the configured partition span/window) against Config.max_dynamic_partition_num. Per the config's own Javadoc:

Used to limit the maximum number of partitions that can be created when creating a dynamic partition table [...] The number is determined by "start" and "end" in the dynamic partition parameters.

However, the new code here compares nonTempPartitionNum (total current partition count on the table) against max_dynamic_partition_num. These are semantically different:

  • A table with start=-5, end=3 (span=8) but 17,000 manually-added partitions would trigger this gauge even though the dynamic partition config is well within the limit.
  • Conversely, a table with start=-15000, end=5000 (span=20,000) but only 100 currently-existing partitions would NOT trigger the gauge, even though the dynamic partition span already hits the hard limit.

Consider comparing the configured span (end - start) instead:

DynamicPartitionProperty dpProp = olapTable.getTableProperty().getDynamicPartitionProperty();
long span = (long) dpProp.getEnd() - dpProp.getStart();
int limit = Config.max_dynamic_partition_num;
if (span > limit * 8L / 10) {
    dynamicPartitionNearLimitCount++;
}

This would make the gauge semantically consistent with the enforcement in DynamicPartitionUtil.

Note: The auto-partition check above is correct — FrontendServiceImpl.createPartition() does compare total partition count against max_auto_partition_num.

int limit = Config.max_dynamic_partition_num;
if (nonTempPartitionNum > limit * 8L / 10) {
dynamicPartitionNearLimitCount++;
}
}
for (Partition partition : allPartitions) {
long partitionDataSize = 0L;
long version = partition.getVisibleVersion();
Expand Down Expand Up @@ -295,6 +314,8 @@ protected void runAfterCatalogReady() {
// avoid ArithmeticException: / by zero
long avgTabletSize = totalTableSize / Math.max(1, tabletCount);
MetricRepo.GAUGE_AVG_TABLET_SIZE_BYTES.setValue(avgTabletSize);
MetricRepo.GAUGE_AUTO_PARTITION_NEAR_LIMIT.setValue(autoPartitionNearLimitCount);
MetricRepo.GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT.setValue(dynamicPartitionNearLimitCount);

LOG.info("OlapTable num=" + tableCount
+ ", partition num=" + partitionCount + ", tablet num=" + tabletCount
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.common.UserException;
import org.apache.doris.metric.MetricRepo;
import org.apache.doris.policy.StoragePolicy;
import org.apache.doris.resource.Tag;
import org.apache.doris.thrift.TStorageMedium;
Expand Down Expand Up @@ -652,9 +651,6 @@ public static Map<String, String> analyzeDynamicPartition(Map<String, String> pr
LOG.warn("Dynamic partition count {} is approaching limit {} (>80%)."
+ " Consider increasing max_dynamic_partition_num.",
expectCreatePartitionNum, dynamicPartitionLimit);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT.increase(1L);
}
}
}

Expand Down
21 changes: 11 additions & 10 deletions fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
Original file line number Diff line number Diff line change
Expand Up @@ -256,9 +256,9 @@ public final class MetricRepo {
public static GaugeMetricImpl<Long> GAUGE_AVG_PARTITION_SIZE_BYTES;
public static GaugeMetricImpl<Long> GAUGE_AVG_TABLET_SIZE_BYTES;

// Partition near-limit warnings
public static LongCounterMetric COUNTER_AUTO_PARTITION_NEAR_LIMIT;
public static LongCounterMetric COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT;
// Partition near-limit warnings (gauges: current number of tables near the partition limit)
public static GaugeMetricImpl<Long> GAUGE_AUTO_PARTITION_NEAR_LIMIT;
public static GaugeMetricImpl<Long> GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT;

// Agent task
public static LongCounterMetric COUNTER_AGENT_TASK_REQUEST_TOTAL;
Expand Down Expand Up @@ -1044,15 +1044,16 @@ public Integer getValue() {
GAUGE_AVG_TABLET_SIZE_BYTES = new GaugeMetricImpl<>("avg_tablet_size_bytes", MetricUnit.BYTES, "", 0L);
DORIS_METRIC_REGISTER.addMetrics(GAUGE_AVG_TABLET_SIZE_BYTES);

// Partition near-limit warning counters
COUNTER_AUTO_PARTITION_NEAR_LIMIT = new LongCounterMetric("auto_partition_near_limit_count",
// Partition near-limit warning gauges (updated by the periodic scans in TabletStatMgr and its cloud counterpart)
GAUGE_AUTO_PARTITION_NEAR_LIMIT = new GaugeMetricImpl<>("auto_partition_near_limit_count",
MetricUnit.NOUNIT,
"number of times auto partition count exceeded 80% of max_auto_partition_num");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_AUTO_PARTITION_NEAR_LIMIT);
COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT = new LongCounterMetric("dynamic_partition_near_limit_count",
"number of auto partition tables where partition count exceeded 80% of max_auto_partition_num", 0L);
DORIS_METRIC_REGISTER.addMetrics(GAUGE_AUTO_PARTITION_NEAR_LIMIT);
GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT = new GaugeMetricImpl<>("dynamic_partition_near_limit_count",
MetricUnit.NOUNIT,
"number of times dynamic partition count exceeded 80% of max_dynamic_partition_num");
DORIS_METRIC_REGISTER.addMetrics(COUNTER_DYNAMIC_PARTITION_NEAR_LIMIT);
"number of dynamic partition tables where partition count exceeded 80% of max_dynamic_partition_num",
0L);
DORIS_METRIC_REGISTER.addMetrics(GAUGE_DYNAMIC_PARTITION_NEAR_LIMIT);
Comment thread
dataroaring marked this conversation as resolved.

COUNTER_AGENT_TASK_REQUEST_TOTAL = new LongCounterMetric("agent_task_request_total", MetricUnit.NOUNIT,
"total agent batch task request send to BE");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@
import org.apache.doris.load.routineload.RoutineLoadManager;
import org.apache.doris.master.MasterImpl;
import org.apache.doris.meta.MetaContext;
import org.apache.doris.metric.MetricRepo;
import org.apache.doris.mysql.privilege.AccessControllerManager;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.nereids.trees.plans.PlanNodeAndHash;
Expand Down Expand Up @@ -4407,9 +4406,6 @@ public TCreatePartitionResult createPartition(TCreatePartitionRequest request) t
LOG.warn("Table {}.{} auto partition count {} is approaching limit {} (>80%)."
+ " Consider increasing max_auto_partition_num.",
db.getFullName(), olapTable.getName(), partitionNum, autoPartitionLimit);
if (MetricRepo.isInit) {
MetricRepo.COUNTER_AUTO_PARTITION_NEAR_LIMIT.increase(1L);
}
}

// build partition & tablets
Expand Down
Loading