From da0457fb2f4ccc0d794d2936b751a9be45be59de Mon Sep 17 00:00:00 2001 From: Binglin Chang Date: Thu, 7 Dec 2023 09:56:30 +0800 Subject: [PATCH] [Enhancement] Add max_tablet_rowset_num metrics (#36539) Why I'm doing: Currently, it's hard to detect 'too many versions' errors early, so a monitoring metric is needed. What I'm doing: Add a metric for the maximum rowset count across a BE's tablets. If this value keeps increasing, it may indicate compaction is blocked or has issues, so monitoring this value can detect 'too many versions' errors early. Fixes #36469 Signed-off-by: Binglin Chang (cherry picked from commit 555dfa65b1e166dd78752c603bc39075378d0f73) # Conflicts: # be/src/util/starrocks_metrics.cpp --- be/src/storage/tablet_manager.cpp | 7 +++++-- be/src/util/starrocks_metrics.cpp | 15 +++++++++++++++ be/src/util/starrocks_metrics.h | 1 + 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/be/src/storage/tablet_manager.cpp b/be/src/storage/tablet_manager.cpp index 644e68727668de..f0e1da17e1f0ed 100644 --- a/be/src/storage/tablet_manager.cpp +++ b/be/src/storage/tablet_manager.cpp @@ -903,13 +903,14 @@ Status TabletManager::report_all_tablets_info(std::map* tabl StarRocksMetrics::instance()->report_all_tablets_requests_total.increment(1); + size_t max_tablet_rowset_num = 0; for (const auto& tablets_shard : _tablets_shards) { std::shared_lock rlock(tablets_shard.lock); for (const auto& [tablet_id, tablet_ptr] : tablets_shard.tablet_map) { TTablet t_tablet; TTabletInfo tablet_info; tablet_ptr->build_tablet_report_info(&tablet_info); - + max_tablet_rowset_num = std::max(max_tablet_rowset_num, tablet_ptr->version_count()); // find expired transaction corresponding to this tablet TabletInfo tinfo(tablet_id, tablet_ptr->schema_hash(), tablet_ptr->tablet_uid()); auto find = expire_txn_map.find(tinfo); @@ -924,7 +925,9 @@ Status TabletManager::report_all_tablets_info(std::map* tabl } } } - LOG(INFO) << "Report all " << tablets_info->size() << " tablets info"; + 
LOG(INFO) << "Report all " << tablets_info->size() + << " tablets info. max_tablet_rowset_num:" << max_tablet_rowset_num; + StarRocksMetrics::instance()->max_tablet_rowset_num.set_value(max_tablet_rowset_num); return Status::OK(); } diff --git a/be/src/util/starrocks_metrics.cpp b/be/src/util/starrocks_metrics.cpp index dedbadccf32e62..b89b6549f20a43 100644 --- a/be/src/util/starrocks_metrics.cpp +++ b/be/src/util/starrocks_metrics.cpp @@ -175,6 +175,21 @@ StarRocksMetrics::StarRocksMetrics() : _metrics(_s_registry_name) { REGISTER_STARROCKS_METRIC(tablet_cumulative_max_compaction_score); REGISTER_STARROCKS_METRIC(tablet_base_max_compaction_score); REGISTER_STARROCKS_METRIC(tablet_update_max_compaction_score); +<<<<<<< HEAD +======= + REGISTER_STARROCKS_METRIC(max_tablet_rowset_num); + REGISTER_STARROCKS_METRIC(wait_cumulative_compaction_task_num); + REGISTER_STARROCKS_METRIC(wait_base_compaction_task_num); + REGISTER_STARROCKS_METRIC(running_cumulative_compaction_task_num); + REGISTER_STARROCKS_METRIC(running_base_compaction_task_num); + REGISTER_STARROCKS_METRIC(running_update_compaction_task_num); + REGISTER_STARROCKS_METRIC(cumulative_compaction_task_cost_time_ms); + REGISTER_STARROCKS_METRIC(base_compaction_task_cost_time_ms); + REGISTER_STARROCKS_METRIC(update_compaction_task_cost_time_ns); + REGISTER_STARROCKS_METRIC(base_compaction_task_byte_per_second); + REGISTER_STARROCKS_METRIC(cumulative_compaction_task_byte_per_second); + REGISTER_STARROCKS_METRIC(update_compaction_task_byte_per_second); +>>>>>>> 555dfa65b1 ([Enhancement] Add max_tablet_rowset_num metrcis (#36539)) REGISTER_STARROCKS_METRIC(push_request_write_bytes_per_second); REGISTER_STARROCKS_METRIC(query_scan_bytes_per_second); diff --git a/be/src/util/starrocks_metrics.h b/be/src/util/starrocks_metrics.h index 6b60509fca1437..2ef90b2c112f1f 100644 --- a/be/src/util/starrocks_metrics.h +++ b/be/src/util/starrocks_metrics.h @@ -208,6 +208,7 @@ class StarRocksMetrics { 
METRIC_DEFINE_INT_GAUGE(tablet_cumulative_max_compaction_score, MetricUnit::NOUNIT); METRIC_DEFINE_INT_GAUGE(tablet_base_max_compaction_score, MetricUnit::NOUNIT); METRIC_DEFINE_INT_GAUGE(tablet_update_max_compaction_score, MetricUnit::NOUNIT); + METRIC_DEFINE_INT_GAUGE(max_tablet_rowset_num, MetricUnit::NOUNIT); // The following metrics will be calculated // by metric calculator