
[Enhancement] show persistent index disk cost in be_tablets #35615

Merged: 1 commit, Nov 29, 2023
12 changes: 10 additions & 2 deletions be/src/exec/schema_scanner/schema_be_tablets_scanner.cpp
@@ -34,7 +34,7 @@ SchemaScanner::ColumnDesc SchemaBeTabletsScanner::_s_columns[] = {
{"INDEX_MEM", TYPE_BIGINT, sizeof(int64_t), false}, {"CREATE_TIME", TYPE_BIGINT, sizeof(int64_t), false},
{"STATE", TYPE_VARCHAR, sizeof(StringValue), false}, {"TYPE", TYPE_VARCHAR, sizeof(StringValue), false},
{"DATA_DIR", TYPE_VARCHAR, sizeof(StringValue), false}, {"SHARD_ID", TYPE_BIGINT, sizeof(int64_t), false},
-    {"SCHEMA_HASH", TYPE_BIGINT, sizeof(int64_t), false},
+    {"SCHEMA_HASH", TYPE_BIGINT, sizeof(int64_t), false}, {"INDEX_DISK", TYPE_BIGINT, sizeof(int64_t), false},
};

SchemaBeTabletsScanner::SchemaBeTabletsScanner()
@@ -92,7 +92,7 @@ Status SchemaBeTabletsScanner::fill_chunk(ChunkPtr* chunk) {
for (; _cur_idx < end; _cur_idx++) {
auto& info = _infos[_cur_idx];
for (const auto& [slot_id, index] : slot_id_to_index_map) {
-        if (slot_id < 1 || slot_id > 17) {
+        if (slot_id < 1 || slot_id > 18) {
return Status::InternalError(strings::Substitute("invalid slot id:$0", slot_id));
}
ColumnPtr column = (*chunk)->get_column_by_slot_id(slot_id);
@@ -170,18 +170,26 @@ Status SchemaBeTabletsScanner::fill_chunk(ChunkPtr* chunk) {
break;
}
case 15: {
// DATA_DIR
Slice type = Slice(info.data_dir);
fill_column_with_slot<TYPE_VARCHAR>(column.get(), (void*)&type);
break;
}
case 16: {
// SHARD_ID
fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.shard_id);
break;
}
case 17: {
// SCHEMA_HASH
fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.schema_hash);
break;
}
case 18: {
// INDEX_DISK
fill_column_with_slot<TYPE_BIGINT>(column.get(), (void*)&info.index_disk_usage);
break;
}
default:
break;
}
The riskiest bug in this code is the incorrect memory-size definition for TYPE_VARCHAR columns in the _s_columns[] array initialization.

You can modify the code like this:

@@ -34,7 +34,7 @@ SchemaScanner::ColumnDesc SchemaBeTabletsScanner::_s_columns[] = {
     {"INDEX_MEM", TYPE_BIGINT, sizeof(int64_t), false},     {"CREATE_TIME", TYPE_BIGINT, sizeof(int64_t), false},
     // Note: the VARCHAR sizes here would change from sizeof(StringValue) to an appropriate fixed buffer size or a variable-length handling mechanism
     {"STATE", TYPE_VARCHAR, appropriate_buffer_size, false},    {"TYPE", TYPE_VARCHAR, appropriate_buffer_size, false},
     {"DATA_DIR", TYPE_VARCHAR, appropriate_buffer_size, false}, {"SHARD_ID", TYPE_BIGINT, sizeof(int64_t), false},
-    {"SCHEMA_HASH", TYPE_BIGINT, sizeof(int64_t), false},
+    {"SCHEMA_HASH", TYPE_BIGINT, sizeof(int64_t), false},   {"INDEX_DISK", TYPE_BIGINT, sizeof(int64_t), false},
};

Remember to replace appropriate_buffer_size with the actual fixed size or dynamic size handling logic based on the real use case and data requirements.

Explanation:
The TYPE_VARCHAR likely expects a string value and therefore needs a correct buffer length to accommodate the string data. The sizeof(StringValue) here might not reflect the actual size needed for the VARCHAR data depending on how StringValue is defined. In common scenarios, VARCHAR types either specify a maximum length or are accompanied by dynamic sizing logic in the system to handle variable-length strings appropriately.

1 change: 1 addition & 0 deletions be/src/exec/schema_scanner/schema_be_tablets_scanner.h
@@ -38,6 +38,7 @@ struct TabletBasicInfo {
std::string data_dir;
int64_t shard_id{0};
int64_t schema_hash{0};
int64_t index_disk_usage{0};
};

class SchemaBeTabletsScanner : public SchemaScanner {
18 changes: 16 additions & 2 deletions be/src/storage/tablet_updates.cpp
@@ -2820,10 +2820,14 @@ Status TabletUpdates::_get_extra_file_size(int64_t* pindex_size, int64_t* col_si
std::string filename = entry.path().filename().string();

        if (filename.starts_with("index.l")) {
-            *pindex_size += std::filesystem::file_size(entry);
+            if (pindex_size != nullptr) {
+                *pindex_size += std::filesystem::file_size(entry);
+            }
        } else if (filename.ends_with(".cols")) {
            // TODO skip the expired cols file
-            *col_size += std::filesystem::file_size(entry);
+            if (col_size != nullptr) {
+                *col_size += std::filesystem::file_size(entry);
+            }
        }
}
}
@@ -3962,6 +3966,16 @@ void TabletUpdates::get_basic_info_extra(TabletBasicInfo& info) {
info.index_mem = index_entry->size();
index_cache.release(index_entry);
}
int64_t pindex_size = 0;
auto st = _get_extra_file_size(&pindex_size, nullptr);
if (!st.ok()) {
        // Ignore the error status here: we don't want a pk index disk usage
        // lookup failure to break get_basic_info, so just log the error and keep going.
LOG(ERROR) << "get persistent index disk usage fail, tablet_id: " << _tablet.tablet_id()
<< ", error: " << st.get_error_msg();
} else {
info.index_disk_usage = pindex_size;
}
}

static double get_pk_index_write_amp_score_from_meta(Tablet* tablet) {
@@ -46,6 +46,7 @@ public static SystemTable create() {
.column("DATA_DIR", ScalarType.createVarchar(NAME_CHAR_LEN))
.column("SHARD_ID", ScalarType.createType(PrimitiveType.BIGINT))
.column("SCHEMA_HASH", ScalarType.createType(PrimitiveType.BIGINT))
.column("INDEX_DISK", ScalarType.createType(PrimitiveType.BIGINT))
.build(), TSchemaTableType.SCH_BE_TABLETS);
}
}
@@ -50,7 +50,7 @@ PLAN FRAGMENT 1(F00)

[fragment]
PLAN FRAGMENT 0
-OUTPUT EXPRS:1: BE_ID | 2: TABLE_ID | 3: PARTITION_ID | 4: TABLET_ID | 5: NUM_VERSION | 6: MAX_VERSION | 7: MIN_VERSION | 8: NUM_ROWSET | 9: NUM_ROW | 10: DATA_SIZE | 11: INDEX_MEM | 12: CREATE_TIME | 13: STATE | 14: TYPE | 15: DATA_DIR | 16: SHARD_ID | 17: SCHEMA_HASH
+OUTPUT EXPRS:1: BE_ID | 2: TABLE_ID | 3: PARTITION_ID | 4: TABLET_ID | 5: NUM_VERSION | 6: MAX_VERSION | 7: MIN_VERSION | 8: NUM_ROWSET | 9: NUM_ROW | 10: DATA_SIZE | 11: INDEX_MEM | 12: CREATE_TIME | 13: STATE | 14: TYPE | 15: DATA_DIR | 16: SHARD_ID | 17: SCHEMA_HASH | 18: INDEX_DISK
PARTITION: UNPARTITIONED

RESULT SINK
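With the BE scanner, the FE schema table, and the plan output all extended, the per-tablet persistent index disk cost should be directly queryable. A hypothetical query (assuming only the column names defined in this PR) might look like:

```sql
-- Hypothetical: top tablets by persistent index disk usage across BEs.
SELECT BE_ID, TABLE_ID, TABLET_ID, INDEX_MEM, INDEX_DISK
FROM information_schema.be_tablets
ORDER BY INDEX_DISK DESC
LIMIT 10;
```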