Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Enhancement] Support profile for only big query (2) (backport #39855) #40206

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions be/src/exec/pipeline/fragment_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,9 @@ Status FragmentExecutor::_prepare_query_ctx(ExecEnv* exec_env, const UnifiedExec
if (query_options.__isset.enable_profile && query_options.enable_profile) {
_query_ctx->set_enable_profile();
}
if (query_options.__isset.big_query_profile_second_threshold) {
_query_ctx->set_big_query_profile_threshold(query_options.big_query_profile_second_threshold);
if (query_options.__isset.big_query_profile_threshold) {
_query_ctx->set_big_query_profile_threshold(query_options.big_query_profile_threshold,
query_options.big_query_profile_threshold_unit);
}
if (query_options.__isset.pipeline_profile_level) {
_query_ctx->set_profile_level(query_options.pipeline_profile_level);
Expand Down
25 changes: 23 additions & 2 deletions be/src/exec/pipeline/query_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,29 @@ class QueryContext : public std::enable_shared_from_this<QueryContext> {
}
return MonotonicNanos() - _query_begin_time > _big_query_profile_threshold_ns;
}
void set_big_query_profile_threshold(int64_t big_query_profile_threshold_s) {
_big_query_profile_threshold_ns = 1'000'000'000L * big_query_profile_threshold_s;
// Stores the big-query profile threshold, converted to nanoseconds.
//
// @param big_query_profile_threshold       threshold magnitude, expressed in the given unit
// @param big_query_profile_threshold_unit  unit of the magnitude (NANOSECOND .. MINUTE)
//
// An unrecognized unit trips DCHECK; the conversion factor then stays at 1, so the value
// is stored as if it were already in nanoseconds.
void set_big_query_profile_threshold(int64_t big_query_profile_threshold,
                                     TTimeUnit::type big_query_profile_threshold_unit) {
    // Number of nanoseconds represented by one tick of the requested unit.
    int64_t nanos_per_unit = 1;
    if (big_query_profile_threshold_unit == TTimeUnit::NANOSECOND) {
        nanos_per_unit = 1;
    } else if (big_query_profile_threshold_unit == TTimeUnit::MICROSECOND) {
        nanos_per_unit = 1'000L;
    } else if (big_query_profile_threshold_unit == TTimeUnit::MILLISECOND) {
        nanos_per_unit = 1'000'000L;
    } else if (big_query_profile_threshold_unit == TTimeUnit::SECOND) {
        nanos_per_unit = 1'000'000'000L;
    } else if (big_query_profile_threshold_unit == TTimeUnit::MINUTE) {
        nanos_per_unit = 60 * 1'000'000'000L;
    } else {
        DCHECK(false);
    }
    _big_query_profile_threshold_ns = nanos_per_unit * big_query_profile_threshold;
}
void set_runtime_profile_report_interval(int64_t runtime_profile_report_interval_s) {
_runtime_profile_report_interval_ns = 1'000'000'000L * runtime_profile_report_interval_s;
Expand Down
13 changes: 10 additions & 3 deletions docs/en/administration/query_profile_overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,17 @@ SET enable_profile = true;

### Enable Query Profile for Slow Queries

It is not recommended to enable Query Profile in a production environment on a global, long-term basis. This is because the data collection and processing of Query Profile may impose additional burdens on the system. However, if you need to capture and analyze slow queries, you can enable Query Profile only for slow queries. This can be achieved by setting the variable `big_query_profile_second_threshold` to an integer greater than `0`. For example, if this variable is set to `30`, it means that only queries with an execution time exceeding 30 seconds will trigger Query Profile. This ensures system performance while effectively monitoring slow queries.
It is not recommended to enable Query Profile in a production environment on a global, long-term basis. This is because the data collection and processing of Query Profile may impose additional burdens on the system. However, if you need to capture and analyze slow queries, you can enable Query Profile only for slow queries. This can be achieved by setting the variable `big_query_profile_threshold` to a time duration greater than `0s`. For example, if this variable is set to `30s`, it means that only queries with an execution time exceeding 30 seconds will trigger Query Profile. This ensures system performance while effectively monitoring slow queries.

```SQL
SET global big_query_profile_second_threshold = 30;
-- 30 seconds
SET global big_query_profile_threshold = '30s';

-- 500 milliseconds
SET global big_query_profile_threshold = '500ms';

-- 60 minutes
SET global big_query_profile_threshold = '60m';
```

### Enable Runtime Query Profile
Expand All @@ -41,7 +48,7 @@ Runtime Query Profile has the same format and content as regular Query Profile.
| Session Variable | enable_profile | true/false | false | Whether to enable Query Profile. `true` means to enable this feature. |
| Session Variable | pipeline_profile_level | 1/2 | 1 | Set the level of Query Profile. `1` indicates merging the metrics of the Query Profile; `2` indicates retaining the original structure of the Query Profile. If this item is set as `2`, all visualization analysis tools will no longer be applicable, therefore, it is generally not recommended to change this value. |
| Session Variable | runtime_profile_report_interval | Positive integer | 10 | The report interval of Runtime Query Profile. Unit: second. |
| Session Variable | big_query_profile_second_threshold | Integer | 0 | If the execution time of a big query excceds this value, Query Profile is automatically enbaled for this query. Setting this item to `0` indicates this feature is disabled. Unit: second. |
| Session Variable | big_query_profile_threshold | String | `0s` | If the execution time of a big query exceeds this value, Query Profile is automatically enabled for this query. Setting this item to `0s` indicates this feature is disabled. The value is an integer followed by a unit, where the unit can be `ms`, `s`, or `m`. |
| FE Dynamic Configuration Item | enable_statistics_collect_profile | true/false | false | Whether to enable Query Profile for statistics collection-related queries. `true` means to enable this feature. |

### Obtain Query Profile via Web UI
Expand Down
6 changes: 4 additions & 2 deletions docs/en/reference/System_variable.md
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,11 @@ Used for MySQL client compatibility. No practical usage.

Used to specify the number of rows of a single packet transmitted by each node during query execution. The default is 1024, i.e., every 1024 rows of data generated by the source node is packaged and sent to the destination node. A larger number of rows will improve the query throughput in large data volume scenarios, but may increase the query latency in small data volume scenarios. Also, it may increase the memory overhead of the query. We recommend setting `batch_size` between 1024 and 4096.

### big_query_profile_second_threshold (3.1 and later)
### big_query_profile_threshold (3.1 and later)

When the session variable `enable_profile` is set to `false` and the amount of time taken by a query exceeds the threshold specified by the variable `big_query_profile_second_threshold`, a profile is generated for that query.
When the session variable `enable_profile` is set to `false` and the amount of time taken by a query exceeds the threshold specified by the variable `big_query_profile_threshold`, a profile is generated for that query.

Note: In versions v3.1.5 to v3.1.7 and v3.2.0 to v3.2.2, the threshold for big queries was set with the variable `big_query_profile_second_threshold`. In v3.1.8, v3.2.3, and later releases, this variable has been replaced by `big_query_profile_threshold` to offer more flexible configuration options.

### cbo_decimal_cast_string_strict (2.5.14 and later)

Expand Down
13 changes: 10 additions & 3 deletions docs/zh/administration/query_profile_overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,17 @@ SET enable_profile = true;

### 针对慢查询开启 Query Profile

在生产环境中,通常不推荐全面启用 Query Profile 功能。这是因为 Query Profile 的数据采集和处理过程可能会为系统带来额外的负担。然而,如果需要捕捉到耗时的慢查询,就需要巧妙地使用这一功能。为此,您可以选择只对慢查询启用 Query Profile。这可以通过设置变量 `big_query_profile_second_threshold` 为一个大于 `0` 的整数来实现。例如,若将此变量设置为 30,意味着只有那些执行时间超过 30 秒的查询会启用 Query Profile 功能。这样既保证了系统性能,又能有效监控到慢查询。
在生产环境中,通常不推荐全面启用 Query Profile 功能。这是因为 Query Profile 的数据采集和处理过程可能会为系统带来额外的负担。然而,如果需要捕捉到耗时的慢查询,就需要巧妙地使用这一功能。为此,您可以选择只对慢查询启用 Query Profile。这可以通过设置变量 `big_query_profile_threshold` 为一个大于 `0s` 的时间来实现。例如,若将此变量设置为 `30s`,意味着只有那些执行时间超过 30 秒的查询会启用 Query Profile 功能。这样既保证了系统性能,又能有效监控到慢查询。

```SQL
SET global big_query_profile_second_threshold = 30;
-- 30 seconds
SET global big_query_profile_threshold = '30s';

-- 500 milliseconds
SET global big_query_profile_threshold = '500ms';

-- 60 minutes
SET global big_query_profile_threshold = '60m';
```

### 启用 Runtime Query Profile
Expand All @@ -41,7 +48,7 @@ Runtime Query Profile 与普通 Query Profile 格式和内容均相同。您可
| Session 变量 | enable_profile | true/false | false |是否启用 Query Profile 功能。`true` 表示启用。 |
| Session 变量 | pipeline_profile_level | 1/2 | 1 | 设置 Query Profile 的级别。`1` 表示会对 Profile 进行合并展示;`2` 表示保留原始的 Profile,如果选用这一级别,那么所有可视化的分析工具将不再起作用,因此通常不建议修改该参数。 |
| Session 变量 | runtime_profile_report_interval | 正整数 | 10 | 设置 Runtime Query Profile 上报的时间间隔,单位秒。 |
| Session 变量 | big_query_profile_second_threshold | 整数 | 0 | 设置长查询自动开启 Query Profile 的阈值,`0` 或者负数表示关闭该功能。正整数表示启用,单位秒。 |
| Session 变量 | big_query_profile_threshold | 字符串 | `0s` | 设置长查询自动开启 Query Profile 的阈值,`0s` 表示关闭该功能。整数结合时间单位表示启用,可以用单位包括:`ms`、`s`、`m`。 |
| FE 动态配置项 | enable_statistics_collect_profile | true/false | false | 是否启用统计信息采集相关查询的 Query Profile。`true` 表示启用。 |

## 获取 Query Profile
Expand Down
6 changes: 4 additions & 2 deletions docs/zh/reference/System_variable.md
Original file line number Diff line number Diff line change
Expand Up @@ -145,9 +145,11 @@ SELECT /*+ SET_VAR

用于指定在查询执行过程中,各个节点传输的单个数据包的行数。默认一个数据包的行数为 1024 行,即源端节点每产生 1024 行数据后,打包发给目的节点。较大的行数,会在扫描大数据量场景下提升查询的吞吐率,但可能会在小查询场景下增加查询延迟。同时,也会增加查询的内存开销。建议设置范围 1024 至 4096。

### big_query_profile_second_threshold (3.1 及以后)
### big_query_profile_threshold (3.1 及以后)

当会话变量 `enable_profile` 设置为 `false` 且查询时间超过 `big_query_profile_second_threshold` 设定的阈值时,则会生成 Profile。
当会话变量 `enable_profile` 设置为 `false` 且查询时间超过 `big_query_profile_threshold` 设定的阈值时,则会生成 Profile。

注意:在版本 v3.1.5 至 v3.1.7 以及 v3.2.0 至 v3.2.2 中,我们引入了 `big_query_profile_second_threshold` 参数,用于设定大型查询的阈值。而在 v3.1.8、v3.2.3 及后续版本中,此参数被 `big_query_profile_threshold` 替代,以便提供更加灵活的配置选项。

### cbo_decimal_cast_string_strict (2.5.14 及以后)

Expand Down
71 changes: 30 additions & 41 deletions fe/fe-core/src/main/java/com/starrocks/monitor/unit/TimeValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -233,58 +233,47 @@ public String getStringRep() {
}
}

public static TimeValue parseTimeValue(String sValue, String settingName) throws Exception {
Objects.requireNonNull(settingName);
public static TimeValue parseTimeValue(String sValue) {
Objects.requireNonNull(sValue);
return parseTimeValue(sValue, null, settingName);
return parseTimeValue(sValue, TimeValue.ZERO);
}

public static TimeValue parseTimeValue(String sValue, TimeValue defaultValue, String settingName)
throws Exception {
settingName = Objects.requireNonNull(settingName);
public static TimeValue parseTimeValue(String sValue, TimeValue defaultValue) {
if (sValue == null) {
return defaultValue;
}
final String normalized = sValue.toLowerCase(Locale.ROOT).trim();
if (normalized.endsWith("nanos")) {
return new TimeValue(parse(sValue, normalized, "nanos"), TimeUnit.NANOSECONDS);
} else if (normalized.endsWith("micros")) {
return new TimeValue(parse(sValue, normalized, "micros"), TimeUnit.MICROSECONDS);
} else if (normalized.endsWith("ms")) {
return new TimeValue(parse(sValue, normalized, "ms"), TimeUnit.MILLISECONDS);
} else if (normalized.endsWith("s")) {
return new TimeValue(parse(sValue, normalized, "s"), TimeUnit.SECONDS);
} else if (sValue.endsWith("m")) {
// parsing minutes should be case-sensitive as 'M' means "months", not "minutes"; this is the only special case.
return new TimeValue(parse(sValue, normalized, "m"), TimeUnit.MINUTES);
} else if (normalized.endsWith("h")) {
return new TimeValue(parse(sValue, normalized, "h"), TimeUnit.HOURS);
} else if (normalized.endsWith("d")) {
return new TimeValue(parse(sValue, normalized, "d"), TimeUnit.DAYS);
} else if (normalized.matches("-0*1")) {
return TimeValue.MINUS_ONE;
} else if (normalized.matches("0+")) {
return TimeValue.ZERO;
} else {
// Missing units:
throw new Exception(
"failed to parse setting [{}] with value [{}] as a time value: unit is missing or unrecognized");
try {
if (normalized.endsWith("nanos")) {
return new TimeValue(parse(normalized, "nanos"), TimeUnit.NANOSECONDS);
} else if (normalized.endsWith("micros")) {
return new TimeValue(parse(normalized, "micros"), TimeUnit.MICROSECONDS);
} else if (normalized.endsWith("ms")) {
return new TimeValue(parse(normalized, "ms"), TimeUnit.MILLISECONDS);
} else if (normalized.endsWith("s")) {
return new TimeValue(parse(normalized, "s"), TimeUnit.SECONDS);
} else if (sValue.endsWith("m")) {
// parsing minutes should be case-sensitive as 'M' means "months", not "minutes"; this is the only special case.
return new TimeValue(parse(normalized, "m"), TimeUnit.MINUTES);
} else if (normalized.endsWith("h")) {
return new TimeValue(parse(normalized, "h"), TimeUnit.HOURS);
} else if (normalized.endsWith("d")) {
return new TimeValue(parse(normalized, "d"), TimeUnit.DAYS);
} else if (normalized.matches("-0*1")) {
return TimeValue.MINUS_ONE;
} else if (normalized.matches("0+")) {
return TimeValue.ZERO;
} else {
return defaultValue;
}
} catch (Exception e) {
return defaultValue;
}
}

private static long parse(final String initialInput, final String normalized, final String suffix) {
private static long parse(final String normalized, final String suffix) {
final String s = normalized.substring(0, normalized.length() - suffix.length()).trim();
try {
return Long.parseLong(s);
} catch (final NumberFormatException e) {
try {
@SuppressWarnings("unused")
final double ignored = Double.parseDouble(s);
throw new NumberFormatException("failed to parse, fractional time values are not supported");
} catch (final NumberFormatException ignored) {
throw new NumberFormatException("failed to parse");
}
}
return Long.parseLong(s);
}

private static final long C0 = 1L;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -586,8 +586,7 @@ public boolean isProfileEnabled() {
if (!sessionVariable.isEnableBigQueryProfile()) {
return false;
}
return System.currentTimeMillis() - getStartTime() >
1000L * sessionVariable.getBigQueryProfileSecondThreshold();
return System.currentTimeMillis() - getStartTime() > sessionVariable.getBigQueryProfileMilliSecondThreshold();
}

public boolean needMergeProfile() {
Expand Down
17 changes: 10 additions & 7 deletions fe/fe-core/src/main/java/com/starrocks/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
import com.starrocks.common.io.Writable;
import com.starrocks.common.util.CompressionUtils;
import com.starrocks.common.util.TimeUtils;
import com.starrocks.monitor.unit.TimeValue;
import com.starrocks.qe.VariableMgr.VarAttr;
import com.starrocks.sql.analyzer.SemanticException;
import com.starrocks.sql.common.QueryDebugOptions;
Expand All @@ -55,6 +56,7 @@
import com.starrocks.thrift.TQueryOptions;
import com.starrocks.thrift.TSpillMode;
import com.starrocks.thrift.TTabletInternalParallelMode;
import com.starrocks.thrift.TTimeUnit;
import org.apache.commons.lang3.EnumUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -494,7 +496,7 @@ public static MaterializedViewRewriteMode parse(String str) {
public static final String BIG_QUERY_LOG_CPU_SECOND_THRESHOLD = "big_query_log_cpu_second_threshold";
public static final String BIG_QUERY_LOG_SCAN_BYTES_THRESHOLD = "big_query_log_scan_bytes_threshold";
public static final String BIG_QUERY_LOG_SCAN_ROWS_THRESHOLD = "big_query_log_scan_rows_threshold";
public static final String BIG_QUERY_PROFILE_SECOND_THRESHOLD = "big_query_profile_second_threshold";
public static final String BIG_QUERY_PROFILE_THRESHOLD = "big_query_profile_threshold";

public static final String SQL_DIALECT = "sql_dialect";

Expand Down Expand Up @@ -896,8 +898,8 @@ public static MaterializedViewRewriteMode parse(String str) {
@VariableMgr.VarAttr(name = ENABLE_ASYNC_PROFILE, flag = VariableMgr.INVISIBLE)
private boolean enableAsyncProfile = true;

@VariableMgr.VarAttr(name = BIG_QUERY_PROFILE_SECOND_THRESHOLD)
private int bigQueryProfileSecondThreshold = 0;
@VariableMgr.VarAttr(name = BIG_QUERY_PROFILE_THRESHOLD)
private String bigQueryProfileThreshold = "0s";

@VariableMgr.VarAttr(name = RESOURCE_GROUP_ID, alias = RESOURCE_GROUP_ID_V2,
show = RESOURCE_GROUP_ID_V2, flag = VariableMgr.INVISIBLE)
Expand Down Expand Up @@ -1824,11 +1826,11 @@ public void setEnableLoadProfile(boolean enableLoadProfile) {
}

public boolean isEnableBigQueryProfile() {
return bigQueryProfileSecondThreshold > 0;
return TimeValue.parseTimeValue(bigQueryProfileThreshold).getMillis() > 0;
}

public int getBigQueryProfileSecondThreshold() {
return bigQueryProfileSecondThreshold;
public long getBigQueryProfileMilliSecondThreshold() {
return TimeValue.parseTimeValue(bigQueryProfileThreshold).getMillis();
}

public int getWaitTimeoutS() {
Expand Down Expand Up @@ -3040,7 +3042,8 @@ public TQueryOptions toThrift() {
tResult.setQuery_timeout(Math.min(Integer.MAX_VALUE / 1000, queryTimeoutS));
tResult.setQuery_delivery_timeout(Math.min(Integer.MAX_VALUE / 1000, queryDeliveryTimeoutS));
tResult.setEnable_profile(enableProfile);
tResult.setBig_query_profile_second_threshold(bigQueryProfileSecondThreshold);
tResult.setBig_query_profile_threshold(TimeValue.parseTimeValue(bigQueryProfileThreshold).getMillis());
tResult.setBig_query_profile_threshold_unit(TTimeUnit.MILLISECOND);
tResult.setRuntime_profile_report_interval(runtimeProfileReportInterval);
tResult.setBatch_size(chunkSize);
tResult.setLoad_mem_limit(loadMemLimit);
Expand Down
Loading
Loading