From 032c5b3554f95b60991d0506cd41a445d546fcef Mon Sep 17 00:00:00 2001 From: Timothy Mothra Lee Date: Mon, 26 Feb 2024 18:46:44 -0800 Subject: [PATCH] implement process counters --- .../LiveMetricsExporterEventSource.cs | 9 ++ .../src/Internals/Manager.Metrics.cs | 128 ++++++++++++++---- .../src/Internals/Manager.State.cs | 3 + 3 files changed, 116 insertions(+), 24 deletions(-) diff --git a/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Diagnostics/LiveMetricsExporterEventSource.cs b/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Diagnostics/LiveMetricsExporterEventSource.cs index d23b8b30ac5ab..3455f9b68c98c 100644 --- a/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Diagnostics/LiveMetricsExporterEventSource.cs +++ b/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Diagnostics/LiveMetricsExporterEventSource.cs @@ -195,5 +195,14 @@ public void DroppedDocument(DocumentIngressDocumentType documentType) [Event(12, Message = "Document was dropped. DocumentType: {0}. Not user actionable.", Level = EventLevel.Warning)] public void DroppedDocument(string documentType) => WriteEvent(12, documentType); + + [Event(13, Message = "Failure to calculate CPU Counter. Unexpected negative timespan: PreviousCollectedTime: {0}. RecentCollectedTime: {1}. Not user actionable.", Level = EventLevel.Error)] + public void ProcessCountersUnexpectedNegativeTimeSpan(long previousCollectedTime, long recentCollectedTime) => WriteEvent(13, previousCollectedTime, recentCollectedTime); + + [Event(14, Message = "Failure to calculate CPU Counter. Unexpected negative value: PreviousCollectedValue: {0}. RecentCollectedValue: {1}. Not user actionable.", Level = EventLevel.Error)] + public void ProcessCountersUnexpectedNegativeValue(long previousCollectedValue, long recentCollectedValue) => WriteEvent(14, previousCollectedValue, recentCollectedValue); + + [Event(15, Message = "Calculated Cpu Counter: Period: {0}. DiffValue: {1}. 
CalculatedValue: {2}. ProcessorCount: {3}. NormalizedValue: {4}", Level = EventLevel.Verbose)] + public void ProcessCountersCpuCounter(long period, long diffValue, double calculatedValue, int processorCount, double normalizedValue) => WriteEvent(15, period, diffValue, calculatedValue, processorCount, normalizedValue); } } diff --git a/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.Metrics.cs b/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.Metrics.cs index a8a0445af6da9..0bfb40a37366f 100644 --- a/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.Metrics.cs +++ b/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.Metrics.cs @@ -18,8 +18,8 @@ internal partial class Manager internal readonly DoubleBuffer _documentBuffer = new(); internal static bool? s_isAzureWebApp = null; - //private readonly PerformanceCounter _performanceCounter_ProcessorTime = new(categoryName: "Processor", counterName: "% Processor Time", instanceName: "_Total"); - //private readonly PerformanceCounter _performanceCounter_CommittedBytes = new(categoryName: "Memory", counterName: "Committed Bytes"); + private DateTimeOffset cachedCollectedTime = DateTimeOffset.MinValue; + private long cachedCollectedValue = 0; public MonitoringDataPoint GetDataPoint() { @@ -91,32 +91,47 @@ public MonitoringDataPoint GetDataPoint() dataPoint.Metrics.Add(metricPoint); } - // TODO: Reenable Perf Counters - //foreach (var metricPoint in CollectPerfCounters()) - //{ - // dataPoint.Metrics.Add(metricPoint); - //} + foreach (var metricPoint in CollectPerfCounters()) + { + dataPoint.Metrics.Add(metricPoint); + } return dataPoint; } - //public IEnumerable CollectPerfCounters() - //{ - // // PERFORMANCE COUNTERS - // yield return new Models.MetricPoint - // { - // Name = LiveMetricConstants.MetricId.MemoryCommittedBytesMetricIdValue, - // Value = _performanceCounter_CommittedBytes.NextValue(), - // Weight = 1 - // }; - - 
// yield return new Models.MetricPoint - // { - // Name = LiveMetricConstants.MetricId.ProcessorTimeMetricIdValue, - // Value = _performanceCounter_ProcessorTime.NextValue(), - // Weight = 1 - // }; - //} + /// <summary> + /// Collect Perf Counters for the current process. + /// </summary> + /// <remarks> + /// For Memory: + /// <see href="https://learn.microsoft.com/dotnet/api/system.diagnostics.process.privatememorysize64"/>. + /// "The amount of memory, in bytes, allocated for the associated process that cannot be shared with other processes.". + /// + /// For CPU: + /// <see href="https://learn.microsoft.com/dotnet/api/system.diagnostics.process.totalprocessortime"/>. + /// "A TimeSpan that indicates the amount of time that the associated process has spent utilizing the CPU. This value is the sum of the UserProcessorTime and the PrivilegedProcessorTime.". + /// </remarks> + public IEnumerable<Models.MetricPoint> CollectPerfCounters() + { + using var process = Process.GetCurrentProcess(); + + yield return new Models.MetricPoint + { + Name = LiveMetricConstants.MetricId.MemoryCommittedBytesMetricIdValue, + Value = process.PrivateMemorySize64, + Weight = 1 + }; + + if (TryCalculateCPUCounter(process, out var processorValue)) + { + yield return new Models.MetricPoint + { + Name = LiveMetricConstants.MetricId.ProcessorTimeMetricIdValue, + Value = Convert.ToSingle(processorValue), + Weight = 1 + }; + } + } /// /// Searches for the environment variable specific to Azure Web App. @@ -149,5 +164,70 @@ public MonitoringDataPoint GetDataPoint() return s_isAzureWebApp; } + + private void ResetCachedValues() + { + this.cachedCollectedTime = DateTimeOffset.MinValue; + this.cachedCollectedValue = 0; + } + + /// + /// Calculate the CPU usage as the diff between two ticks divided by the period of time, and then divided by the number of processors. 
+ /// + private bool TryCalculateCPUCounter(Process process, out double normalizedValue) + { + var previousCollectedValue = this.cachedCollectedValue; + var previousCollectedTime = this.cachedCollectedTime; + + var recentCollectedValue = this.cachedCollectedValue = process.TotalProcessorTime.Ticks; + var recentCollectedTime = this.cachedCollectedTime = DateTimeOffset.UtcNow; + + var processorCount = Environment.ProcessorCount; + + double calculatedValue; + + if (previousCollectedTime == DateTimeOffset.MinValue) + { + Debug.WriteLine($"{nameof(TryCalculateCPUCounter)} DateTimeOffset.MinValue"); + normalizedValue = default; + return false; + } + + var period = recentCollectedTime.Ticks - previousCollectedTime.Ticks; + if (period < 0) + { + // Not likely to happen but being safe here in case of clock issues in multi-core. + LiveMetricsExporterEventSource.Log.ProcessCountersUnexpectedNegativeTimeSpan( + previousCollectedTime: previousCollectedTime.Ticks, + recentCollectedTime: recentCollectedTime.Ticks); + Debug.WriteLine($"{nameof(TryCalculateCPUCounter)} period less than zero"); + normalizedValue = default; + return false; + } + + var diff = recentCollectedValue - previousCollectedValue; + if (diff < 0) + { + LiveMetricsExporterEventSource.Log.ProcessCountersUnexpectedNegativeValue( + previousCollectedValue: previousCollectedValue, + recentCollectedValue: recentCollectedValue); + Debug.WriteLine($"{nameof(TryCalculateCPUCounter)} diff less than zero"); + normalizedValue = default; + return false; + } + + period = period != 0 ? 
period : 1; + calculatedValue = diff * 100.0 / period; + normalizedValue = calculatedValue / processorCount; + LiveMetricsExporterEventSource.Log.ProcessCountersCpuCounter( + period: period, + diffValue: diff, + calculatedValue: calculatedValue, + processorCount: processorCount, + normalizedValue: normalizedValue); + // TryCalculateCPUCounter period: 10313304 diff: 64062500 calculatedValue: 621.1636930318354 processorCount: 8 normalizedValue: 77.64546162897942 + Debug.WriteLine($"{nameof(TryCalculateCPUCounter)} period: {period} diff: {diff} calculatedValue: {calculatedValue} processorCount: {processorCount} normalizedValue: {normalizedValue}"); + return true; + } } } diff --git a/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.State.cs b/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.State.cs index 6dbd36ce39c10..ec0068a4d5894 100644 --- a/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.State.cs +++ b/sdk/monitor/Azure.Monitor.OpenTelemetry.LiveMetrics/src/Internals/Manager.State.cs @@ -71,6 +71,9 @@ private void SetPingState() // This is used in determining if we should Backoff. // If we've been in another state for X amount of time, that may exceed our maximum interval and immediately trigger a Backoff. _lastSuccessfulPing = DateTimeOffset.UtcNow; + + // Must reset the metrics cache here. + ResetCachedValues(); } private void SetPostState()