diff --git a/charts/otel-config/Chart.yaml b/charts/otel-config/Chart.yaml
new file mode 100644
index 0000000..85df263
--- /dev/null
+++ b/charts/otel-config/Chart.yaml
@@ -0,0 +1,9 @@
+# Helm chart metadata for the OpenTelemetry configuration resources
+# (OpenTelemetryCollector, Instrumentation, VMServiceScrape).
+apiVersion: v2
+name: opentelemetry-chart
+description: A Helm chart for OpenTelemetry configurations
+type: application
+version: 0.1.0
+# Quoted so the value is always read as a string.
+appVersion: "1.0.0"
diff --git a/charts/otel-config/templates/otel-collector-metrics.yaml b/charts/otel-config/templates/otel-collector-metrics.yaml
new file mode 100644
index 0000000..b82b3b6
--- /dev/null
+++ b/charts/otel-config/templates/otel-collector-metrics.yaml
@@ -0,0 +1,33 @@
+{{- /*
+VMServiceScrape that lets the VictoriaMetrics operator scrape the collector's
+own metrics endpoint. Rendered only when .Values.serviceScrape.enabled is set.
+Label and selector values are quoted so that string values such as "true"
+stay strings after rendering; Kubernetes label values must be strings.
+*/ -}}
+{{- if .Values.serviceScrape.enabled }}
+apiVersion: operator.victoriametrics.com/v1beta1
+kind: VMServiceScrape
+metadata:
+  {{- with .Values.serviceScrape.labels }}
+  labels:
+    {{- range $key, $value := . }}
+    {{ $key | quote }}: {{ $value | quote }}
+    {{- end }}
+  {{- end }}
+  name: {{ .Values.serviceScrape.name | quote }}
+  namespace: {{ .Values.serviceScrape.namespace | quote }}
+spec:
+  endpoints:
+    - interval: {{ .Values.serviceScrape.endpoints.interval | quote }}
+      path: {{ .Values.serviceScrape.endpoints.path | quote }}
+      # Left unquoted: targetPort may be a port number or a port name.
+      targetPort: {{ .Values.serviceScrape.endpoints.targetPort }}
+  namespaceSelector:
+    matchNames:
+      - {{ .Values.serviceScrape.namespace | quote }}
+  selector:
+    matchLabels:
+      {{- range $key, $value := .Values.serviceScrape.selector }}
+      {{ $key | quote }}: {{ $value | quote }}
+      {{- end }}
+{{- end }}
diff --git a/charts/otel-config/templates/otel-collector.yaml b/charts/otel-config/templates/otel-collector.yaml
new file mode 100644
index 0000000..c5a5690
--- /dev/null
+++ b/charts/otel-config/templates/otel-collector.yaml
@@ -0,0 +1,34 @@
+{{- /*
+OpenTelemetryCollector custom resource managed by the OpenTelemetry Operator.
+Rendered only when .Values.collector.enabled is set.
+.Values.collector.config may be given either as a YAML mapping or as a
+multi-line string; both are emitted as a structured mapping under spec.config.
+*/ -}}
+{{- if .Values.collector.enabled }}
+apiVersion: opentelemetry.io/v1beta1
+kind: OpenTelemetryCollector
+metadata:
+  name: {{ .Values.collector.name | quote }}
+  namespace: {{ .Values.collector.namespace | quote }}
+spec:
+  mode: {{ .Values.collector.mode | quote }}
+  resources:
+    requests:
+      cpu: {{ .Values.collector.resources.requests.cpu | quote }}
+      memory: {{ .Values.collector.resources.requests.memory | quote }}
+    limits:
+      cpu: {{ .Values.collector.resources.limits.cpu | quote }}
+      memory: {{ .Values.collector.resources.limits.memory | quote }}
+  autoscaler:
+    minReplicas: {{ .Values.collector.autoscaler.minReplicas }}
+    maxReplicas: {{ .Values.collector.autoscaler.maxReplicas }}
+    targetCPUUtilization: {{ .Values.collector.autoscaler.targetCPUUtilization }}
+    targetMemoryUtilization: {{ .Values.collector.autoscaler.targetMemoryUtilization }}
+  config:
+    {{- if kindIs "string" .Values.collector.config }}
+    {{- /* A string passed through toYaml would be rendered as a string again. */}}
+    {{- .Values.collector.config | trim | nindent 4 }}
+    {{- else }}
+    {{- toYaml .Values.collector.config | nindent 4 }}
+    {{- end }}
+{{- end }}
diff --git a/charts/otel-config/templates/otel-instrumentation.yaml b/charts/otel-config/templates/otel-instrumentation.yaml
new file mode 100644
index 0000000..37b7806
--- /dev/null
+++ b/charts/otel-config/templates/otel-instrumentation.yaml
@@ -0,0 +1,36 @@
+{{- /*
+Instrumentation custom resource used by the OpenTelemetry Operator for
+auto-instrumentation. Rendered only when .Values.instrumentation.enabled is set.
+Each key under .Values.instrumentation.languages becomes a spec section whose
+env list is taken from the list of name/value pairs stored under that key.
+*/ -}}
+{{- if .Values.instrumentation.enabled }}
+apiVersion: opentelemetry.io/v1alpha1
+kind: Instrumentation
+metadata:
+  name: {{ .Values.instrumentation.name | quote }}
+  namespace: {{ .Values.instrumentation.namespace | quote }}
+spec:
+  {{- if .Values.instrumentation.exporter }}
+  exporter:
+    endpoint: {{ .Values.instrumentation.exporter.endpoint | quote }}
+  {{- end }}
+  propagators:
+    {{- range .Values.instrumentation.propagators }}
+    - {{ . | quote }}
+    {{- end }}
+  sampler:
+    type: {{ .Values.instrumentation.sampler.type | quote }}
+    argument: {{ .Values.instrumentation.sampler.argument | quote }}
+  {{- if .Values.instrumentation.languages }}
+  {{- range $language, $envs := .Values.instrumentation.languages }}
+  {{ $language }}:
+    env:
+      {{- /* Env var values must be strings, so they are always quoted. */}}
+      {{- range $envs }}
+      - name: {{ .name | quote }}
+        value: {{ .value | quote }}
+      {{- end }}
+  {{- end }}
+  {{- end }}
+{{- end }}
diff --git a/charts/otel-config/values.yaml b/charts/otel-config/values.yaml
new file mode 100644
index 0000000..895df28
--- /dev/null
+++ b/charts/otel-config/values.yaml
@@ -0,0 +1,183 @@
+# Default values for the opentelemetry-chart Helm chart.
+
+# Settings for the Instrumentation resource
+# (templates/otel-instrumentation.yaml).
+instrumentation:
+  enabled: true
+  name: otel-instrumentation
+  namespace: observability
+
+  exporter:
+    endpoint: http://opentelemetry-collector.observability.svc.cluster.local:4317
+
+  propagators:
+    - tracecontext
+    - baggage
+    - b3
+
+  sampler:
+    type: parentbased_traceidratio
+    # Quoted so the value stays a string and keeps its trailing zero.
+    argument: "0.10"
+
+  # Each key becomes a section of the Instrumentation spec; the list under it
+  # is rendered as that section's env entries.
+  languages:
+    python:
+      - name: OTEL_EXPORTER_OTLP_ENDPOINT
+        value: http://opentelemetry-collector.observability.svc.cluster.local:4318
+    dotnet:
+      - name: OTEL_EXPORTER_OTLP_ENDPOINT
+        value: http://opentelemetry-collector.observability.svc.cluster.local:4318
+    go:
+      - name: OTEL_EXPORTER_OTLP_ENDPOINT
+        value: http://opentelemetry-collector.observability.svc.cluster.local:4318
+
+# Settings for the VMServiceScrape resource
+# (templates/otel-collector-metrics.yaml).
+serviceScrape:
+  enabled: true
+  name: otel-collector-metrics-sm
+  namespace: observability
+  endpoints:
+    interval: 30s
+    path: /metrics
+    targetPort: 8888
+  labels:
+    app: otel-collector-metrics
+    # Label values must be strings, hence the quotes.
+    exclude: "true"
+    prometheus: kube
+  selector:
+    app.kubernetes.io/name: opentelemetry-collector-monitoring
+    operator.opentelemetry.io/collector-service-type: monitoring
+
+# Settings for the OpenTelemetryCollector resource
+# (templates/otel-collector.yaml).
+collector:
+  enabled: true
+  name: opentelemetry
+  namespace: observability
+  mode: deployment
+  resources:
+    requests:
+      cpu: 0.5
+      memory: 500Mi
+    limits:
+      cpu: 0.5
+      memory: 500Mi
+  autoscaler:
+    minReplicas: 1
+    maxReplicas: 10
+    targetCPUUtilization: 80
+    targetMemoryUtilization: 80
+  # Collector configuration, written as a mapping so individual keys can be
+  # overridden; the template also accepts a multi-line string here.
+  config:
+    connectors:
+      spanmetrics:
+        dimensions:
+          - name: http.method
+          - name: http.status_code
+          - default: /
+            name: http.route
+        dimensions_cache_size: 1000
+        events:
+          dimensions:
+            - name: exception.type
+            - name: exception.message
+          enabled: true
+        exemplars:
+          enabled: true
+        metrics_expiration: 5m
+        metrics_flush_interval: 15s
+    exporters:
+      debug: {}
+      otlp:
+        endpoint: "tempo:4317"
+        tls:
+          insecure: true
+      prometheusremotewrite:
+        endpoint: http://vminsert-vm.monitoring.svc:8480/insert/0/prometheus/api/v1/write
+        export_created_metric:
+          enabled: true
+        max_batch_size_bytes: 8192
+        remote_write_queue:
+          enabled: true
+          num_consumers: 100
+          queue_size: 100000
+        retry_on_failure:
+          enabled: true
+          initial_interval: 5s
+          max_elapsed_time: 300s
+          max_interval: 30s
+        timeout: 300s
+        tls:
+          insecure_skip_verify: true
+    processors:
+      batch:
+        send_batch_max_size: 8192
+        send_batch_size: 1024
+        timeout: 60s
+      memory_limiter:
+        check_interval: 10s
+        limit_percentage: 75
+        spike_limit_percentage: 50
+      tail_sampling:
+        decision_wait: 60s
+        policies:
+          - name: error-policy
+            status_code:
+              status_codes:
+                - ERROR
+            type: status_code
+          - latency:
+              threshold_ms: 500
+            name: latency-policy
+            type: latency
+          - name: probabilistic
+            probabilistic:
+              sampling_percentage: 10
+            type: probabilistic
+    receivers:
+      otlp:
+        protocols:
+          grpc:
+            endpoint: "0.0.0.0:4317"
+            keepalive:
+              server_parameters:
+                max_connection_age: 5m
+                max_connection_age_grace: 5m
+                max_connection_idle: 1m
+                time: 3m
+                timeout: 5m
+          http:
+            endpoint: "0.0.0.0:4318"
+    service:
+      pipelines:
+        metrics:
+          exporters:
+            - debug
+            - prometheusremotewrite
+          processors:
+            - memory_limiter
+            - batch
+          receivers:
+            - spanmetrics
+            - otlp
+        traces:
+          exporters:
+            - debug
+            - otlp
+            - spanmetrics
+          processors:
+            - memory_limiter
+            # Sampling runs before batching so only kept spans are batched.
+            - tail_sampling
+            - batch
+          receivers:
+            - otlp
+      telemetry:
+        metrics:
+          address: "0.0.0.0:8888"
+          level: detailed