From a221f4c2b73c31f2a5c593084da2190c900e299d Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Wed, 15 Feb 2023 10:34:13 -0600 Subject: [PATCH 01/27] metrics manager working for task nodes Signed-off-by: Daniel Rammer --- go.mod | 2 + go.sum | 2 - pkg/manager/impl/metrics_manager.go | 414 +++++++++++++++++++++++++ pkg/manager/interfaces/metrics.go | 17 + pkg/manager/mocks/metrics_interface.go | 98 ++++++ pkg/rpc/adminservice/base.go | 14 +- pkg/rpc/adminservice/execution.go | 18 ++ pkg/rpc/adminservice/node_execution.go | 18 ++ 8 files changed, 577 insertions(+), 6 deletions(-) create mode 100644 pkg/manager/impl/metrics_manager.go create mode 100644 pkg/manager/interfaces/metrics.go create mode 100644 pkg/manager/mocks/metrics_interface.go diff --git a/go.mod b/go.mod index 8d02e2e1b..4ccc4804f 100644 --- a/go.mod +++ b/go.mod @@ -208,3 +208,5 @@ require ( ) replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 + +replace github.com/flyteorg/flyteidl => ../flyteidl diff --git a/go.sum b/go.sum index ca0591d62..51dfb9abe 100644 --- a/go.sum +++ b/go.sum @@ -308,8 +308,6 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flyteorg/flyteidl v1.3.7 h1:MA7kOqMr/TmPlYPvJZwfsl+CYneuDOJ+kEKx2DocLhE= -github.com/flyteorg/flyteidl v1.3.7/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= github.com/flyteorg/flytepropeller v1.1.51 h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= diff --git a/pkg/manager/impl/metrics_manager.go 
b/pkg/manager/impl/metrics_manager.go new file mode 100644 index 000000000..44c11cb9a --- /dev/null +++ b/pkg/manager/impl/metrics_manager.go @@ -0,0 +1,414 @@ +package impl + +import ( + "context" + //"fmt" + "time" + + "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" + repoInterfaces "github.com/flyteorg/flyteadmin/pkg/repositories/interfaces" + + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + + "github.com/flyteorg/flytestdlib/promutils" + + "github.com/golang/protobuf/ptypes/timestamp" + + "google.golang.org/protobuf/types/known/timestamppb" +) + +type metrics struct { + Scope promutils.Scope + //Set labeled.Counter +} + +type MetricsManager struct { + db repoInterfaces.Repository + workflowManager interfaces.WorkflowInterface + executionManager interfaces.ExecutionInterface + nodeExecutionManager interfaces.NodeExecutionInterface + taskExecutionManager interfaces.TaskExecutionInterface + metrics metrics +} + +func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category admin.CategoricalSpanInfo_Category) *admin.Span { + return &admin.Span{ + StartTime: startTime, + EndTime: endTime, + Info: &admin.Span_Category{ + Category: &admin.CategoricalSpanInfo{ + Category: category, + }, + }, + } +} + +func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nodeId string, executionId *core.WorkflowExecutionIdentifier, + upstreamNodeIds map[string]*core.ConnectionSet_IdList, nodeCache map[string]*admin.NodeExecution) (*admin.NodeExecution, error) { + + var nodeExecution *admin.NodeExecution + var latestUpstreamUpdatedAt = time.Unix(0, 0) + if connectionSet, exists := upstreamNodeIds[nodeId]; exists { + for _, upstreamNodeId := range connectionSet.Ids { + upstreamNodeExecution, err := m.getNodeExecution(ctx, upstreamNodeId, executionId, nodeCache) + if err != nil { + return nil, err // TODO @hamersaw - is this right? 
+ } + + t := upstreamNodeExecution.Closure.UpdatedAt.AsTime() + if t.After(latestUpstreamUpdatedAt) { + nodeExecution = upstreamNodeExecution + latestUpstreamUpdatedAt = t + } + } + } + + return nodeExecution, nil +} + +// TODO @hamersaw - docs +func (m *MetricsManager) getNodeExecution(ctx context.Context, nodeId string, executionId *core.WorkflowExecutionIdentifier, + nodeCache map[string]*admin.NodeExecution) (*admin.NodeExecution, error) { + + // if node already exists in cache -> use it + if nodeExecution, exists := nodeCache[nodeId]; exists { + return nodeExecution, nil + } + + // retrieve node execution + nodeRequest := admin.NodeExecutionGetRequest{ + Id: &core.NodeExecutionIdentifier{ + NodeId: nodeId, + ExecutionId: executionId, + }, + } + + nodeExecution, err := m.nodeExecutionManager.GetNodeExecution(ctx, nodeRequest) + if err != nil { + return nil, err + } + + // populate cache and return + nodeCache[nodeId] = nodeExecution + return nodeExecution, nil +} + +func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Execution, depth int) (*admin.Span, error) { + referenceSpan := &admin.ReferenceSpanInfo{ + Id: &admin.ReferenceSpanInfo_WorkflowId{ + WorkflowId: execution.Id, + }, + } + + if depth != 0 { + spans := make([]*admin.Span, 0) // TODO @hamersaw how to make an array + nodeCache := make(map[string]*admin.NodeExecution) + + // retrieve workflow, execution, and node executions + workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} + workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) + if err != nil { + return nil, err + } + + nodeExecutions := make([]*admin.NodeExecution, 0) + nodeListRequest := admin.NodeExecutionListRequest{ + WorkflowExecutionId: execution.Id, + Limit: 20, // TODO @hamersaw - parameterize? 
+ } + + for { + nodeListResponse, err := m.nodeExecutionManager.ListNodeExecutions(ctx, nodeListRequest) + if err != nil { + return nil, err + } + + for _, nodeExecution := range nodeListResponse.NodeExecutions { + nodeExecutions = append(nodeExecutions, nodeExecution) + } + + if len(nodeListResponse.NodeExecutions) < int(nodeListRequest.Limit) { + break + } + + nodeListRequest.Token = nodeListResponse.Token + } + + // TODO @hamersaw - sort nodeExecutions by CreatedAt + + // compute frontend overhead + startNode, err := m.getNodeExecution(ctx, "start-node", execution.Id, nodeCache) + if err != nil { + return nil, err + } + + spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // iterate over nodes and compute overhead + if err := m.parseNodeExecutions(ctx, nodeExecutions, &spans, depth); err != nil { + return nil, err + } + + // compute backend overhead + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", execution.Id, + workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeCache) + if err != nil { + return nil, err + } + + spans = append(spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + referenceSpan.Spans = spans + } + + return &admin.Span{ + StartTime: execution.Closure.CreatedAt, + EndTime: execution.Closure.UpdatedAt, + Info: &admin.Span_Reference{ + Reference: referenceSpan, + }, + }, nil +} + +func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions []*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + for _, nodeExecution := range nodeExecutions { + if nodeExecution.Id.NodeId == "start-node" || nodeExecution.Id.NodeId == "end-node" { + continue + } + + nodeExecutionSpan, err := m.parseNodeExecution(ctx, nodeExecution, depth-1) + if err != nil { + return err + } + // TODO @hamersaw - 
prepend nodeExecution spans with NODE_TRANSITION time + + *spans = append(*spans, nodeExecutionSpan) + } + + return nil +} + +func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, depth int) (*admin.Span, error) { + referenceSpan := &admin.ReferenceSpanInfo{ + Id: &admin.ReferenceSpanInfo_NodeId{ + NodeId: nodeExecution.Id, + }, + } + + if depth != 0 { + spans := make([]*admin.Span, 0) // TODO @hamersaw how to make an array + + taskExecutions := make([]*admin.TaskExecution, 0) + taskListRequest := admin.TaskExecutionListRequest{ + NodeExecutionId: nodeExecution.Id, + Limit: 20, // TODO @hamersaw - parameterize? + } + + for { + taskListResponse, err := m.taskExecutionManager.ListTaskExecutions(ctx, taskListRequest) + if err != nil { + return nil, err + } + + for _, taskExecution := range taskListResponse.TaskExecutions { + taskExecutions = append(taskExecutions, taskExecution) + } + + if len(taskListResponse.TaskExecutions) < int(taskListRequest.Limit) { + break + } + + taskListRequest.Token = taskListResponse.Token + } + + // TODO @hamersaw - sort taskExecutions by CreatedAt + /*sort.Slice(a, func(i, j int) bool { + return a[i] < a[j] + })*/ + + nodeExecutions := make([]*admin.NodeExecution, 0) + nodeListRequest := admin.NodeExecutionListRequest{ + WorkflowExecutionId: nodeExecution.Id.ExecutionId, + Limit: 20, // TODO @hamersaw - parameterize? + UniqueParentId: nodeExecution.Id.NodeId, + } + + // TODO - refactor this out! 
+ for { + nodeListResponse, err := m.nodeExecutionManager.ListNodeExecutions(ctx, nodeListRequest) + if err != nil { + return nil, err + } + + for _, nodeExecution := range nodeListResponse.NodeExecutions { + nodeExecutions = append(nodeExecutions, nodeExecution) + } + + if len(nodeListResponse.NodeExecutions) < int(nodeListRequest.Limit) { + break + } + + nodeListRequest.Token = nodeListResponse.Token + } + + if !nodeExecution.Metadata.IsParentNode && len(taskExecutions) > 0 { + // parse task node + m.parseTaskNodeExecution(ctx, nodeExecution, taskExecutions, &spans, depth-1) + } else if nodeExecution.Metadata.IsParentNode && len(taskExecutions) > 0 { + // TODO @hamersaw - dynamic node + if err := m.parseDynamicNodeExecution(ctx, nodeExecution, taskExecutions, nodeExecutions, &spans, depth-1); err != nil { + return nil, err + } + //} else if nodeExecution.Metadata.IsParentNode && nodeExecution.Closure.Target? is a WorkflowNode + //} else if nodeExecution.Metadata.IsParentNode && HAS_UNDERLYING_NODE_EXECUTIONS + } else { + // TODO @hamersaw process branch, gate, launchplan, subworkflow + } + + referenceSpan.Spans = spans + } + + return &admin.Span{ + StartTime: nodeExecution.Closure.CreatedAt, + EndTime: nodeExecution.Closure.UpdatedAt, + Info: &admin.Span_Reference{ + Reference: referenceSpan, + }, + }, nil +} + +func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, + taskExecutions []*admin.TaskExecution, nodeExecutions []*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + parseTaskExecutions(taskExecutions, spans, depth) + + // TODO @hamersaw - frontend overhead + //*spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + // nodeExecution.Closure.UpdatedAt, 
admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // TODO @hamersaw - correctly parse dynamic nodes + if err := m.parseNodeExecutions(ctx, nodeExecutions, spans, depth); err != nil { + return err + } + + // TODO @hamersaw - backend overhead + + return nil +} + +func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, + taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { + + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + parseTaskExecutions(taskExecutions, spans, depth) + + *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) +} + +func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { + for index, taskExecution := range taskExecutions { + if index > 0 { + *spans = append(*spans, createCategoricalSpan(taskExecutions[index-1].Closure.UpdatedAt, + taskExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + + if depth != 0 { + *spans = append(*spans, parseTaskExecution(taskExecution)) + } + } +} + +func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { + spans := make([]*admin.Span, 0) + spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, + taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + + taskEndTime := timestamppb.New(taskExecution.Closure.StartedAt.AsTime().Add(taskExecution.Closure.Duration.AsDuration())) + spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, + taskEndTime, admin.CategoricalSpanInfo_PLUGIN_EXECUTION)) + + spans = append(spans, createCategoricalSpan(taskEndTime, + taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + + return &admin.Span{ + 
StartTime: taskExecution.Closure.CreatedAt, + EndTime: taskExecution.Closure.UpdatedAt, + Info: &admin.Span_Reference{ + Reference: &admin.ReferenceSpanInfo{ + Id: &admin.ReferenceSpanInfo_TaskId{ + TaskId: taskExecution.Id, + }, + Spans: spans, + }, + }, + } +} + +// TODO @hamersaw - docs +func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, + request admin.WorkflowExecutionGetMetricsRequest) (*admin.WorkflowExecutionGetMetricsResponse, error) { + + // retrieve workflow execution + executionRequest := admin.WorkflowExecutionGetRequest{Id: request.Id} + execution, err := m.executionManager.GetExecution(ctx, executionRequest) + if err != nil { + return nil, err + } + + span, err := m.parseExecution(ctx, execution, int(request.Depth)) + if err != nil { + return nil, err + } + + return &admin.WorkflowExecutionGetMetricsResponse{Span: span}, nil +} + +// TODO @hamersaw docs +func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, + request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { + + // retrieve node and task executions + nodeCache := make(map[string]*admin.NodeExecution) + nodeExecution, err := m.getNodeExecution(ctx, request.Id.NodeId, request.Id.ExecutionId, nodeCache) + if err != nil { + return nil, err + } + + span, err := m.parseNodeExecution(ctx, nodeExecution, int(request.Depth)) + if err != nil { + return nil, err + } + + return &admin.NodeExecutionGetMetricsResponse{Span: span}, nil +} + +func NewMetricsManager( + db repoInterfaces.Repository, + workflowManager interfaces.WorkflowInterface, + executionManager interfaces.ExecutionInterface, + nodeExecutionManager interfaces.NodeExecutionInterface, + taskExecutionManager interfaces.TaskExecutionInterface, + scope promutils.Scope) interfaces.MetricsInterface { + metrics := metrics{ + Scope: scope, + //Set: labeled.NewCounter("num_set", "count of set metricss", scope), + } + + return &MetricsManager{ + db: db, + workflowManager: 
workflowManager, + executionManager: executionManager, + nodeExecutionManager: nodeExecutionManager, + taskExecutionManager: taskExecutionManager, + metrics: metrics, + } +} diff --git a/pkg/manager/interfaces/metrics.go b/pkg/manager/interfaces/metrics.go new file mode 100644 index 000000000..84d9c9100 --- /dev/null +++ b/pkg/manager/interfaces/metrics.go @@ -0,0 +1,17 @@ +package interfaces + +import ( + "context" + + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" +) + +//go:generate mockery -name=MetricsInterface -output=../mocks -case=underscore + +// Interface for managing Flyte execution metrics +type MetricsInterface interface { + GetExecutionMetrics(ctx context.Context, request admin.WorkflowExecutionGetMetricsRequest) ( + *admin.WorkflowExecutionGetMetricsResponse, error) + GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) ( + *admin.NodeExecutionGetMetricsResponse, error) +} diff --git a/pkg/manager/mocks/metrics_interface.go b/pkg/manager/mocks/metrics_interface.go new file mode 100644 index 000000000..b29423d51 --- /dev/null +++ b/pkg/manager/mocks/metrics_interface.go @@ -0,0 +1,98 @@ +// Code generated by mockery v1.0.1. DO NOT EDIT. 
+ +package mocks + +import ( + context "context" + + admin "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + + mock "github.com/stretchr/testify/mock" +) + +// MetricsInterface is an autogenerated mock type for the MetricsInterface type +type MetricsInterface struct { + mock.Mock +} + +type MetricsInterface_GetExecutionMetrics struct { + *mock.Call +} + +func (_m MetricsInterface_GetExecutionMetrics) Return(_a0 *admin.WorkflowExecutionGetMetricsResponse, _a1 error) *MetricsInterface_GetExecutionMetrics { + return &MetricsInterface_GetExecutionMetrics{Call: _m.Call.Return(_a0, _a1)} +} + +func (_m *MetricsInterface) OnGetExecutionMetrics(ctx context.Context, request admin.WorkflowExecutionGetMetricsRequest) *MetricsInterface_GetExecutionMetrics { + c_call := _m.On("GetExecutionMetrics", ctx, request) + return &MetricsInterface_GetExecutionMetrics{Call: c_call} +} + +func (_m *MetricsInterface) OnGetExecutionMetricsMatch(matchers ...interface{}) *MetricsInterface_GetExecutionMetrics { + c_call := _m.On("GetExecutionMetrics", matchers...) 
+ return &MetricsInterface_GetExecutionMetrics{Call: c_call} +} + +// GetExecutionMetrics provides a mock function with given fields: ctx, request +func (_m *MetricsInterface) GetExecutionMetrics(ctx context.Context, request admin.WorkflowExecutionGetMetricsRequest) (*admin.WorkflowExecutionGetMetricsResponse, error) { + ret := _m.Called(ctx, request) + + var r0 *admin.WorkflowExecutionGetMetricsResponse + if rf, ok := ret.Get(0).(func(context.Context, admin.WorkflowExecutionGetMetricsRequest) *admin.WorkflowExecutionGetMetricsResponse); ok { + r0 = rf(ctx, request) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*admin.WorkflowExecutionGetMetricsResponse) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, admin.WorkflowExecutionGetMetricsRequest) error); ok { + r1 = rf(ctx, request) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +type MetricsInterface_GetNodeExecutionMetrics struct { + *mock.Call +} + +func (_m MetricsInterface_GetNodeExecutionMetrics) Return(_a0 *admin.NodeExecutionGetMetricsResponse, _a1 error) *MetricsInterface_GetNodeExecutionMetrics { + return &MetricsInterface_GetNodeExecutionMetrics{Call: _m.Call.Return(_a0, _a1)} +} + +func (_m *MetricsInterface) OnGetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) *MetricsInterface_GetNodeExecutionMetrics { + c_call := _m.On("GetNodeExecutionMetrics", ctx, request) + return &MetricsInterface_GetNodeExecutionMetrics{Call: c_call} +} + +func (_m *MetricsInterface) OnGetNodeExecutionMetricsMatch(matchers ...interface{}) *MetricsInterface_GetNodeExecutionMetrics { + c_call := _m.On("GetNodeExecutionMetrics", matchers...) 
+ return &MetricsInterface_GetNodeExecutionMetrics{Call: c_call} +} + +// GetNodeExecutionMetrics provides a mock function with given fields: ctx, request +func (_m *MetricsInterface) GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { + ret := _m.Called(ctx, request) + + var r0 *admin.NodeExecutionGetMetricsResponse + if rf, ok := ret.Get(0).(func(context.Context, admin.NodeExecutionGetMetricsRequest) *admin.NodeExecutionGetMetricsResponse); ok { + r0 = rf(ctx, request) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*admin.NodeExecutionGetMetricsResponse) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, admin.NodeExecutionGetMetricsRequest) error); ok { + r1 = rf(ctx, request) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} diff --git a/pkg/rpc/adminservice/base.go b/pkg/rpc/adminservice/base.go index 12b9f4ce5..ebaf7e0e7 100644 --- a/pkg/rpc/adminservice/base.go +++ b/pkg/rpc/adminservice/base.go @@ -45,6 +45,7 @@ type AdminService struct { NamedEntityManager interfaces.NamedEntityInterface VersionManager interfaces.VersionInterface DescriptionEntityManager interfaces.DescriptionEntityInterface + MetricsManager interfaces.MetricsInterface Metrics AdminMetrics } @@ -157,6 +158,11 @@ func NewAdminServer(ctx context.Context, pluginRegistry *plugins.Registry, confi nodeExecutionEventWriter.Run() }() + nodeExecutionManager := manager.NewNodeExecutionManager(repo, configuration, applicationConfiguration.GetMetadataStoragePrefix(), dataStorageClient, + adminScope.NewSubScope("node_execution_manager"), urlData, eventPublisher, cloudEventPublisher, nodeExecutionEventWriter) + taskExecutionManager := manager.NewTaskExecutionManager(repo, configuration, dataStorageClient, + adminScope.NewSubScope("task_execution_manager"), urlData, eventPublisher, cloudEventPublisher) + logger.Info(ctx, "Initializing a new AdminService") return &AdminService{ 
TaskManager: manager.NewTaskManager(repo, configuration, workflowengineImpl.NewCompiler(), @@ -167,12 +173,12 @@ func NewAdminServer(ctx context.Context, pluginRegistry *plugins.Registry, confi NamedEntityManager: namedEntityManager, DescriptionEntityManager: descriptionEntityManager, VersionManager: versionManager, - NodeExecutionManager: manager.NewNodeExecutionManager(repo, configuration, applicationConfiguration.GetMetadataStoragePrefix(), dataStorageClient, - adminScope.NewSubScope("node_execution_manager"), urlData, eventPublisher, cloudEventPublisher, nodeExecutionEventWriter), - TaskExecutionManager: manager.NewTaskExecutionManager(repo, configuration, dataStorageClient, - adminScope.NewSubScope("task_execution_manager"), urlData, eventPublisher, cloudEventPublisher), + NodeExecutionManager: nodeExecutionManager, + TaskExecutionManager: taskExecutionManager, ProjectManager: manager.NewProjectManager(repo, configuration), ResourceManager: resources.NewResourceManager(repo, configuration.ApplicationConfiguration()), + MetricsManager: manager.NewMetricsManager(repo, workflowManager, executionManager, nodeExecutionManager, + taskExecutionManager, adminScope.NewSubScope("metrics_manager")), Metrics: InitMetrics(adminScope), } } diff --git a/pkg/rpc/adminservice/execution.go b/pkg/rpc/adminservice/execution.go index 57680a58f..403223de6 100644 --- a/pkg/rpc/adminservice/execution.go +++ b/pkg/rpc/adminservice/execution.go @@ -141,6 +141,24 @@ func (m *AdminService) GetExecutionData( return response, nil } +func (m *AdminService) GetExecutionMetrics( + ctx context.Context, request *admin.WorkflowExecutionGetMetricsRequest) (*admin.WorkflowExecutionGetMetricsResponse, error) { + defer m.interceptPanic(ctx, request) + if request == nil { + return nil, status.Errorf(codes.InvalidArgument, "Incorrect request, nil requests not allowed") + } + var response *admin.WorkflowExecutionGetMetricsResponse + var err error + m.Metrics.executionEndpointMetrics.get.Time(func() { + 
response, err = m.MetricsManager.GetExecutionMetrics(ctx, *request) + }) + if err != nil { + return nil, util.TransformAndRecordError(err, &m.Metrics.executionEndpointMetrics.getData) + } + m.Metrics.executionEndpointMetrics.getData.Success() + return response, nil +} + func (m *AdminService) ListExecutions( ctx context.Context, request *admin.ResourceListRequest) (*admin.ExecutionList, error) { defer m.interceptPanic(ctx, request) diff --git a/pkg/rpc/adminservice/node_execution.go b/pkg/rpc/adminservice/node_execution.go index b4cabedf3..df50e00cf 100644 --- a/pkg/rpc/adminservice/node_execution.go +++ b/pkg/rpc/adminservice/node_execution.go @@ -108,3 +108,21 @@ func (m *AdminService) GetNodeExecutionData( m.Metrics.nodeExecutionEndpointMetrics.getData.Success() return response, nil } + +func (m *AdminService) GetNodeExecutionMetrics( + ctx context.Context, request *admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { + defer m.interceptPanic(ctx, request) + if request == nil { + return nil, status.Errorf(codes.InvalidArgument, "Incorrect request, nil requests not allowed") + } + var response *admin.NodeExecutionGetMetricsResponse + var err error + m.Metrics.nodeExecutionEndpointMetrics.getData.Time(func() { + response, err = m.MetricsManager.GetNodeExecutionMetrics(ctx, *request) + }) + if err != nil { + return nil, util.TransformAndRecordError(err, &m.Metrics.nodeExecutionEndpointMetrics.getData) + } + m.Metrics.nodeExecutionEndpointMetrics.getData.Success() + return response, nil +} From b49bc56d985cdf8a6a80963162f512c2265006f0 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Wed, 15 Feb 2023 13:10:50 -0600 Subject: [PATCH 02/27] dynamic tasks working Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 98 ++++++++++++----------------- 1 file changed, 41 insertions(+), 57 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 44c11cb9a..6f173c997 100644 
--- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -2,7 +2,7 @@ package impl import ( "context" - //"fmt" + "fmt" "time" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" @@ -44,17 +44,14 @@ func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category adm } } -func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nodeId string, executionId *core.WorkflowExecutionIdentifier, - upstreamNodeIds map[string]*core.ConnectionSet_IdList, nodeCache map[string]*admin.NodeExecution) (*admin.NodeExecution, error) { +func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nodeId string, + upstreamNodeIds map[string]*core.ConnectionSet_IdList, nodeExecutions map[string]*admin.NodeExecution) (*admin.NodeExecution, error) { var nodeExecution *admin.NodeExecution var latestUpstreamUpdatedAt = time.Unix(0, 0) if connectionSet, exists := upstreamNodeIds[nodeId]; exists { for _, upstreamNodeId := range connectionSet.Ids { - upstreamNodeExecution, err := m.getNodeExecution(ctx, upstreamNodeId, executionId, nodeCache) - if err != nil { - return nil, err // TODO @hamersaw - is this right? 
- } + upstreamNodeExecution := nodeExecutions[upstreamNodeId] t := upstreamNodeExecution.Closure.UpdatedAt.AsTime() if t.After(latestUpstreamUpdatedAt) { @@ -67,33 +64,6 @@ func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nod return nodeExecution, nil } -// TODO @hamersaw - docs -func (m *MetricsManager) getNodeExecution(ctx context.Context, nodeId string, executionId *core.WorkflowExecutionIdentifier, - nodeCache map[string]*admin.NodeExecution) (*admin.NodeExecution, error) { - - // if node already exists in cache -> use it - if nodeExecution, exists := nodeCache[nodeId]; exists { - return nodeExecution, nil - } - - // retrieve node execution - nodeRequest := admin.NodeExecutionGetRequest{ - Id: &core.NodeExecutionIdentifier{ - NodeId: nodeId, - ExecutionId: executionId, - }, - } - - nodeExecution, err := m.nodeExecutionManager.GetNodeExecution(ctx, nodeRequest) - if err != nil { - return nil, err - } - - // populate cache and return - nodeCache[nodeId] = nodeExecution - return nodeExecution, nil -} - func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Execution, depth int) (*admin.Span, error) { referenceSpan := &admin.ReferenceSpanInfo{ Id: &admin.ReferenceSpanInfo_WorkflowId{ @@ -103,7 +73,6 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex if depth != 0 { spans := make([]*admin.Span, 0) // TODO @hamersaw how to make an array - nodeCache := make(map[string]*admin.NodeExecution) // retrieve workflow, execution, and node executions workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} @@ -112,7 +81,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex return nil, err } - nodeExecutions := make([]*admin.NodeExecution, 0) + nodeExecutions := make(map[string]*admin.NodeExecution) nodeListRequest := admin.NodeExecutionListRequest{ WorkflowExecutionId: execution.Id, Limit: 20, // TODO @hamersaw - parameterize? 
@@ -125,7 +94,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex } for _, nodeExecution := range nodeListResponse.NodeExecutions { - nodeExecutions = append(nodeExecutions, nodeExecution) + nodeExecutions[nodeExecution.Metadata.SpecNodeId] = nodeExecution } if len(nodeListResponse.NodeExecutions) < int(nodeListRequest.Limit) { @@ -138,11 +107,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex // TODO @hamersaw - sort nodeExecutions by CreatedAt // compute frontend overhead - startNode, err := m.getNodeExecution(ctx, "start-node", execution.Id, nodeCache) - if err != nil { - return nil, err - } - + startNode := nodeExecutions["start-node"] spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) @@ -152,8 +117,8 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex } // compute backend overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", execution.Id, - workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeCache) + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if err != nil { return nil, err } @@ -173,8 +138,9 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex }, nil } -func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions []*admin.NodeExecution, spans *[]*admin.Span, depth int) error { +func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { for _, nodeExecution := range nodeExecutions { + fmt.Printf("HAMERSAW - %s\n", nodeExecution.Id.NodeId) if nodeExecution.Id.NodeId == "start-node" || nodeExecution.Id.NodeId == "end-node" { continue } @@ -207,6 +173,7 @@ 
func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * Limit: 20, // TODO @hamersaw - parameterize? } + // TODO @hamersaw - refactor out task and node execution retrieval for { taskListResponse, err := m.taskExecutionManager.ListTaskExecutions(ctx, taskListRequest) if err != nil { @@ -229,7 +196,7 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * return a[i] < a[j] })*/ - nodeExecutions := make([]*admin.NodeExecution, 0) + nodeExecutions := make(map[string]*admin.NodeExecution) nodeListRequest := admin.NodeExecutionListRequest{ WorkflowExecutionId: nodeExecution.Id.ExecutionId, Limit: 20, // TODO @hamersaw - parameterize? @@ -244,7 +211,7 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * } for _, nodeExecution := range nodeListResponse.NodeExecutions { - nodeExecutions = append(nodeExecutions, nodeExecution) + nodeExecutions[nodeExecution.Metadata.SpecNodeId] = nodeExecution } if len(nodeListResponse.NodeExecutions) < int(nodeListRequest.Limit) { @@ -258,7 +225,7 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * // parse task node m.parseTaskNodeExecution(ctx, nodeExecution, taskExecutions, &spans, depth-1) } else if nodeExecution.Metadata.IsParentNode && len(taskExecutions) > 0 { - // TODO @hamersaw - dynamic node + // dynamic node if err := m.parseDynamicNodeExecution(ctx, nodeExecution, taskExecutions, nodeExecutions, &spans, depth-1); err != nil { return nil, err } @@ -281,23 +248,40 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * } func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, - taskExecutions []*admin.TaskExecution, nodeExecutions []*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + taskExecutions []*admin.TaskExecution, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + + 
getDataRequest := admin.NodeExecutionGetDataRequest{Id: nodeExecution.Id} + nodeExecutionData, err := m.nodeExecutionManager.GetNodeExecutionData(ctx, getDataRequest) + if err != nil { + return err + } + // frontend overhead *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // task execution(s) parseTaskExecutions(taskExecutions, spans, depth) - // TODO @hamersaw - frontend overhead - //*spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - // nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // between task execution(s) and node execution(s) overhead + startNode := nodeExecutions["start-node"] + *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - // TODO @hamersaw - correctly parse dynamic nodes + // node execution(s) if err := m.parseNodeExecutions(ctx, nodeExecutions, spans, depth); err != nil { return err } - // TODO @hamersaw - backend overhead + // backened overhead + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) + if err != nil { + return err + } + + *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) return nil } @@ -376,9 +360,9 @@ func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { - // retrieve node and task executions - nodeCache := make(map[string]*admin.NodeExecution) - nodeExecution, err := 
m.getNodeExecution(ctx, request.Id.NodeId, request.Id.ExecutionId, nodeCache) + // retrieve node executions + nodeRequest := admin.NodeExecutionGetRequest{Id: request.Id} + nodeExecution, err := m.nodeExecutionManager.GetNodeExecution(ctx, nodeRequest) if err != nil { return nil, err } From 2a1a0e36338cf9b5361ddb52d0b32292160183d4 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Wed, 15 Feb 2023 18:21:06 -0600 Subject: [PATCH 03/27] subworkflow node working Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 118 ++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 7 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 6f173c997..60ed0f329 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -2,11 +2,13 @@ package impl import ( "context" - "fmt" + //"fmt" + "reflect" "time" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" repoInterfaces "github.com/flyteorg/flyteadmin/pkg/repositories/interfaces" + "github.com/pkg/errors" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" @@ -140,7 +142,6 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { for _, nodeExecution := range nodeExecutions { - fmt.Printf("HAMERSAW - %s\n", nodeExecution.Id.NodeId) if nodeExecution.Id.NodeId == "start-node" || nodeExecution.Id.NodeId == "end-node" { continue } @@ -222,17 +223,25 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * } if !nodeExecution.Metadata.IsParentNode && len(taskExecutions) > 0 { - // parse task node + // handle task node m.parseTaskNodeExecution(ctx, nodeExecution, taskExecutions, &spans, depth-1) } else if nodeExecution.Metadata.IsParentNode && len(taskExecutions) 
> 0 { - // dynamic node + // handle dynamic node if err := m.parseDynamicNodeExecution(ctx, nodeExecution, taskExecutions, nodeExecutions, &spans, depth-1); err != nil { return nil, err } - //} else if nodeExecution.Metadata.IsParentNode && nodeExecution.Closure.Target? is a WorkflowNode - //} else if nodeExecution.Metadata.IsParentNode && HAS_UNDERLYING_NODE_EXECUTIONS + } else if !nodeExecution.Metadata.IsParentNode && nodeExecution.Closure.GetWorkflowNodeMetadata() != nil { + // handle launch plan + if err := m.parseLaunchPlanNodeExecution(ctx, nodeExecution, &spans, depth-1); err != nil { + return nil, err + } + } else if nodeExecution.Metadata.IsParentNode && len(nodeExecutions) > 0 { + // handle subworkflow + if err := m.parseSubworkflowNodeExecution(ctx, nodeExecution, nodeExecutions, &spans, depth-1); err != nil { + return nil, err + } } else { - // TODO @hamersaw process branch, gate, launchplan, subworkflow + // TODO @hamersaw process branch and gate nodes } referenceSpan.Spans = spans @@ -286,6 +295,101 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec return nil } +func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, + nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { + + // retrieve execution + workflowNode := nodeExecution.Closure.GetWorkflowNodeMetadata() + + executionRequest := admin.WorkflowExecutionGetRequest{Id: workflowNode.ExecutionId} + execution, err := m.executionManager.GetExecution(ctx, executionRequest) + if err != nil { + return err + } + + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + execution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // execution + span, err := m.parseExecution(ctx, execution, depth) + if err != nil { + return err + } + + *spans = append(*spans, span) + + // backend overhead + *spans = append(*spans, createCategoricalSpan(execution.Closure.UpdatedAt, + 
nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + return nil +} + +func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, + nodeExecution *admin.NodeExecution, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + + // TODO - retrieve subworkflow + executionRequest := admin.WorkflowExecutionGetRequest{Id: nodeExecution.Id.ExecutionId} + execution, err := m.executionManager.GetExecution(ctx, executionRequest) + if err != nil { + return err + } + + workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} + workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) + if err != nil { + return err + } + + // identify subworkflow from node id + var node *core.Node + for _, n := range workflow.Closure.CompiledWorkflow.Primary.Template.Nodes { + if n.Id == nodeExecution.Id.NodeId { + node = n + } + } + + if node == nil { + return errors.New("failed to identify subworkflow node") // TODO @hamersaw - do gooder + } + + subworkflowId := node.GetWorkflowNode().GetSubWorkflowRef() + + var subworkflow *core.CompiledWorkflow + for _, subworkflowRef := range workflow.Closure.CompiledWorkflow.SubWorkflows { + if reflect.DeepEqual(subworkflowId, subworkflowRef.Template.Id) { + subworkflow = subworkflowRef + } + } + + if subworkflow == nil { + return errors.New("failed to identify subworkflow") // TODO @hamersaw - do gooder + } + + // frontend overhead + startNode := nodeExecutions["start-node"] + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // node execution(s) + if err := m.parseNodeExecutions(ctx, nodeExecutions, spans, depth); err != nil { + return err + } + + // backened overhead + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + subworkflow.Connections.Upstream, nodeExecutions) + if err != nil { + return err + } + + 
*spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + return nil +} + func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { From 06bccf08ee731cb4090d0147b0d1018824d08ad5 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Feb 2023 06:57:12 -0600 Subject: [PATCH 04/27] refactored to allow branch and gate node parsing Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 440 ++++++++++++++-------------- 1 file changed, 221 insertions(+), 219 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 60ed0f329..b9fe8048a 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -2,8 +2,8 @@ package impl import ( "context" - //"fmt" - "reflect" + "fmt" + //"reflect" "time" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" @@ -66,6 +66,89 @@ func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nod return nodeExecution, nil } +func (m *MetricsManager) getNodeExecutions(ctx context.Context, request admin.NodeExecutionListRequest) (map[string]*admin.NodeExecution, error) { + nodeExecutions := make(map[string]*admin.NodeExecution) + for { + response, err := m.nodeExecutionManager.ListNodeExecutions(ctx, request) + if err != nil { + return nil, err + } + + for _, nodeExecution := range response.NodeExecutions { + nodeExecutions[nodeExecution.Metadata.SpecNodeId] = nodeExecution + } + + if len(response.NodeExecutions) < int(request.Limit) { + break + } + + request.Token = response.Token + } + + return nodeExecutions, nil +} + +func (m *MetricsManager) getTaskExecutions(ctx context.Context, request admin.TaskExecutionListRequest) ([]*admin.TaskExecution, error) { + taskExecutions := 
make([]*admin.TaskExecution, 0) + for { + response, err := m.taskExecutionManager.ListTaskExecutions(ctx, request) + if err != nil { + return nil, err + } + + for _, taskExecution := range response.TaskExecutions { + taskExecutions = append(taskExecutions, taskExecution) + } + + if len(response.TaskExecutions) < int(request.Limit) { + break + } + + request.Token = response.Token + } + + return taskExecutions, nil +} + +func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, + taskExecutions []*admin.TaskExecution, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + + getDataRequest := admin.NodeExecutionGetDataRequest{Id: nodeExecution.Id} + nodeExecutionData, err := m.nodeExecutionManager.GetNodeExecutionData(ctx, getDataRequest) + if err != nil { + return err + } + + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // task execution(s) + parseTaskExecutions(taskExecutions, spans, depth) + + // between task execution(s) and node execution(s) overhead + startNode := nodeExecutions["start-node"] + *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // node execution(s) + if err := m.parseNodeExecutions(ctx, nodeExecutions, nodeExecutionData.DynamicWorkflow.CompiledWorkflow, spans, depth); err != nil { + return err + } + + // backened overhead + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) + if err != nil { + return err + } + + *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, 
admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + return nil +} + func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Execution, depth int) (*admin.Span, error) { referenceSpan := &admin.ReferenceSpanInfo{ Id: &admin.ReferenceSpanInfo_WorkflowId{ @@ -74,47 +157,31 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex } if depth != 0 { - spans := make([]*admin.Span, 0) // TODO @hamersaw how to make an array + spans := make([]*admin.Span, 0) - // retrieve workflow, execution, and node executions + // retrieve workflow and node executions workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) if err != nil { return nil, err } - nodeExecutions := make(map[string]*admin.NodeExecution) nodeListRequest := admin.NodeExecutionListRequest{ WorkflowExecutionId: execution.Id, Limit: 20, // TODO @hamersaw - parameterize? } - - for { - nodeListResponse, err := m.nodeExecutionManager.ListNodeExecutions(ctx, nodeListRequest) - if err != nil { - return nil, err - } - - for _, nodeExecution := range nodeListResponse.NodeExecutions { - nodeExecutions[nodeExecution.Metadata.SpecNodeId] = nodeExecution - } - - if len(nodeListResponse.NodeExecutions) < int(nodeListRequest.Limit) { - break - } - - nodeListRequest.Token = nodeListResponse.Token + nodeExecutions, err := m.getNodeExecutions(ctx, nodeListRequest) + if err != nil { + return nil, err } - // TODO @hamersaw - sort nodeExecutions by CreatedAt - // compute frontend overhead startNode := nodeExecutions["start-node"] spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) // iterate over nodes and compute overhead - if err := m.parseNodeExecutions(ctx, nodeExecutions, &spans, depth); err != nil { + if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, &spans, 
depth-1); err != nil { return nil, err } @@ -140,25 +207,38 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex }, nil } -func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { - for _, nodeExecution := range nodeExecutions { - if nodeExecution.Id.NodeId == "start-node" || nodeExecution.Id.NodeId == "end-node" { - continue - } +func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, + nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { - nodeExecutionSpan, err := m.parseNodeExecution(ctx, nodeExecution, depth-1) - if err != nil { - return err - } - // TODO @hamersaw - prepend nodeExecution spans with NODE_TRANSITION time + // retrieve execution + workflowNode := nodeExecution.Closure.GetWorkflowNodeMetadata() - *spans = append(*spans, nodeExecutionSpan) + executionRequest := admin.WorkflowExecutionGetRequest{Id: workflowNode.ExecutionId} + execution, err := m.executionManager.GetExecution(ctx, executionRequest) + if err != nil { + return err + } + + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + execution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // execution + span, err := m.parseExecution(ctx, execution, depth) + if err != nil { + return err } + *spans = append(*spans, span) + + // backend overhead + *spans = append(*spans, createCategoricalSpan(execution.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + return nil } -func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, depth int) (*admin.Span, error) { +func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, node *core.Node, depth int) (*admin.Span, error) { referenceSpan := &admin.ReferenceSpanInfo{ Id: 
&admin.ReferenceSpanInfo_NodeId{ NodeId: nodeExecution.Id, @@ -166,82 +246,59 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * } if depth != 0 { - spans := make([]*admin.Span, 0) // TODO @hamersaw how to make an array + spans := make([]*admin.Span, 0) - taskExecutions := make([]*admin.TaskExecution, 0) + // TODO @hamersaw - move these into the node parsing functions + // no need to get node executions for a taskNode / etc + // retrieve task and node executions taskListRequest := admin.TaskExecutionListRequest{ NodeExecutionId: nodeExecution.Id, Limit: 20, // TODO @hamersaw - parameterize? } - - // TODO @hamersaw - refactor out task and node execution retrieval - for { - taskListResponse, err := m.taskExecutionManager.ListTaskExecutions(ctx, taskListRequest) - if err != nil { - return nil, err - } - - for _, taskExecution := range taskListResponse.TaskExecutions { - taskExecutions = append(taskExecutions, taskExecution) - } - - if len(taskListResponse.TaskExecutions) < int(taskListRequest.Limit) { - break - } - - taskListRequest.Token = taskListResponse.Token + taskExecutions, err := m.getTaskExecutions(ctx, taskListRequest) + if err != nil { + return nil, err } - // TODO @hamersaw - sort taskExecutions by CreatedAt - /*sort.Slice(a, func(i, j int) bool { - return a[i] < a[j] - })*/ - - nodeExecutions := make(map[string]*admin.NodeExecution) nodeListRequest := admin.NodeExecutionListRequest{ WorkflowExecutionId: nodeExecution.Id.ExecutionId, Limit: 20, // TODO @hamersaw - parameterize? UniqueParentId: nodeExecution.Id.NodeId, } - - // TODO - refactor this out! 
- for { - nodeListResponse, err := m.nodeExecutionManager.ListNodeExecutions(ctx, nodeListRequest) - if err != nil { - return nil, err - } - - for _, nodeExecution := range nodeListResponse.NodeExecutions { - nodeExecutions[nodeExecution.Metadata.SpecNodeId] = nodeExecution - } - - if len(nodeListResponse.NodeExecutions) < int(nodeListRequest.Limit) { - break - } - - nodeListRequest.Token = nodeListResponse.Token + nodeExecutions, err := m.getNodeExecutions(ctx, nodeListRequest) + if err != nil { + return nil, err } - if !nodeExecution.Metadata.IsParentNode && len(taskExecutions) > 0 { - // handle task node - m.parseTaskNodeExecution(ctx, nodeExecution, taskExecutions, &spans, depth-1) - } else if nodeExecution.Metadata.IsParentNode && len(taskExecutions) > 0 { - // handle dynamic node - if err := m.parseDynamicNodeExecution(ctx, nodeExecution, taskExecutions, nodeExecutions, &spans, depth-1); err != nil { - return nil, err - } - } else if !nodeExecution.Metadata.IsParentNode && nodeExecution.Closure.GetWorkflowNodeMetadata() != nil { - // handle launch plan - if err := m.parseLaunchPlanNodeExecution(ctx, nodeExecution, &spans, depth-1); err != nil { - return nil, err - } - } else if nodeExecution.Metadata.IsParentNode && len(nodeExecutions) > 0 { - // handle subworkflow - if err := m.parseSubworkflowNodeExecution(ctx, nodeExecution, nodeExecutions, &spans, depth-1); err != nil { - return nil, err - } - } else { - // TODO @hamersaw process branch and gate nodes + // parse node + switch target := node.Target.(type) { + case *core.Node_BranchNode: + case *core.Node_GateNode: + case *core.Node_TaskNode: + if nodeExecution.Metadata.IsParentNode { + // handle dynamic node + if err := m.parseDynamicNodeExecution(ctx, nodeExecution, taskExecutions, nodeExecutions, &spans, depth-1); err != nil { + return nil, err + } + } else { + // handle task node + m.parseTaskNodeExecution(ctx, nodeExecution, taskExecutions, &spans, depth-1) + } + case *core.Node_WorkflowNode: + switch 
workflow := target.WorkflowNode.Reference.(type) { + case *core.WorkflowNode_LaunchplanRef: + // handle launch plan + if err := m.parseLaunchPlanNodeExecution(ctx, nodeExecution, &spans, depth-1); err != nil { + return nil, err + } + case *core.WorkflowNode_SubWorkflowRef: + // handle subworkflow + if err := m.parseSubworkflowNodeExecution(ctx, nodeExecution, workflow.SubWorkflowRef, nodeExecutions, &spans, depth-1); err != nil { + return nil, err + } + } + default: + fmt.Printf("unsupported node type %+v\n", target) } referenceSpan.Spans = spans @@ -256,130 +313,70 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * }, nil } -func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, - taskExecutions []*admin.TaskExecution, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { +func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions map[string]*admin.NodeExecution, + compiledWorkflowClosure *core.CompiledWorkflowClosure, spans *[]*admin.Span, depth int) error { - getDataRequest := admin.NodeExecutionGetDataRequest{Id: nodeExecution.Id} - nodeExecutionData, err := m.nodeExecutionManager.GetNodeExecutionData(ctx, getDataRequest) - if err != nil { - return err - } - - // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - - // task execution(s) - parseTaskExecutions(taskExecutions, spans, depth) + // TODO @hamersaw - sort nodeExecutions by CreatedAt + /*sort.Slice(a, func(i, j int) bool { + return a[i] < a[j] + })*/ - // between task execution(s) and node execution(s) overhead - startNode := nodeExecutions["start-node"] - *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - startNode.Closure.UpdatedAt, 
admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - - // node execution(s) - if err := m.parseNodeExecutions(ctx, nodeExecutions, spans, depth); err != nil { - return err - } - - // backened overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", - nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return err - } - - *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - - return nil -} + for specNodeId, nodeExecution := range nodeExecutions { + if nodeExecution.Id.NodeId == "start-node" || nodeExecution.Id.NodeId == "end-node" { + continue + } -func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, - nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { + // identify subworkflow from node id + var node *core.Node + for _, n := range compiledWorkflowClosure.Primary.Template.Nodes { + if n.Id == specNodeId{ + node = n + } + } - // retrieve execution - workflowNode := nodeExecution.Closure.GetWorkflowNodeMetadata() + if node == nil { + return errors.New("failed to identify workflow node") // TODO @hamersaw - do gooder + } - executionRequest := admin.WorkflowExecutionGetRequest{Id: workflowNode.ExecutionId} - execution, err := m.executionManager.GetExecution(ctx, executionRequest) - if err != nil { - return err - } + // parse node execution + nodeExecutionSpan, err := m.parseNodeExecution(ctx, nodeExecution, node, depth) + if err != nil { + return err + } - // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - execution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // TODO @hamersaw - prepend nodeExecution spans with NODE_TRANSITION time + //latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, nodeExecution.Id.NodeId, + // 
compiledWorkflowClosure.Primary.Connections.Upstream, nodeExecutions) - // execution - span, err := m.parseExecution(ctx, execution, depth) - if err != nil { - return err + *spans = append(*spans, nodeExecutionSpan) } - *spans = append(*spans, span) - - // backend overhead - *spans = append(*spans, createCategoricalSpan(execution.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - return nil } func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, - nodeExecution *admin.NodeExecution, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { - - // TODO - retrieve subworkflow - executionRequest := admin.WorkflowExecutionGetRequest{Id: nodeExecution.Id.ExecutionId} - execution, err := m.executionManager.GetExecution(ctx, executionRequest) - if err != nil { - return err - } + nodeExecution *admin.NodeExecution, identifier *core.Identifier, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { - workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} + // retrieve workflow + workflowRequest := admin.ObjectGetRequest{Id: identifier} workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) if err != nil { return err } - // identify subworkflow from node id - var node *core.Node - for _, n := range workflow.Closure.CompiledWorkflow.Primary.Template.Nodes { - if n.Id == nodeExecution.Id.NodeId { - node = n - } - } - - if node == nil { - return errors.New("failed to identify subworkflow node") // TODO @hamersaw - do gooder - } - - subworkflowId := node.GetWorkflowNode().GetSubWorkflowRef() - - var subworkflow *core.CompiledWorkflow - for _, subworkflowRef := range workflow.Closure.CompiledWorkflow.SubWorkflows { - if reflect.DeepEqual(subworkflowId, subworkflowRef.Template.Id) { - subworkflow = subworkflowRef - } - } - - if subworkflow == nil { - return errors.New("failed to identify subworkflow") // TODO 
@hamersaw - do gooder - } - // frontend overhead startNode := nodeExecutions["start-node"] *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) // node execution(s) - if err := m.parseNodeExecutions(ctx, nodeExecutions, spans, depth); err != nil { + if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, spans, depth); err != nil { return err } // backened overhead latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", - subworkflow.Connections.Upstream, nodeExecutions) + workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if err != nil { return err } @@ -390,31 +387,6 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, return nil } -func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, - taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { - - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - - parseTaskExecutions(taskExecutions, spans, depth) - - *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) -} - -func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { - for index, taskExecution := range taskExecutions { - if index > 0 { - *spans = append(*spans, createCategoricalSpan(taskExecutions[index-1].Closure.UpdatedAt, - taskExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - } - - if depth != 0 { - *spans = append(*spans, parseTaskExecution(taskExecution)) - } - } -} - func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { spans := make([]*admin.Span, 0) 
spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, @@ -441,6 +413,36 @@ func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { } } +func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { + // TODO @hamersaw - sort taskExecutions by CreatedAt + /*sort.Slice(a, func(i, j int) bool { + return a[i] < a[j] + })*/ + + for index, taskExecution := range taskExecutions { + if index > 0 { + *spans = append(*spans, createCategoricalSpan(taskExecutions[index-1].Closure.UpdatedAt, + taskExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + + if depth != 0 { + *spans = append(*spans, parseTaskExecution(taskExecution)) + } + } +} + +func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, + taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { + + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + parseTaskExecutions(taskExecutions, spans, depth) + + *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) +} + // TODO @hamersaw - docs func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, request admin.WorkflowExecutionGetMetricsRequest) (*admin.WorkflowExecutionGetMetricsResponse, error) { @@ -464,14 +466,14 @@ func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { - // retrieve node executions + // retrieve node execution nodeRequest := admin.NodeExecutionGetRequest{Id: request.Id} nodeExecution, err := m.nodeExecutionManager.GetNodeExecution(ctx, nodeRequest) if err != 
nil { return nil, err } - span, err := m.parseNodeExecution(ctx, nodeExecution, int(request.Depth)) + span, err := m.parseNodeExecution(ctx, nodeExecution, nil, int(request.Depth)) // TODO @hamersaw can NOT pass nil for Node - FIX IMMEDIATELY if err != nil { return nil, err } From bc7ce54a9a6e8f4210696fd91e5db226525398f0 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Feb 2023 07:20:33 -0600 Subject: [PATCH 05/27] added node transition times Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index b9fe8048a..c13fb0b37 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -3,7 +3,6 @@ package impl import ( "context" "fmt" - //"reflect" "time" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" @@ -54,6 +53,7 @@ func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nod if connectionSet, exists := upstreamNodeIds[nodeId]; exists { for _, upstreamNodeId := range connectionSet.Ids { upstreamNodeExecution := nodeExecutions[upstreamNodeId] + fmt.Printf(" HAMERSAW - found upstream node '%+v'\n", upstreamNodeId) t := upstreamNodeExecution.Closure.UpdatedAt.AsTime() if t.After(latestUpstreamUpdatedAt) { @@ -322,7 +322,7 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions })*/ for specNodeId, nodeExecution := range nodeExecutions { - if nodeExecution.Id.NodeId == "start-node" || nodeExecution.Id.NodeId == "end-node" { + if specNodeId == "start-node" || specNodeId == "end-node" { continue } @@ -344,9 +344,18 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions return err } - // TODO @hamersaw - prepend nodeExecution spans with NODE_TRANSITION time - //latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, nodeExecution.Id.NodeId, - // 
compiledWorkflowClosure.Primary.Connections.Upstream, nodeExecutions) + // prepend nodeExecution spans with NODE_TRANSITION time + if referenceSpan, ok := nodeExecutionSpan.Info.(*admin.Span_Reference); ok { + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, specNodeId, + compiledWorkflowClosure.Primary.Connections.Upstream, nodeExecutions) + if err != nil { + return err + } + + // TODO @hamersaw - check if latestUpstreamNode is nil + referenceSpan.Reference.Spans = append([]*admin.Span{createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_TRANSITION)}, referenceSpan.Reference.Spans...) + } *spans = append(*spans, nodeExecutionSpan) } From 87c232bde71701427efb0aceb1aab2779ed82a58 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Feb 2023 09:48:04 -0600 Subject: [PATCH 06/27] sorting node and task executions Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 37 +++++++++++++++++++---------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index c13fb0b37..e1f38d5ec 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -3,6 +3,7 @@ package impl import ( "context" "fmt" + "sort" "time" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" @@ -53,7 +54,6 @@ func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nod if connectionSet, exists := upstreamNodeIds[nodeId]; exists { for _, upstreamNodeId := range connectionSet.Ids { upstreamNodeExecution := nodeExecutions[upstreamNodeId] - fmt.Printf(" HAMERSAW - found upstream node '%+v'\n", upstreamNodeId) t := upstreamNodeExecution.Closure.UpdatedAt.AsTime() if t.After(latestUpstreamUpdatedAt) { @@ -316,16 +316,26 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * func (m *MetricsManager) parseNodeExecutions(ctx 
context.Context, nodeExecutions map[string]*admin.NodeExecution, compiledWorkflowClosure *core.CompiledWorkflowClosure, spans *[]*admin.Span, depth int) error { - // TODO @hamersaw - sort nodeExecutions by CreatedAt - /*sort.Slice(a, func(i, j int) bool { - return a[i] < a[j] - })*/ - - for specNodeId, nodeExecution := range nodeExecutions { + // sort node executions + sortedNodeExecutions := make([]*admin.NodeExecution, 0, len(nodeExecutions)) + for _, nodeExecution := range nodeExecutions { + sortedNodeExecutions = append(sortedNodeExecutions, nodeExecution) + } + sort.Slice(sortedNodeExecutions, func(i, j int) bool { + x := sortedNodeExecutions[i].Closure.CreatedAt.AsTime() + y := sortedNodeExecutions[j].Closure.CreatedAt.AsTime() + return x.Before(y) + }) + + // iterate over sorted node executions + for _, nodeExecution := range sortedNodeExecutions { + specNodeId := nodeExecution.Metadata.SpecNodeId if specNodeId == "start-node" || specNodeId == "end-node" { continue } + fmt.Printf("HAMERSAW - parsing node %s\n", specNodeId) + // identify subworkflow from node id var node *core.Node for _, n := range compiledWorkflowClosure.Primary.Template.Nodes { @@ -423,11 +433,14 @@ func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { } func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { - // TODO @hamersaw - sort taskExecutions by CreatedAt - /*sort.Slice(a, func(i, j int) bool { - return a[i] < a[j] - })*/ - + // sort task executions + sort.Slice(taskExecutions, func(i, j int) bool { + x := taskExecutions[i].Closure.CreatedAt.AsTime() + y := taskExecutions[j].Closure.CreatedAt.AsTime() + return x.Before(y) + }) + + // iterate over task executions for index, taskExecution := range taskExecutions { if index > 0 { *spans = append(*spans, createCategoricalSpan(taskExecutions[index-1].Closure.UpdatedAt, From dbe0dfe7b906ab51a3396483d02cb34e687a8339 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 
Feb 2023 09:51:34 -0600 Subject: [PATCH 07/27] added metrics metrics ... inception Signed-off-by: Daniel Rammer --- pkg/rpc/adminservice/execution.go | 8 ++++---- pkg/rpc/adminservice/metrics.go | 4 ++++ pkg/rpc/adminservice/node_execution.go | 6 +++--- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pkg/rpc/adminservice/execution.go b/pkg/rpc/adminservice/execution.go index 403223de6..87e2fe487 100644 --- a/pkg/rpc/adminservice/execution.go +++ b/pkg/rpc/adminservice/execution.go @@ -131,7 +131,7 @@ func (m *AdminService) GetExecutionData( } var response *admin.WorkflowExecutionGetDataResponse var err error - m.Metrics.executionEndpointMetrics.get.Time(func() { + m.Metrics.executionEndpointMetrics.getData.Time(func() { response, err = m.ExecutionManager.GetExecutionData(ctx, *request) }) if err != nil { @@ -149,13 +149,13 @@ func (m *AdminService) GetExecutionMetrics( } var response *admin.WorkflowExecutionGetMetricsResponse var err error - m.Metrics.executionEndpointMetrics.get.Time(func() { + m.Metrics.executionEndpointMetrics.getMetrics.Time(func() { response, err = m.MetricsManager.GetExecutionMetrics(ctx, *request) }) if err != nil { - return nil, util.TransformAndRecordError(err, &m.Metrics.executionEndpointMetrics.getData) + return nil, util.TransformAndRecordError(err, &m.Metrics.executionEndpointMetrics.getMetrics) } - m.Metrics.executionEndpointMetrics.getData.Success() + m.Metrics.executionEndpointMetrics.getMetrics.Success() return response, nil } diff --git a/pkg/rpc/adminservice/metrics.go b/pkg/rpc/adminservice/metrics.go index f5c02c21e..741dbc7cc 100644 --- a/pkg/rpc/adminservice/metrics.go +++ b/pkg/rpc/adminservice/metrics.go @@ -17,6 +17,7 @@ type executionEndpointMetrics struct { get util.RequestMetrics update util.RequestMetrics getData util.RequestMetrics + getMetrics util.RequestMetrics list util.RequestMetrics terminate util.RequestMetrics } @@ -47,6 +48,7 @@ type nodeExecutionEndpointMetrics struct { createEvent 
util.RequestMetrics get util.RequestMetrics getData util.RequestMetrics + getMetrics util.RequestMetrics list util.RequestMetrics listChildren util.RequestMetrics } @@ -137,6 +139,7 @@ func InitMetrics(adminScope promutils.Scope) AdminMetrics { get: util.NewRequestMetrics(adminScope, "get_execution"), update: util.NewRequestMetrics(adminScope, "update_execution"), getData: util.NewRequestMetrics(adminScope, "get_execution_data"), + getMetrics: util.NewRequestMetrics(adminScope, "get_execution_metrics"), list: util.NewRequestMetrics(adminScope, "list_execution"), terminate: util.NewRequestMetrics(adminScope, "terminate_execution"), }, @@ -161,6 +164,7 @@ func InitMetrics(adminScope promutils.Scope) AdminMetrics { createEvent: util.NewRequestMetrics(adminScope, "create_node_execution_event"), get: util.NewRequestMetrics(adminScope, "get_node_execution"), getData: util.NewRequestMetrics(adminScope, "get_node_execution_data"), + getMetrics: util.NewRequestMetrics(adminScope, "get_execution_metrics"), list: util.NewRequestMetrics(adminScope, "list_node_execution"), listChildren: util.NewRequestMetrics(adminScope, "list_children_node_executions"), }, diff --git a/pkg/rpc/adminservice/node_execution.go b/pkg/rpc/adminservice/node_execution.go index df50e00cf..f4a008d83 100644 --- a/pkg/rpc/adminservice/node_execution.go +++ b/pkg/rpc/adminservice/node_execution.go @@ -117,12 +117,12 @@ func (m *AdminService) GetNodeExecutionMetrics( } var response *admin.NodeExecutionGetMetricsResponse var err error - m.Metrics.nodeExecutionEndpointMetrics.getData.Time(func() { + m.Metrics.nodeExecutionEndpointMetrics.getMetrics.Time(func() { response, err = m.MetricsManager.GetNodeExecutionMetrics(ctx, *request) }) if err != nil { - return nil, util.TransformAndRecordError(err, &m.Metrics.nodeExecutionEndpointMetrics.getData) + return nil, util.TransformAndRecordError(err, &m.Metrics.nodeExecutionEndpointMetrics.getMetrics) } - m.Metrics.nodeExecutionEndpointMetrics.getData.Success() + 
m.Metrics.nodeExecutionEndpointMetrics.getMetrics.Success() return response, nil } From 241f6d80481e788c70a25aa32b131d918cad5371 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Feb 2023 10:26:04 -0600 Subject: [PATCH 08/27] fixed duplicate metrics Signed-off-by: Daniel Rammer --- pkg/rpc/adminservice/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/rpc/adminservice/metrics.go b/pkg/rpc/adminservice/metrics.go index 741dbc7cc..b2bab4514 100644 --- a/pkg/rpc/adminservice/metrics.go +++ b/pkg/rpc/adminservice/metrics.go @@ -164,7 +164,7 @@ func InitMetrics(adminScope promutils.Scope) AdminMetrics { createEvent: util.NewRequestMetrics(adminScope, "create_node_execution_event"), get: util.NewRequestMetrics(adminScope, "get_node_execution"), getData: util.NewRequestMetrics(adminScope, "get_node_execution_data"), - getMetrics: util.NewRequestMetrics(adminScope, "get_execution_metrics"), + getMetrics: util.NewRequestMetrics(adminScope, "get_node_execution_metrics"), list: util.NewRequestMetrics(adminScope, "list_node_execution"), listChildren: util.NewRequestMetrics(adminScope, "list_children_node_executions"), }, From 40f5387d5bf9ec84bebad985aa23ddc22f4d35e4 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Feb 2023 11:37:26 -0600 Subject: [PATCH 09/27] branch node working? 
Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 60 +++++++++++++++++++++++++++-- 1 file changed, 56 insertions(+), 4 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index e1f38d5ec..d23040167 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -110,6 +110,52 @@ func (m *MetricsManager) getTaskExecutions(ctx context.Context, request admin.Ta return taskExecutions, nil } +func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, + branchNode *core.BranchNode, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + + // TODO @hamersaw - len(nodeExecutions) will always be 1 -> do we need to call parseNodeExecutions .. or can we just call parseNodeExecution? + var branchNodeExecution *admin.NodeExecution + for _, n := range nodeExecutions { + branchNodeExecution = n + } + + // lookup node - TODO @hamersaw probably refactor this out and add a check for nil + var node *core.Node + if branchNode.IfElse.Case.ThenNode.Id == branchNodeExecution.Metadata.SpecNodeId { + node = branchNode.IfElse.Case.ThenNode + } + + for _, other := range branchNode.IfElse.Other { + if other.ThenNode.Id == branchNodeExecution.Metadata.SpecNodeId { + node = other.ThenNode + } + } + + if elseNode, ok := branchNode.IfElse.Default.(*core.IfElseBlock_ElseNode); ok { + if elseNode.ElseNode.Id == branchNodeExecution.Metadata.SpecNodeId { + node = elseNode.ElseNode + } + } + + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + branchNodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // node execution + nodeExecutionSpan, err := m.parseNodeExecution(ctx, branchNodeExecution, node, depth) + if err != nil { + return err + } + + *spans = append(*spans, nodeExecutionSpan) + + // backened overhead + *spans = append(*spans, 
createCategoricalSpan(branchNodeExecution.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + return nil +} + func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, taskExecutions []*admin.TaskExecution, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { @@ -273,6 +319,10 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * // parse node switch target := node.Target.(type) { case *core.Node_BranchNode: + // handle branch node + if err := m.parseBranchNodeExecution(ctx, nodeExecution, target.BranchNode, nodeExecutions, &spans, depth-1); err != nil { + return nil, err + } case *core.Node_GateNode: case *core.Node_TaskNode: if nodeExecution.Metadata.IsParentNode { @@ -334,12 +384,10 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions continue } - fmt.Printf("HAMERSAW - parsing node %s\n", specNodeId) - - // identify subworkflow from node id + // get node defintion from workflow var node *core.Node for _, n := range compiledWorkflowClosure.Primary.Template.Nodes { - if n.Id == specNodeId{ + if n.Id == specNodeId { node = n } } @@ -408,13 +456,17 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { spans := make([]*admin.Span, 0) + + // frontend overhead spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + // plugin execution taskEndTime := timestamppb.New(taskExecution.Closure.StartedAt.AsTime().Add(taskExecution.Closure.Duration.AsDuration())) spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, taskEndTime, admin.CategoricalSpanInfo_PLUGIN_EXECUTION)) + // backend overhead spans = append(spans, createCategoricalSpan(taskEndTime, 
taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) From 779faa132e0084b25457e1b51549294ba8b6a269 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Feb 2023 14:04:32 -0600 Subject: [PATCH 10/27] implemented gate node Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index d23040167..a55627734 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -253,6 +253,21 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex }, nil } +func (m *MetricsManager) parseGateNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) { + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + nodeExecution.Closure.StartedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // idle time + nodeEndTime := timestamppb.New(nodeExecution.Closure.StartedAt.AsTime().Add(nodeExecution.Closure.Duration.AsDuration())) + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, + nodeEndTime, admin.CategoricalSpanInfo_EXECUTION_IDLE)) + + // backend overhead + *spans = append(*spans, createCategoricalSpan(nodeEndTime, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) +} + func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { @@ -324,6 +339,8 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * return nil, err } case *core.Node_GateNode: + // handle gate node + m.parseGateNodeExecution(ctx, nodeExecution, &spans, depth-1) case *core.Node_TaskNode: if nodeExecution.Metadata.IsParentNode { // handle dynamic node From 31d0424d5ce7ea79184db8c8a2f7d9c3de47c710 Mon Sep 
17 00:00:00 2001 From: Daniel Rammer Date: Tue, 21 Feb 2023 17:38:12 -0600 Subject: [PATCH 11/27] working with partial completions and failures and cache hits Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 516 ++++++++++++++++++---------- 1 file changed, 325 insertions(+), 191 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index a55627734..3dad1277f 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -3,6 +3,7 @@ package impl import ( "context" "fmt" + "reflect" "sort" "time" @@ -15,11 +16,25 @@ import ( "github.com/flyteorg/flytestdlib/promutils" + "github.com/golang/protobuf/ptypes/duration" "github.com/golang/protobuf/ptypes/timestamp" "google.golang.org/protobuf/types/known/timestamppb" ) +const REQUEST_LIMIT uint32 = 50 + +var ( + nilDuration *duration.Duration = &duration.Duration{ + Seconds: 0, + Nanos: 0, + } + nilTimestamp *timestamp.Timestamp = ×tamp.Timestamp{ + Seconds: 0, + Nanos: 0, + } +) + type metrics struct { Scope promutils.Scope //Set labeled.Counter @@ -46,6 +61,27 @@ func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category adm } } +func getBranchNode(nodeId string, branchNode *core.BranchNode) *core.Node { + if branchNode.IfElse.Case.ThenNode.Id == nodeId { + return branchNode.IfElse.Case.ThenNode + } + + for _, other := range branchNode.IfElse.Other { + if other.ThenNode.Id == nodeId { + return other.ThenNode + } + } + + if elseNode, ok := branchNode.IfElse.Default.(*core.IfElseBlock_ElseNode); ok { + if elseNode.ElseNode.Id == nodeId { + return elseNode.ElseNode + } + } + + return nil +} + + func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nodeId string, upstreamNodeIds map[string]*core.ConnectionSet_IdList, nodeExecutions map[string]*admin.NodeExecution) (*admin.NodeExecution, error) { @@ -53,7 +89,10 @@ func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx 
context.Context, nod var latestUpstreamUpdatedAt = time.Unix(0, 0) if connectionSet, exists := upstreamNodeIds[nodeId]; exists { for _, upstreamNodeId := range connectionSet.Ids { - upstreamNodeExecution := nodeExecutions[upstreamNodeId] + upstreamNodeExecution, exists := nodeExecutions[upstreamNodeId] + if !exists { + continue + } t := upstreamNodeExecution.Closure.UpdatedAt.AsTime() if t.After(latestUpstreamUpdatedAt) { @@ -110,87 +149,127 @@ func (m *MetricsManager) getTaskExecutions(ctx context.Context, request admin.Ta return taskExecutions, nil } -func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, - branchNode *core.BranchNode, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { +func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, + nodeExecution *admin.NodeExecution, branchNode *core.BranchNode, spans *[]*admin.Span, depth int) error { - // TODO @hamersaw - len(nodeExecutions) will always be 1 -> do we need to call parseNodeExecutions .. or can we just call parseNodeExecution? 
- var branchNodeExecution *admin.NodeExecution - for _, n := range nodeExecutions { - branchNodeExecution = n + // retrieve node execution(s) + nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ + WorkflowExecutionId: nodeExecution.Id.ExecutionId, + Limit: REQUEST_LIMIT, + UniqueParentId: nodeExecution.Id.NodeId, + }) + if err != nil { + return err } - // lookup node - TODO @hamersaw probably refactor this out and add a check for nil - var node *core.Node - if branchNode.IfElse.Case.ThenNode.Id == branchNodeExecution.Metadata.SpecNodeId { - node = branchNode.IfElse.Case.ThenNode - } + // check if the node started + if len(nodeExecutions) == 0 { + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } else { + // parse branchNode + if len(nodeExecutions) != 1 { + // TODO @hamersaw throw error - branch nodes execute a single node + } - for _, other := range branchNode.IfElse.Other { - if other.ThenNode.Id == branchNodeExecution.Metadata.SpecNodeId { - node = other.ThenNode + var branchNodeExecution *admin.NodeExecution + for _, e := range nodeExecutions { + branchNodeExecution = e } - } - if elseNode, ok := branchNode.IfElse.Default.(*core.IfElseBlock_ElseNode); ok { - if elseNode.ElseNode.Id == branchNodeExecution.Metadata.SpecNodeId { - node = elseNode.ElseNode + node := getBranchNode(branchNodeExecution.Metadata.SpecNodeId, branchNode) + if node != nil { + // TODO @hamersaw throw error - failed to parse node } - } - // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - branchNodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + branchNodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - // node execution - 
nodeExecutionSpan, err := m.parseNodeExecution(ctx, branchNodeExecution, node, depth) - if err != nil { - return err - } + // node execution + nodeExecutionSpan, err := m.parseNodeExecution(ctx, branchNodeExecution, node, depth) + if err != nil { + return err + } - *spans = append(*spans, nodeExecutionSpan) + *spans = append(*spans, nodeExecutionSpan) - // backened overhead - *spans = append(*spans, createCategoricalSpan(branchNodeExecution.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // backened overhead + if !nodeExecution.Closure.UpdatedAt.AsTime().Before(branchNodeExecution.Closure.UpdatedAt.AsTime()) { + *spans = append(*spans, createCategoricalSpan(branchNodeExecution.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } return nil } -func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, - taskExecutions []*admin.TaskExecution, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { - - getDataRequest := admin.NodeExecutionGetDataRequest{Id: nodeExecution.Id} - nodeExecutionData, err := m.nodeExecutionManager.GetNodeExecutionData(ctx, getDataRequest) +func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { + taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ + NodeExecutionId: nodeExecution.Id, + Limit: REQUEST_LIMIT, + }) if err != nil { return err } - // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // if no task executions then everything is execution overhead + if len(taskExecutions) == 0 { + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + 
nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } else { + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - // task execution(s) - parseTaskExecutions(taskExecutions, spans, depth) + // task execution(s) + parseTaskExecutions(taskExecutions, spans, depth) - // between task execution(s) and node execution(s) overhead - startNode := nodeExecutions["start-node"] - *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ + WorkflowExecutionId: nodeExecution.Id.ExecutionId, + Limit: REQUEST_LIMIT, + UniqueParentId: nodeExecution.Id.NodeId, + }) + if err != nil { + return err + } - // node execution(s) - if err := m.parseNodeExecutions(ctx, nodeExecutions, nodeExecutionData.DynamicWorkflow.CompiledWorkflow, spans, depth); err != nil { - return err - } + lastTask := taskExecutions[len(taskExecutions)-1] + if len(nodeExecutions) == 0 { + if !nodeExecution.Closure.UpdatedAt.AsTime().Before(lastTask.Closure.UpdatedAt.AsTime()) { + *spans = append(*spans, createCategoricalSpan(lastTask.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } else { + // between task execution(s) and node execution(s) overhead + startNode := nodeExecutions["start-node"] + *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // node execution(s) + getDataRequest := admin.NodeExecutionGetDataRequest{Id: nodeExecution.Id} + nodeExecutionData, err := m.nodeExecutionManager.GetNodeExecutionData(ctx, getDataRequest) + if err != nil { + return 
err + } - // backened overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", - nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return err - } + if err := m.parseNodeExecutions(ctx, nodeExecutions, nodeExecutionData.DynamicWorkflow.CompiledWorkflow, spans, depth); err != nil { + return err + } - *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // backened overhead + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) + if err != nil { + return err + } + + if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { + *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } + } return nil } @@ -212,34 +291,41 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex return nil, err } - nodeListRequest := admin.NodeExecutionListRequest{ + nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ WorkflowExecutionId: execution.Id, - Limit: 20, // TODO @hamersaw - parameterize? 
- } - nodeExecutions, err := m.getNodeExecutions(ctx, nodeListRequest) + Limit: REQUEST_LIMIT, + }) if err != nil { return nil, err } - // compute frontend overhead + // check if workflow has started startNode := nodeExecutions["start-node"] - spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - - // iterate over nodes and compute overhead - if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, &spans, depth-1); err != nil { - return nil, err - } + if startNode.Closure.UpdatedAt == nil || reflect.DeepEqual(startNode.Closure.UpdatedAt, nilTimestamp) { + spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, + execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } else { + // compute frontend overhead + spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // iterate over nodes and compute overhead + if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, &spans, depth-1); err != nil { + return nil, err + } - // compute backend overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", - workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return nil, err - } + // compute backend overhead + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) + if err != nil { + return nil, err + } - spans = append(spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + if latestUpstreamNode != nil && !execution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { + spans = 
append(spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } referenceSpan.Spans = spans } @@ -254,47 +340,64 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex } func (m *MetricsManager) parseGateNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) { - // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.StartedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - - // idle time - nodeEndTime := timestamppb.New(nodeExecution.Closure.StartedAt.AsTime().Add(nodeExecution.Closure.Duration.AsDuration())) - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, - nodeEndTime, admin.CategoricalSpanInfo_EXECUTION_IDLE)) - - // backend overhead - *spans = append(*spans, createCategoricalSpan(nodeEndTime, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // check if node has started yet + if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, nilTimestamp) { + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + } else { + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + nodeExecution.Closure.StartedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // check if plugin has completed yet + if nodeExecution.Closure.Duration == nil || reflect.DeepEqual(nodeExecution.Closure.Duration, nilDuration) { + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_IDLE)) + } else { + // idle time + nodeEndTime := 
timestamppb.New(nodeExecution.Closure.StartedAt.AsTime().Add(nodeExecution.Closure.Duration.AsDuration())) + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, + nodeEndTime, admin.CategoricalSpanInfo_EXECUTION_IDLE)) + + // backend overhead + *spans = append(*spans, createCategoricalSpan(nodeEndTime, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } } -func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, - nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { - - // retrieve execution +func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { + // check if workflow started yet workflowNode := nodeExecution.Closure.GetWorkflowNodeMetadata() + if workflowNode == nil { + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } else { + // retrieve execution + executionRequest := admin.WorkflowExecutionGetRequest{Id: workflowNode.ExecutionId} + execution, err := m.executionManager.GetExecution(ctx, executionRequest) + if err != nil { + return err + } - executionRequest := admin.WorkflowExecutionGetRequest{Id: workflowNode.ExecutionId} - execution, err := m.executionManager.GetExecution(ctx, executionRequest) - if err != nil { - return err - } - - // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - execution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + execution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - // execution - span, err := m.parseExecution(ctx, execution, depth) - if err != nil { - return err - } + // execution + span, err := m.parseExecution(ctx, 
execution, depth) + if err != nil { + return err + } - *spans = append(*spans, span) + *spans = append(*spans, span) - // backend overhead - *spans = append(*spans, createCategoricalSpan(execution.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // backend overhead + if !nodeExecution.Closure.UpdatedAt.AsTime().Before(execution.Closure.UpdatedAt.AsTime()) { + *spans = append(*spans, createCategoricalSpan(execution.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } return nil } @@ -309,63 +412,40 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * if depth != 0 { spans := make([]*admin.Span, 0) - // TODO @hamersaw - move these into the node parsing functions - // no need to get node executions for a taskNode / etc - // retrieve task and node executions - taskListRequest := admin.TaskExecutionListRequest{ - NodeExecutionId: nodeExecution.Id, - Limit: 20, // TODO @hamersaw - parameterize? - } - taskExecutions, err := m.getTaskExecutions(ctx, taskListRequest) - if err != nil { - return nil, err - } - - nodeListRequest := admin.NodeExecutionListRequest{ - WorkflowExecutionId: nodeExecution.Id.ExecutionId, - Limit: 20, // TODO @hamersaw - parameterize? 
- UniqueParentId: nodeExecution.Id.NodeId, - } - nodeExecutions, err := m.getNodeExecutions(ctx, nodeListRequest) - if err != nil { - return nil, err - } - // parse node + var err error switch target := node.Target.(type) { case *core.Node_BranchNode: // handle branch node - if err := m.parseBranchNodeExecution(ctx, nodeExecution, target.BranchNode, nodeExecutions, &spans, depth-1); err != nil { - return nil, err - } + err = m.parseBranchNodeExecution(ctx, nodeExecution, target.BranchNode, &spans, depth-1) case *core.Node_GateNode: // handle gate node m.parseGateNodeExecution(ctx, nodeExecution, &spans, depth-1) case *core.Node_TaskNode: if nodeExecution.Metadata.IsParentNode { // handle dynamic node - if err := m.parseDynamicNodeExecution(ctx, nodeExecution, taskExecutions, nodeExecutions, &spans, depth-1); err != nil { - return nil, err - } + err = m.parseDynamicNodeExecution(ctx, nodeExecution, &spans, depth-1) } else { // handle task node - m.parseTaskNodeExecution(ctx, nodeExecution, taskExecutions, &spans, depth-1) + err = m.parseTaskNodeExecution(ctx, nodeExecution, &spans, depth-1) } case *core.Node_WorkflowNode: switch workflow := target.WorkflowNode.Reference.(type) { case *core.WorkflowNode_LaunchplanRef: // handle launch plan - if err := m.parseLaunchPlanNodeExecution(ctx, nodeExecution, &spans, depth-1); err != nil { - return nil, err - } + err = m.parseLaunchPlanNodeExecution(ctx, nodeExecution, &spans, depth-1) case *core.WorkflowNode_SubWorkflowRef: // handle subworkflow - if err := m.parseSubworkflowNodeExecution(ctx, nodeExecution, workflow.SubWorkflowRef, nodeExecutions, &spans, depth-1); err != nil { - return nil, err - } + err = m.parseSubworkflowNodeExecution(ctx, nodeExecution, workflow.SubWorkflowRef, &spans, depth-1) + default: + err = fmt.Errorf("unsupported node type %+v\n", target) } default: - fmt.Printf("unsupported node type %+v\n", target) + err = fmt.Errorf("unsupported node type %+v\n", target) + } + + if err != nil { + return nil, 
err } referenceSpan.Spans = spans @@ -427,9 +507,10 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions return err } - // TODO @hamersaw - check if latestUpstreamNode is nil - referenceSpan.Reference.Spans = append([]*admin.Span{createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_TRANSITION)}, referenceSpan.Reference.Spans...) + if latestUpstreamNode != nil { + referenceSpan.Reference.Spans = append([]*admin.Span{createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_TRANSITION)}, referenceSpan.Reference.Spans...) + } } *spans = append(*spans, nodeExecutionSpan) @@ -439,34 +520,52 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions } func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, - nodeExecution *admin.NodeExecution, identifier *core.Identifier, nodeExecutions map[string]*admin.NodeExecution, spans *[]*admin.Span, depth int) error { + nodeExecution *admin.NodeExecution, identifier *core.Identifier, spans *[]*admin.Span, depth int) error { - // retrieve workflow - workflowRequest := admin.ObjectGetRequest{Id: identifier} - workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) + // retrieve node execution(s) + nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ + WorkflowExecutionId: nodeExecution.Id.ExecutionId, + Limit: REQUEST_LIMIT, + UniqueParentId: nodeExecution.Id.NodeId, + }) if err != nil { return err } - // frontend overhead - startNode := nodeExecutions["start-node"] - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // check if the subworkflow started + if len(nodeExecutions) == 0 { + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + 
nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } else { + // frontend overhead + startNode := nodeExecutions["start-node"] + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - // node execution(s) - if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, spans, depth); err != nil { - return err - } + // retrieve workflow + workflowRequest := admin.ObjectGetRequest{Id: identifier} + workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) + if err != nil { + return err + } - // backened overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", - workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return err - } + // node execution(s) + if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, spans, depth); err != nil { + return err + } - *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + // backend overhead + latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) + if err != nil { + return err + } + + if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { + *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } return nil } @@ -474,18 +573,32 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { spans := make([]*admin.Span, 0) - // frontend overhead - spans = 
append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, - taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) - - // plugin execution - taskEndTime := timestamppb.New(taskExecution.Closure.StartedAt.AsTime().Add(taskExecution.Closure.Duration.AsDuration())) - spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, - taskEndTime, admin.CategoricalSpanInfo_PLUGIN_EXECUTION)) - - // backend overhead - spans = append(spans, createCategoricalSpan(taskEndTime, - taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + // check if plugin has started yet + if taskExecution.Closure.StartedAt == nil || reflect.DeepEqual(taskExecution.Closure.StartedAt, nilTimestamp) { + spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, + taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + } else { + // frontend overhead + spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, + taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + + // check if plugin has completed yet + if taskExecution.Closure.Duration == nil || reflect.DeepEqual(taskExecution.Closure.Duration, nilDuration) { + spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, + taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_RUNTIME)) + } else { + // plugin execution + taskEndTime := timestamppb.New(taskExecution.Closure.StartedAt.AsTime().Add(taskExecution.Closure.Duration.AsDuration())) + spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, + taskEndTime, admin.CategoricalSpanInfo_PLUGIN_RUNTIME)) + + // backend overhead + if !taskEndTime.AsTime().Before(taskExecution.Closure.UpdatedAt.AsTime()) { + spans = append(spans, createCategoricalSpan(taskEndTime, + taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + } + } + } return &admin.Span{ StartTime: 
taskExecution.Closure.CreatedAt, @@ -522,16 +635,37 @@ func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin. } } -func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, - taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { - - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) +func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { + // retrieve task executions + taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ + NodeExecutionId: nodeExecution.Id, + Limit: REQUEST_LIMIT, + }) + if err != nil { + return err + } - parseTaskExecutions(taskExecutions, spans, depth) + // if no task executions then everything is execution overhead + if len(taskExecutions) == 0 { + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } else { + // frontend overhead + *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + + // parse task executions + parseTaskExecutions(taskExecutions, spans, depth) + + // backend overhead + lastTask := taskExecutions[len(taskExecutions)-1] + if !nodeExecution.Closure.UpdatedAt.AsTime().Before(lastTask.Closure.UpdatedAt.AsTime()) { + *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + } + } - *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + 
return nil } // TODO @hamersaw - docs From beac75c6d5ebc7c191a18f840d86a168610c1982 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Thu, 23 Feb 2023 02:51:08 -0600 Subject: [PATCH 12/27] added unit tests Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 8 +- pkg/manager/impl/metrics_manager_test.go | 1097 ++++++++++++++++++++++ pkg/manager/mocks/workflow.go | 9 + 3 files changed, 1108 insertions(+), 6 deletions(-) create mode 100644 pkg/manager/impl/metrics_manager_test.go diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 3dad1277f..2691759a9 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -8,7 +8,6 @@ import ( "time" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" - repoInterfaces "github.com/flyteorg/flyteadmin/pkg/repositories/interfaces" "github.com/pkg/errors" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" @@ -41,7 +40,6 @@ type metrics struct { } type MetricsManager struct { - db repoInterfaces.Repository workflowManager interfaces.WorkflowInterface executionManager interfaces.ExecutionInterface nodeExecutionManager interfaces.NodeExecutionInterface @@ -343,7 +341,7 @@ func (m *MetricsManager) parseGateNodeExecution(ctx context.Context, nodeExecuti // check if node has started yet if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, nilTimestamp) { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) } else { // frontend overhead *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, @@ -593,7 +591,7 @@ func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { taskEndTime, admin.CategoricalSpanInfo_PLUGIN_RUNTIME)) // backend overhead - if 
!taskEndTime.AsTime().Before(taskExecution.Closure.UpdatedAt.AsTime()) { + if !taskExecution.Closure.UpdatedAt.AsTime().Before(taskEndTime.AsTime()) { spans = append(spans, createCategoricalSpan(taskEndTime, taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) } @@ -707,7 +705,6 @@ func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, } func NewMetricsManager( - db repoInterfaces.Repository, workflowManager interfaces.WorkflowInterface, executionManager interfaces.ExecutionInterface, nodeExecutionManager interfaces.NodeExecutionInterface, @@ -719,7 +716,6 @@ func NewMetricsManager( } return &MetricsManager{ - db: db, workflowManager: workflowManager, executionManager: executionManager, nodeExecutionManager: nodeExecutionManager, diff --git a/pkg/manager/impl/metrics_manager_test.go b/pkg/manager/impl/metrics_manager_test.go new file mode 100644 index 000000000..1dfcea4ec --- /dev/null +++ b/pkg/manager/impl/metrics_manager_test.go @@ -0,0 +1,1097 @@ +package impl + +import ( + "context" + "reflect" + "testing" + + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" + "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + + "github.com/flyteorg/flyteadmin/pkg/manager/mocks" + "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" + + "github.com/golang/protobuf/ptypes/duration" + "github.com/golang/protobuf/ptypes/timestamp" + + "github.com/stretchr/testify/assert" +) + +var ( + emptyTimestamp = ×tamp.Timestamp{ + Seconds: 0, + Nanos: 0, + } + baseTimestamp = ×tamp.Timestamp{ + Seconds: 643852800, + Nanos: 0, + } + + emptyDuration = &duration.Duration{ + Seconds: 0, + Nanos: 0, + } + baseDuration = &duration.Duration{ + Seconds: 400, + Nanos: 0, + } +) + +func addTimestamp(ts *timestamp.Timestamp, seconds int64) *timestamp.Timestamp { + return ×tamp.Timestamp{ + Seconds: ts.Seconds + seconds, + Nanos: ts.Nanos, + } +} + +func getMockExecutionManager(execution *admin.Execution) interfaces.ExecutionInterface { + 
mockExecutionManager := mocks.MockExecutionManager{} + mockExecutionManager.SetGetCallback( + func(ctx context.Context, request admin.WorkflowExecutionGetRequest) (*admin.Execution, error) { + return execution, nil + }) + + return &mockExecutionManager +} + +func getMockNodeExecutionManager(nodeExecutions []*admin.NodeExecution, + dynamicWorkflow *admin.DynamicWorkflowNodeMetadata) interfaces.NodeExecutionInterface { + + mockNodeExecutionManager := mocks.MockNodeExecutionManager{} + mockNodeExecutionManager.SetListNodeExecutionsFunc( + func(ctx context.Context, request admin.NodeExecutionListRequest) (*admin.NodeExecutionList, error) { + return &admin.NodeExecutionList{ + NodeExecutions: nodeExecutions, + }, nil + }) + mockNodeExecutionManager.SetGetNodeExecutionDataFunc( + func(ctx context.Context, request admin.NodeExecutionGetDataRequest) (*admin.NodeExecutionGetDataResponse, error) { + return &admin.NodeExecutionGetDataResponse{ + DynamicWorkflow: dynamicWorkflow, + }, nil + }) + + return &mockNodeExecutionManager +} + +func getMockTaskExecutionManager(taskExecutions []*admin.TaskExecution) interfaces.TaskExecutionInterface { + mockTaskExecutionManager := mocks.MockTaskExecutionManager{} + mockTaskExecutionManager.SetListTaskExecutionsCallback( + func(ctx context.Context, request admin.TaskExecutionListRequest) (*admin.TaskExecutionList, error) { + return &admin.TaskExecutionList{ + TaskExecutions: taskExecutions, + }, nil + }) + + return &mockTaskExecutionManager +} + +func getMockWorkflowManager(workflow *admin.Workflow) interfaces.WorkflowInterface { + mockWorkflowManager := mocks.MockWorkflowManager{} + mockWorkflowManager.SetGetCallback( + func(ctx context.Context, request admin.ObjectGetRequest) (*admin.Workflow, error) { + return workflow, nil + }) + + return &mockWorkflowManager +} + +func parseSpansInfo(spans []*admin.Span) (map[admin.CategoricalSpanInfo_Category][]int64, int) { + categoryDurations := 
make(map[admin.CategoricalSpanInfo_Category][]int64) + referenceCount := 0 + for _, span := range spans { + switch info := span.Info.(type) { + case *admin.Span_Category: + category := info.Category.Category + duration := span.EndTime.Seconds - span.StartTime.Seconds + if array, exists := categoryDurations[category]; exists { + categoryDurations[category] = append(array, duration) + } else { + categoryDurations[category] = []int64{duration} + } + case *admin.Span_Reference: + referenceCount++ + } + } + + return categoryDurations, referenceCount +} + +func TestGetLatestUpstreamNodeExecution(t *testing.T) { + // TODO @hamersaw +} + +func TestParseBranchNodeExecution(t *testing.T) { + tests := []struct{ + name string + nodeExecution *admin.NodeExecution + nodeExecutions []*admin.NodeExecution + categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + referenceCount int + }{ + { + "NotStarted", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 5), + }, + }, + nil, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + }, + 0, + }, + { + "Running", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: baseTimestamp, + }, + }, + []*admin.NodeExecution{ + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "foo", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 430), + }, + }, + }, + 
map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + }, + 1, + }, + { + "Completed", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 450), + }, + }, + []*admin.NodeExecution{ + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "foo", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 430), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 20}, + }, + 1, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // initialize mocks + mockNodeExecutionManager := getMockNodeExecutionManager(test.nodeExecutions, nil) + mockTaskExecutionManager := getMockTaskExecutionManager([]*admin.TaskExecution{}) + metricsManager := MetricsManager{ + nodeExecutionManager: mockNodeExecutionManager, + taskExecutionManager: mockTaskExecutionManager, + } + + // parse node execution + branchNode := &core.BranchNode{ + IfElse: &core.IfElseBlock{ + Case: &core.IfBlock{ + ThenNode: &core.Node{ + Id: "foo", + Target: &core.Node_TaskNode{}, + }, + }, + }, + } + + spans := make([]*admin.Span, 0) + err := metricsManager.parseBranchNodeExecution(context.TODO(), test.nodeExecution, branchNode, &spans, -1) + assert.Nil(t, err) + + // validate spans + categoryDurations, referenceCount := parseSpansInfo(spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + assert.Equal(t, test.referenceCount, referenceCount) + }) + } +} + +func TestParseDynamicNodeExecution(t *testing.T) { + tests := []struct{ + 
name string + nodeExecution *admin.NodeExecution + taskExecutions []*admin.TaskExecution + nodeExecutions []*admin.NodeExecution + categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + referenceCount int + }{ + { + "NotStarted", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 5), + }, + }, + nil, + nil, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + }, + 0, + }, + { + "TaskRunning", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: baseTimestamp, + }, + }, + []*admin.TaskExecution{ + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 420), + }, + }, + }, + nil, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + }, + 1, + }, + { + "NodesRunning", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: baseTimestamp, + }, + }, + []*admin.TaskExecution{ + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 420), + }, + }, + }, + []*admin.NodeExecution{ + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "start-node", + }, + 
Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 435), + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 435), + }, + }, + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "foo", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 445), + StartedAt: addTimestamp(baseTimestamp, 460), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 880), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, + }, + 2, + }, + { + "Completed", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 900), + }, + }, + []*admin.TaskExecution{ + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 420), + }, + }, + }, + []*admin.NodeExecution{ + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "start-node", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 435), + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 435), + }, + }, + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "foo", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 445), + StartedAt: addTimestamp(baseTimestamp, 460), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 880), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15, 20}, + }, 
+ 2, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // initialize mocks + mockNodeExecutionManager := getMockNodeExecutionManager( + test.nodeExecutions, + &admin.DynamicWorkflowNodeMetadata{ + CompiledWorkflow: &core.CompiledWorkflowClosure{ + Primary: &core.CompiledWorkflow{ + Connections: &core.ConnectionSet{ + Upstream: map[string]*core.ConnectionSet_IdList{ + "foo": &core.ConnectionSet_IdList{ + Ids: []string{"start-node"}, + }, + "end-node": &core.ConnectionSet_IdList{ + Ids: []string{"foo"}, + }, + }, + }, + Template: &core.WorkflowTemplate{ + Nodes: []*core.Node{ + &core.Node{ + Id: "foo", + Target: &core.Node_TaskNode{}, + }, + }, + }, + }, + }, + }) + mockTaskExecutionManager := getMockTaskExecutionManager(test.taskExecutions) + metricsManager := MetricsManager{ + nodeExecutionManager: mockNodeExecutionManager, + taskExecutionManager: mockTaskExecutionManager, + } + + // parse node execution + spans := make([]*admin.Span, 0) + err := metricsManager.parseDynamicNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) + assert.Nil(t, err) + + // validate spans + categoryDurations, referenceCount := parseSpansInfo(spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + assert.Equal(t, test.referenceCount, referenceCount) + }) + } +} + +func TestParseExecution(t *testing.T) { + // TODO @hamersaw +} + +func TestParseGateNodeExecution(t *testing.T) { + tests := []struct{ + name string + nodeExecution *admin.NodeExecution + categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + }{ + { + "NotStarted", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 5), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + }, + }, + { + "Running", + &admin.NodeExecution{ + Closure: 
&admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: addTimestamp(baseTimestamp, 10), + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 15), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + admin.CategoricalSpanInfo_EXECUTION_IDLE: []int64{5}, + }, + }, + { + "Completed", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: addTimestamp(baseTimestamp, 10), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 425), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, + admin.CategoricalSpanInfo_EXECUTION_IDLE: []int64{400}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // initialize mocks + metricsManager := MetricsManager{} + + // parse node execution + spans := make([]*admin.Span, 0) + metricsManager.parseGateNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) + + // validate spans + categoryDurations, _ := parseSpansInfo(spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + }) + } +} + +func TestParseLaunchPlanNodeExecution(t *testing.T) { + tests := []struct{ + name string + nodeExecution *admin.NodeExecution + execution *admin.Execution + categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + referenceCount int + }{ + { + "NotStarted", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 5), + }, + }, + nil, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + }, + 0, + }, + { + "Running", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: 
emptyDuration, + UpdatedAt: baseTimestamp, + TargetMetadata: &admin.NodeExecutionClosure_WorkflowNodeMetadata{ + WorkflowNodeMetadata: &admin.WorkflowNodeMetadata{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + }, + }, + }, + &admin.Execution{ + Closure: &admin.ExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 15), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + }, + 1, + }, + { + "Completed", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 440), + TargetMetadata: &admin.NodeExecutionClosure_WorkflowNodeMetadata{ + WorkflowNodeMetadata: &admin.WorkflowNodeMetadata{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + }, + }, + }, + &admin.Execution{ + Closure: &admin.ExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 425), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, + }, + 1, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // initialize mocks + mockExecutionManager := getMockExecutionManager(test.execution) + mockNodeExecutionManager := getMockNodeExecutionManager( + []*admin.NodeExecution{ + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "start-node", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 10), + }, + }, + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: 
"foo", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 15), + StartedAt: addTimestamp(baseTimestamp, 20), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 435), + }, + }, + }, nil) + mockTaskExecutionManager := getMockTaskExecutionManager([]*admin.TaskExecution{}) + mockWorkflowManager := getMockWorkflowManager( + &admin.Workflow{ + Closure: &admin.WorkflowClosure{ + CompiledWorkflow: &core.CompiledWorkflowClosure{ + Primary: &core.CompiledWorkflow{ + Connections: &core.ConnectionSet{ + Upstream: map[string]*core.ConnectionSet_IdList{ + "foo": &core.ConnectionSet_IdList{ + Ids: []string{"start-node"}, + }, + "end-node": &core.ConnectionSet_IdList{ + Ids: []string{"foo"}, + }, + }, + }, + Template: &core.WorkflowTemplate{ + Nodes: []*core.Node{ + &core.Node{ + Id: "foo", + Target: &core.Node_TaskNode{}, + }, + }, + }, + }, + }, + }, + }) + metricsManager := MetricsManager{ + executionManager: mockExecutionManager, + nodeExecutionManager: mockNodeExecutionManager, + taskExecutionManager: mockTaskExecutionManager, + workflowManager: mockWorkflowManager, + } + + // parse node execution + spans := make([]*admin.Span, 0) + err := metricsManager.parseLaunchPlanNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) + assert.Nil(t, err) + + // validate spans + categoryDurations, referenceCount := parseSpansInfo(spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + assert.Equal(t, test.referenceCount, referenceCount) + }) + } +} + +func TestParseNodeExecution(t *testing.T) { + // TODO @hamersaw +} + +func TestParseNodeExecutions(t *testing.T) { + // TODO @hamersaw +} + +func TestParseSubworkflowNodeExecution(t *testing.T) { + tests := []struct{ + name string + nodeExecution *admin.NodeExecution + nodeExecutions []*admin.NodeExecution + categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + referenceCount int + }{ + { + "NotStarted", + &admin.NodeExecution{ + Id: 
&core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 5), + }, + }, + nil, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + }, + 0, + }, + { + "Running", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: baseTimestamp, + }, + }, + []*admin.NodeExecution{ + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "start-node", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 10), + }, + }, + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "foo", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 15), + StartedAt: addTimestamp(baseTimestamp, 20), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 435), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + }, + 1, + }, + { + "Completed", + &admin.NodeExecution{ + Id: &core.NodeExecutionIdentifier{ + ExecutionId: &core.WorkflowExecutionIdentifier{ + }, + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 455), + }, + }, + []*admin.NodeExecution{ + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "start-node", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + 
StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 10), + }, + }, + &admin.NodeExecution{ + Metadata: &admin.NodeExecutionMetaData { + SpecNodeId: "foo", + }, + Closure: &admin.NodeExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 15), + StartedAt: addTimestamp(baseTimestamp, 20), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 435), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 20}, + }, + 1, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // initialize mocks + mockNodeExecutionManager := getMockNodeExecutionManager(test.nodeExecutions, nil) + mockTaskExecutionManager := getMockTaskExecutionManager([]*admin.TaskExecution{}) + mockWorkflowManager := getMockWorkflowManager( + &admin.Workflow{ + Closure: &admin.WorkflowClosure{ + CompiledWorkflow: &core.CompiledWorkflowClosure{ + Primary: &core.CompiledWorkflow{ + Connections: &core.ConnectionSet{ + Upstream: map[string]*core.ConnectionSet_IdList{ + "foo": &core.ConnectionSet_IdList{ + Ids: []string{"start-node"}, + }, + "end-node": &core.ConnectionSet_IdList{ + Ids: []string{"foo"}, + }, + }, + }, + Template: &core.WorkflowTemplate{ + Nodes: []*core.Node{ + &core.Node{ + Id: "foo", + Target: &core.Node_TaskNode{}, + }, + }, + }, + }, + }, + }, + }) + metricsManager := MetricsManager{ + nodeExecutionManager: mockNodeExecutionManager, + taskExecutionManager: mockTaskExecutionManager, + workflowManager: mockWorkflowManager, + } + + // parse node execution + spans := make([]*admin.Span, 0) + err := metricsManager.parseSubworkflowNodeExecution(context.TODO(), test.nodeExecution, &core.Identifier{}, &spans, -1) + assert.Nil(t, err) + + // validate spans + categoryDurations, referenceCount := parseSpansInfo(spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + assert.Equal(t, test.referenceCount, 
referenceCount) + }) + } +} + +func TestParseTaskExecution(t *testing.T) { + tests := []struct{ + name string + taskExecution *admin.TaskExecution + categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + }{ + { + "NotStarted", + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 5), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5}, + }, + }, + { + "Running", + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: addTimestamp(baseTimestamp, 5), + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 605), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5}, + admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{600}, + }, + }, + { + "Completed", + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: addTimestamp(baseTimestamp, 5), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 415), + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5, 10}, + admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{400}, + }, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // parse task execution + span := parseTaskExecution(test.taskExecution) + spanReference, ok := span.Info.(*admin.Span_Reference) + assert.True(t, ok) + + // validate spans + categoryDurations, referenceCount := parseSpansInfo(spanReference.Reference.Spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + assert.Equal(t, 0, referenceCount) + }) + } +} + +func TestParseTaskExecutions(t *testing.T) { + tests := []struct{ + name string + taskExecutions []*admin.TaskExecution + categoryDurations 
map[admin.CategoricalSpanInfo_Category][]int64 + referenceCount int + }{ + { + "SingleAttempt", + []*admin.TaskExecution{ + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: addTimestamp(baseTimestamp, 5), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 415), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{}, + 1, + }, + { + "MultipleAttempts", + []*admin.TaskExecution{ + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: addTimestamp(baseTimestamp, 5), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 605), + }, + }, + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 625), + StartedAt: addTimestamp(baseTimestamp, 630), + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 630), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{20}, + }, + 2, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // parse task executions + spans := make([]*admin.Span, 0) + parseTaskExecutions(test.taskExecutions, &spans, -1) + + // validate spans + categoryDurations, referenceCount := parseSpansInfo(spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + assert.Equal(t, test.referenceCount, referenceCount) + }) + } +} + +func TestParseTaskNodeExecution(t *testing.T) { + tests := []struct{ + name string + nodeExecution *admin.NodeExecution + taskExecutions []*admin.TaskExecution + categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + referenceCount int + }{ + { + "NotStarted", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 5), + }, + }, + nil, + 
map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + }, + 0, + }, + { + "Running", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 10), + }, + }, + []*admin.TaskExecution{ + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 420), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + }, + 1, + }, + { + "Completed", + &admin.NodeExecution{ + Closure: &admin.NodeExecutionClosure{ + CreatedAt: baseTimestamp, + StartedAt: emptyTimestamp, + Duration: emptyDuration, + UpdatedAt: addTimestamp(baseTimestamp, 435), + }, + }, + []*admin.TaskExecution{ + &admin.TaskExecution{ + Closure: &admin.TaskExecutionClosure{ + CreatedAt: addTimestamp(baseTimestamp, 10), + StartedAt: addTimestamp(baseTimestamp, 15), + Duration: baseDuration, + UpdatedAt: addTimestamp(baseTimestamp, 420), + }, + }, + }, + map[admin.CategoricalSpanInfo_Category][]int64{ + admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, + }, + 1, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // initialize mocks + mockTaskExecutionManager := getMockTaskExecutionManager(test.taskExecutions) + metricsManager := MetricsManager{ + taskExecutionManager: mockTaskExecutionManager, + } + + // parse node execution + spans := make([]*admin.Span, 0) + err := metricsManager.parseTaskNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) + assert.Nil(t, err) + + // validate spans + categoryDurations, referenceCount := parseSpansInfo(spans) + assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + assert.Equal(t, 
test.referenceCount, referenceCount) + }) + } +} diff --git a/pkg/manager/mocks/workflow.go b/pkg/manager/mocks/workflow.go index d1f55750e..5055b2b4c 100644 --- a/pkg/manager/mocks/workflow.go +++ b/pkg/manager/mocks/workflow.go @@ -7,9 +7,11 @@ import ( ) type CreateWorkflowFunc func(ctx context.Context, request admin.WorkflowCreateRequest) (*admin.WorkflowCreateResponse, error) +type GetWorkflowFunc func(ctx context.Context, request admin.ObjectGetRequest) (*admin.Workflow, error) type MockWorkflowManager struct { createWorkflowFunc CreateWorkflowFunc + getWorkflowFunc GetWorkflowFunc } func (r *MockWorkflowManager) SetCreateCallback(createFunction CreateWorkflowFunc) { @@ -30,8 +32,15 @@ func (r *MockWorkflowManager) ListWorkflows(ctx context.Context, return nil, nil } +func (r *MockWorkflowManager) SetGetCallback(getFunction GetWorkflowFunc) { + r.getWorkflowFunc = getFunction +} + func (r *MockWorkflowManager) GetWorkflow( ctx context.Context, request admin.ObjectGetRequest) (*admin.Workflow, error) { + if r.getWorkflowFunc != nil { + return r.getWorkflowFunc(ctx, request) + } return nil, nil } From b373ddcbaab97094b7424e06acc7a2d1323de15b Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Thu, 23 Feb 2023 04:03:24 -0600 Subject: [PATCH 13/27] fixed most lint issues Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 157 ++++++++++------------- pkg/manager/impl/metrics_manager_test.go | 123 ++++++++---------- pkg/rpc/adminservice/base.go | 14 +- 3 files changed, 131 insertions(+), 163 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 2691759a9..e70fc52b7 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -21,22 +21,21 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) -const REQUEST_LIMIT uint32 = 50 +const RequestLimit uint32 = 50 var ( nilDuration *duration.Duration = &duration.Duration{ Seconds: 0, - Nanos: 0, + Nanos: 0, } 
nilTimestamp *timestamp.Timestamp = ×tamp.Timestamp{ Seconds: 0, - Nanos: 0, + Nanos: 0, } ) type metrics struct { Scope promutils.Scope - //Set labeled.Counter } type MetricsManager struct { @@ -50,28 +49,28 @@ type MetricsManager struct { func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category admin.CategoricalSpanInfo_Category) *admin.Span { return &admin.Span{ StartTime: startTime, - EndTime: endTime, + EndTime: endTime, Info: &admin.Span_Category{ Category: &admin.CategoricalSpanInfo{ - Category: category, + Category: category, }, }, } } -func getBranchNode(nodeId string, branchNode *core.BranchNode) *core.Node { - if branchNode.IfElse.Case.ThenNode.Id == nodeId { +func getBranchNode(nodeID string, branchNode *core.BranchNode) *core.Node { + if branchNode.IfElse.Case.ThenNode.Id == nodeID { return branchNode.IfElse.Case.ThenNode } for _, other := range branchNode.IfElse.Other { - if other.ThenNode.Id == nodeId { + if other.ThenNode.Id == nodeID { return other.ThenNode } } if elseNode, ok := branchNode.IfElse.Default.(*core.IfElseBlock_ElseNode); ok { - if elseNode.ElseNode.Id == nodeId { + if elseNode.ElseNode.Id == nodeID { return elseNode.ElseNode } } @@ -79,15 +78,14 @@ func getBranchNode(nodeId string, branchNode *core.BranchNode) *core.Node { return nil } - -func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nodeId string, - upstreamNodeIds map[string]*core.ConnectionSet_IdList, nodeExecutions map[string]*admin.NodeExecution) (*admin.NodeExecution, error) { +func (m *MetricsManager) getLatestUpstreamNodeExecution(nodeID string, upstreamNodeIds map[string]*core.ConnectionSet_IdList, + nodeExecutions map[string]*admin.NodeExecution) *admin.NodeExecution { var nodeExecution *admin.NodeExecution var latestUpstreamUpdatedAt = time.Unix(0, 0) - if connectionSet, exists := upstreamNodeIds[nodeId]; exists { - for _, upstreamNodeId := range connectionSet.Ids { - upstreamNodeExecution, exists := 
nodeExecutions[upstreamNodeId] + if connectionSet, exists := upstreamNodeIds[nodeID]; exists { + for _, upstreamNodeID := range connectionSet.Ids { + upstreamNodeExecution, exists := nodeExecutions[upstreamNodeID] if !exists { continue } @@ -100,7 +98,7 @@ func (m *MetricsManager) getLatestUpstreamNodeExecution(ctx context.Context, nod } } - return nodeExecution, nil + return nodeExecution } func (m *MetricsManager) getNodeExecutions(ctx context.Context, request admin.NodeExecutionListRequest) (map[string]*admin.NodeExecution, error) { @@ -133,9 +131,7 @@ func (m *MetricsManager) getTaskExecutions(ctx context.Context, request admin.Ta return nil, err } - for _, taskExecution := range response.TaskExecutions { - taskExecutions = append(taskExecutions, taskExecution) - } + taskExecutions = append(taskExecutions, response.TaskExecutions...) if len(response.TaskExecutions) < int(request.Limit) { break @@ -153,8 +149,8 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, // retrieve node execution(s) nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ WorkflowExecutionId: nodeExecution.Id.ExecutionId, - Limit: REQUEST_LIMIT, - UniqueParentId: nodeExecution.Id.NodeId, + Limit: RequestLimit, + UniqueParentId: nodeExecution.Id.NodeId, }) if err != nil { return err @@ -205,7 +201,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ NodeExecutionId: nodeExecution.Id, - Limit: REQUEST_LIMIT, + Limit: RequestLimit, }) if err != nil { return err @@ -225,8 +221,8 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ WorkflowExecutionId: nodeExecution.Id.ExecutionId, - Limit: 
REQUEST_LIMIT, - UniqueParentId: nodeExecution.Id.NodeId, + Limit: RequestLimit, + UniqueParentId: nodeExecution.Id.NodeId, }) if err != nil { return err @@ -256,12 +252,8 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec } // backened overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return err - } - if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) @@ -291,7 +283,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ WorkflowExecutionId: execution.Id, - Limit: REQUEST_LIMIT, + Limit: RequestLimit, }) if err != nil { return nil, err @@ -306,19 +298,15 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex // compute frontend overhead spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) - + // iterate over nodes and compute overhead if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, &spans, depth-1); err != nil { return nil, err } // compute backend overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return nil, err - } - if latestUpstreamNode != nil && 
!execution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { spans = append(spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) @@ -330,14 +318,14 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex return &admin.Span{ StartTime: execution.Closure.CreatedAt, - EndTime: execution.Closure.UpdatedAt, + EndTime: execution.Closure.UpdatedAt, Info: &admin.Span_Reference{ Reference: referenceSpan, }, }, nil } -func (m *MetricsManager) parseGateNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) { +func (m *MetricsManager) parseGateNodeExecution(_ context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span) { // check if node has started yet if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, nilTimestamp) { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, @@ -413,33 +401,33 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * // parse node var err error switch target := node.Target.(type) { - case *core.Node_BranchNode: - // handle branch node - err = m.parseBranchNodeExecution(ctx, nodeExecution, target.BranchNode, &spans, depth-1) - case *core.Node_GateNode: - // handle gate node - m.parseGateNodeExecution(ctx, nodeExecution, &spans, depth-1) - case *core.Node_TaskNode: - if nodeExecution.Metadata.IsParentNode { - // handle dynamic node - err = m.parseDynamicNodeExecution(ctx, nodeExecution, &spans, depth-1) - } else { - // handle task node - err = m.parseTaskNodeExecution(ctx, nodeExecution, &spans, depth-1) - } - case *core.Node_WorkflowNode: - switch workflow := target.WorkflowNode.Reference.(type) { - case *core.WorkflowNode_LaunchplanRef: - // handle launch plan - err = m.parseLaunchPlanNodeExecution(ctx, nodeExecution, &spans, depth-1) - case 
*core.WorkflowNode_SubWorkflowRef: - // handle subworkflow - err = m.parseSubworkflowNodeExecution(ctx, nodeExecution, workflow.SubWorkflowRef, &spans, depth-1) - default: - err = fmt.Errorf("unsupported node type %+v\n", target) - } + case *core.Node_BranchNode: + // handle branch node + err = m.parseBranchNodeExecution(ctx, nodeExecution, target.BranchNode, &spans, depth-1) + case *core.Node_GateNode: + // handle gate node + m.parseGateNodeExecution(ctx, nodeExecution, &spans) + case *core.Node_TaskNode: + if nodeExecution.Metadata.IsParentNode { + // handle dynamic node + err = m.parseDynamicNodeExecution(ctx, nodeExecution, &spans, depth-1) + } else { + // handle task node + err = m.parseTaskNodeExecution(ctx, nodeExecution, &spans, depth-1) + } + case *core.Node_WorkflowNode: + switch workflow := target.WorkflowNode.Reference.(type) { + case *core.WorkflowNode_LaunchplanRef: + // handle launch plan + err = m.parseLaunchPlanNodeExecution(ctx, nodeExecution, &spans, depth-1) + case *core.WorkflowNode_SubWorkflowRef: + // handle subworkflow + err = m.parseSubworkflowNodeExecution(ctx, nodeExecution, workflow.SubWorkflowRef, &spans, depth-1) default: - err = fmt.Errorf("unsupported node type %+v\n", target) + err = fmt.Errorf("unsupported node type %+v", target) + } + default: + err = fmt.Errorf("unsupported node type %+v", target) } if err != nil { @@ -451,7 +439,7 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * return &admin.Span{ StartTime: nodeExecution.Closure.CreatedAt, - EndTime: nodeExecution.Closure.UpdatedAt, + EndTime: nodeExecution.Closure.UpdatedAt, Info: &admin.Span_Reference{ Reference: referenceSpan, }, @@ -463,9 +451,9 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions // sort node executions sortedNodeExecutions := make([]*admin.NodeExecution, 0, len(nodeExecutions)) - for _, nodeExecution := range nodeExecutions { - sortedNodeExecutions = append(sortedNodeExecutions, 
nodeExecution) - } + for _, nodeExecution := range nodeExecutions { + sortedNodeExecutions = append(sortedNodeExecutions, nodeExecution) + } sort.Slice(sortedNodeExecutions, func(i, j int) bool { x := sortedNodeExecutions[i].Closure.CreatedAt.AsTime() y := sortedNodeExecutions[j].Closure.CreatedAt.AsTime() @@ -474,15 +462,15 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions // iterate over sorted node executions for _, nodeExecution := range sortedNodeExecutions { - specNodeId := nodeExecution.Metadata.SpecNodeId - if specNodeId == "start-node" || specNodeId == "end-node" { + specNodeID := nodeExecution.Metadata.SpecNodeId + if specNodeID == "start-node" || specNodeID == "end-node" { continue } - // get node defintion from workflow + // get node definition from workflow var node *core.Node for _, n := range compiledWorkflowClosure.Primary.Template.Nodes { - if n.Id == specNodeId { + if n.Id == specNodeID { node = n } } @@ -499,12 +487,8 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions // prepend nodeExecution spans with NODE_TRANSITION time if referenceSpan, ok := nodeExecutionSpan.Info.(*admin.Span_Reference); ok { - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, specNodeId, + latestUpstreamNode := m.getLatestUpstreamNodeExecution(specNodeID, compiledWorkflowClosure.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return err - } - if latestUpstreamNode != nil { referenceSpan.Reference.Spans = append([]*admin.Span{createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, nodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_TRANSITION)}, referenceSpan.Reference.Spans...) 
@@ -523,8 +507,8 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, // retrieve node execution(s) nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ WorkflowExecutionId: nodeExecution.Id.ExecutionId, - Limit: REQUEST_LIMIT, - UniqueParentId: nodeExecution.Id.NodeId, + Limit: RequestLimit, + UniqueParentId: nodeExecution.Id.NodeId, }) if err != nil { return err @@ -553,12 +537,8 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, } // backened overhead - latestUpstreamNode, err := m.getLatestUpstreamNodeExecution(ctx, "end-node", + latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) - if err != nil { - return err - } - if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) @@ -601,7 +581,7 @@ func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { return &admin.Span{ StartTime: taskExecution.Closure.CreatedAt, EndTime: taskExecution.Closure.UpdatedAt, - Info: &admin.Span_Reference{ + Info: &admin.Span_Reference{ Reference: &admin.ReferenceSpanInfo{ Id: &admin.ReferenceSpanInfo_TaskId{ TaskId: taskExecution.Id, @@ -637,7 +617,7 @@ func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecuti // retrieve task executions taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ NodeExecutionId: nodeExecution.Id, - Limit: REQUEST_LIMIT, + Limit: RequestLimit, }) if err != nil { return err @@ -669,7 +649,7 @@ func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecuti // TODO @hamersaw - docs func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, request 
admin.WorkflowExecutionGetMetricsRequest) (*admin.WorkflowExecutionGetMetricsResponse, error) { - + // retrieve workflow execution executionRequest := admin.WorkflowExecutionGetRequest{Id: request.Id} execution, err := m.executionManager.GetExecution(ctx, executionRequest) @@ -712,7 +692,6 @@ func NewMetricsManager( scope promutils.Scope) interfaces.MetricsInterface { metrics := metrics{ Scope: scope, - //Set: labeled.NewCounter("num_set", "count of set metricss", scope), } return &MetricsManager{ diff --git a/pkg/manager/impl/metrics_manager_test.go b/pkg/manager/impl/metrics_manager_test.go index 1dfcea4ec..e1c466a83 100644 --- a/pkg/manager/impl/metrics_manager_test.go +++ b/pkg/manager/impl/metrics_manager_test.go @@ -8,8 +8,8 @@ import ( "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" - "github.com/flyteorg/flyteadmin/pkg/manager/mocks" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" + "github.com/flyteorg/flyteadmin/pkg/manager/mocks" "github.com/golang/protobuf/ptypes/duration" "github.com/golang/protobuf/ptypes/timestamp" @@ -20,27 +20,27 @@ import ( var ( emptyTimestamp = ×tamp.Timestamp{ Seconds: 0, - Nanos: 0, + Nanos: 0, } baseTimestamp = ×tamp.Timestamp{ Seconds: 643852800, - Nanos: 0, + Nanos: 0, } emptyDuration = &duration.Duration{ Seconds: 0, - Nanos: 0, + Nanos: 0, } baseDuration = &duration.Duration{ Seconds: 400, - Nanos: 0, + Nanos: 0, } ) func addTimestamp(ts *timestamp.Timestamp, seconds int64) *timestamp.Timestamp { return ×tamp.Timestamp{ Seconds: ts.Seconds + seconds, - Nanos: ts.Nanos, + Nanos: ts.Nanos, } } @@ -61,7 +61,7 @@ func getMockNodeExecutionManager(nodeExecutions []*admin.NodeExecution, mockNodeExecutionManager.SetListNodeExecutionsFunc( func(ctx context.Context, request admin.NodeExecutionListRequest) (*admin.NodeExecutionList, error) { return &admin.NodeExecutionList{ - NodeExecutions: nodeExecutions, + NodeExecutions: nodeExecutions, }, nil }) 
mockNodeExecutionManager.SetGetNodeExecutionDataFunc( @@ -79,7 +79,7 @@ func getMockTaskExecutionManager(taskExecutions []*admin.TaskExecution) interfac mockTaskExecutionManager.SetListTaskExecutionsCallback( func(ctx context.Context, request admin.TaskExecutionListRequest) (*admin.TaskExecutionList, error) { return &admin.TaskExecutionList{ - TaskExecutions: taskExecutions, + TaskExecutions: taskExecutions, }, nil }) @@ -122,7 +122,7 @@ func TestGetLatestUpstreamNodeExecution(t *testing.T) { } func TestParseBranchNodeExecution(t *testing.T) { - tests := []struct{ + tests := []struct { name string nodeExecution *admin.NodeExecution nodeExecutions []*admin.NodeExecution @@ -133,8 +133,7 @@ func TestParseBranchNodeExecution(t *testing.T) { "NotStarted", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -153,8 +152,7 @@ func TestParseBranchNodeExecution(t *testing.T) { "Running", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -165,7 +163,7 @@ func TestParseBranchNodeExecution(t *testing.T) { }, []*admin.NodeExecution{ &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "foo", }, Closure: &admin.NodeExecutionClosure{ @@ -185,8 +183,7 @@ func TestParseBranchNodeExecution(t *testing.T) { "Completed", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -197,7 +194,7 @@ func TestParseBranchNodeExecution(t *testing.T) { }, []*admin.NodeExecution{ 
&admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "foo", }, Closure: &admin.NodeExecutionClosure{ @@ -214,7 +211,7 @@ func TestParseBranchNodeExecution(t *testing.T) { 1, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // initialize mocks @@ -230,7 +227,7 @@ func TestParseBranchNodeExecution(t *testing.T) { IfElse: &core.IfElseBlock{ Case: &core.IfBlock{ ThenNode: &core.Node{ - Id: "foo", + Id: "foo", Target: &core.Node_TaskNode{}, }, }, @@ -250,7 +247,7 @@ func TestParseBranchNodeExecution(t *testing.T) { } func TestParseDynamicNodeExecution(t *testing.T) { - tests := []struct{ + tests := []struct { name string nodeExecution *admin.NodeExecution taskExecutions []*admin.TaskExecution @@ -279,8 +276,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { "TaskRunning", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -309,8 +305,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { "NodesRunning", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -331,7 +326,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, []*admin.NodeExecution{ &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "start-node", }, Closure: &admin.NodeExecutionClosure{ @@ -342,7 +337,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, }, &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "foo", }, Closure: &admin.NodeExecutionClosure{ @@ -362,8 +357,7 @@ func TestParseDynamicNodeExecution(t 
*testing.T) { "Completed", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -384,7 +378,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, []*admin.NodeExecution{ &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "start-node", }, Closure: &admin.NodeExecutionClosure{ @@ -395,7 +389,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, }, &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "foo", }, Closure: &admin.NodeExecutionClosure{ @@ -412,7 +406,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { 2, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // initialize mocks @@ -434,7 +428,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { Template: &core.WorkflowTemplate{ Nodes: []*core.Node{ &core.Node{ - Id: "foo", + Id: "foo", Target: &core.Node_TaskNode{}, }, }, @@ -466,7 +460,7 @@ func TestParseExecution(t *testing.T) { } func TestParseGateNodeExecution(t *testing.T) { - tests := []struct{ + tests := []struct { name string nodeExecution *admin.NodeExecution categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 @@ -497,7 +491,7 @@ func TestParseGateNodeExecution(t *testing.T) { }, map[admin.CategoricalSpanInfo_Category][]int64{ admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, - admin.CategoricalSpanInfo_EXECUTION_IDLE: []int64{5}, + admin.CategoricalSpanInfo_EXECUTION_IDLE: []int64{5}, }, }, { @@ -512,11 +506,11 @@ func TestParseGateNodeExecution(t *testing.T) { }, map[admin.CategoricalSpanInfo_Category][]int64{ admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, - admin.CategoricalSpanInfo_EXECUTION_IDLE: []int64{400}, + admin.CategoricalSpanInfo_EXECUTION_IDLE: 
[]int64{400}, }, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // initialize mocks @@ -524,7 +518,7 @@ func TestParseGateNodeExecution(t *testing.T) { // parse node execution spans := make([]*admin.Span, 0) - metricsManager.parseGateNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) + metricsManager.parseGateNodeExecution(context.TODO(), test.nodeExecution, &spans) // validate spans categoryDurations, _ := parseSpansInfo(spans) @@ -534,7 +528,7 @@ func TestParseGateNodeExecution(t *testing.T) { } func TestParseLaunchPlanNodeExecution(t *testing.T) { - tests := []struct{ + tests := []struct { name string nodeExecution *admin.NodeExecution execution *admin.Execution @@ -567,8 +561,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { UpdatedAt: baseTimestamp, TargetMetadata: &admin.NodeExecutionClosure_WorkflowNodeMetadata{ WorkflowNodeMetadata: &admin.WorkflowNodeMetadata{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, }, }, @@ -596,8 +589,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 440), TargetMetadata: &admin.NodeExecutionClosure_WorkflowNodeMetadata{ WorkflowNodeMetadata: &admin.WorkflowNodeMetadata{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, }, }, @@ -616,7 +608,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { 1, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // initialize mocks @@ -624,7 +616,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { mockNodeExecutionManager := getMockNodeExecutionManager( []*admin.NodeExecution{ &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "start-node", }, Closure: &admin.NodeExecutionClosure{ @@ -635,7 +627,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { }, }, 
&admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "foo", }, Closure: &admin.NodeExecutionClosure{ @@ -665,7 +657,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { Template: &core.WorkflowTemplate{ Nodes: []*core.Node{ &core.Node{ - Id: "foo", + Id: "foo", Target: &core.Node_TaskNode{}, }, }, @@ -703,7 +695,7 @@ func TestParseNodeExecutions(t *testing.T) { } func TestParseSubworkflowNodeExecution(t *testing.T) { - tests := []struct{ + tests := []struct { name string nodeExecution *admin.NodeExecution nodeExecutions []*admin.NodeExecution @@ -714,8 +706,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { "NotStarted", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -734,8 +725,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { "Running", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: &core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -746,7 +736,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, []*admin.NodeExecution{ &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "start-node", }, Closure: &admin.NodeExecutionClosure{ @@ -757,7 +747,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, }, &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "foo", }, Closure: &admin.NodeExecutionClosure{ @@ -777,8 +767,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { "Completed", &admin.NodeExecution{ Id: &core.NodeExecutionIdentifier{ - ExecutionId: &core.WorkflowExecutionIdentifier{ - }, + ExecutionId: 
&core.WorkflowExecutionIdentifier{}, }, Closure: &admin.NodeExecutionClosure{ CreatedAt: baseTimestamp, @@ -789,7 +778,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, []*admin.NodeExecution{ &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "start-node", }, Closure: &admin.NodeExecutionClosure{ @@ -800,7 +789,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, }, &admin.NodeExecution{ - Metadata: &admin.NodeExecutionMetaData { + Metadata: &admin.NodeExecutionMetaData{ SpecNodeId: "foo", }, Closure: &admin.NodeExecutionClosure{ @@ -817,7 +806,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { 1, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // initialize mocks @@ -841,7 +830,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { Template: &core.WorkflowTemplate{ Nodes: []*core.Node{ &core.Node{ - Id: "foo", + Id: "foo", Target: &core.Node_TaskNode{}, }, }, @@ -870,7 +859,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { } func TestParseTaskExecution(t *testing.T) { - tests := []struct{ + tests := []struct { name string taskExecution *admin.TaskExecution categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 @@ -901,7 +890,7 @@ func TestParseTaskExecution(t *testing.T) { }, map[admin.CategoricalSpanInfo_Category][]int64{ admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5}, - admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{600}, + admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{600}, }, }, { @@ -916,11 +905,11 @@ func TestParseTaskExecution(t *testing.T) { }, map[admin.CategoricalSpanInfo_Category][]int64{ admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5, 10}, - admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{400}, + admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{400}, }, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // parse task execution @@ -937,7 +926,7 @@ func 
TestParseTaskExecution(t *testing.T) { } func TestParseTaskExecutions(t *testing.T) { - tests := []struct{ + tests := []struct { name string taskExecutions []*admin.TaskExecution categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 @@ -984,7 +973,7 @@ func TestParseTaskExecutions(t *testing.T) { 2, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // parse task executions @@ -1000,7 +989,7 @@ func TestParseTaskExecutions(t *testing.T) { } func TestParseTaskNodeExecution(t *testing.T) { - tests := []struct{ + tests := []struct { name string nodeExecution *admin.NodeExecution taskExecutions []*admin.TaskExecution @@ -1074,7 +1063,7 @@ func TestParseTaskNodeExecution(t *testing.T) { 1, }, } - + for _, test := range tests { t.Run(test.name, func(t *testing.T) { // initialize mocks diff --git a/pkg/rpc/adminservice/base.go b/pkg/rpc/adminservice/base.go index ebaf7e0e7..77c78f480 100644 --- a/pkg/rpc/adminservice/base.go +++ b/pkg/rpc/adminservice/base.go @@ -161,7 +161,7 @@ func NewAdminServer(ctx context.Context, pluginRegistry *plugins.Registry, confi nodeExecutionManager := manager.NewNodeExecutionManager(repo, configuration, applicationConfiguration.GetMetadataStoragePrefix(), dataStorageClient, adminScope.NewSubScope("node_execution_manager"), urlData, eventPublisher, cloudEventPublisher, nodeExecutionEventWriter) taskExecutionManager := manager.NewTaskExecutionManager(repo, configuration, dataStorageClient, - adminScope.NewSubScope("task_execution_manager"), urlData, eventPublisher, cloudEventPublisher) + adminScope.NewSubScope("task_execution_manager"), urlData, eventPublisher, cloudEventPublisher) logger.Info(ctx, "Initializing a new AdminService") return &AdminService{ @@ -173,12 +173,12 @@ func NewAdminServer(ctx context.Context, pluginRegistry *plugins.Registry, confi NamedEntityManager: namedEntityManager, DescriptionEntityManager: descriptionEntityManager, VersionManager: versionManager, - NodeExecutionManager: 
nodeExecutionManager, - TaskExecutionManager: taskExecutionManager, - ProjectManager: manager.NewProjectManager(repo, configuration), - ResourceManager: resources.NewResourceManager(repo, configuration.ApplicationConfiguration()), - MetricsManager: manager.NewMetricsManager(repo, workflowManager, executionManager, nodeExecutionManager, + NodeExecutionManager: nodeExecutionManager, + TaskExecutionManager: taskExecutionManager, + ProjectManager: manager.NewProjectManager(repo, configuration), + ResourceManager: resources.NewResourceManager(repo, configuration.ApplicationConfiguration()), + MetricsManager: manager.NewMetricsManager(workflowManager, executionManager, nodeExecutionManager, taskExecutionManager, adminScope.NewSubScope("metrics_manager")), - Metrics: InitMetrics(adminScope), + Metrics: InitMetrics(adminScope), } } From f2adc24aeebf28d3eb7acfc295493751a913baf4 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Thu, 23 Feb 2023 10:37:00 -0600 Subject: [PATCH 14/27] fixed lint issues Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 56 +++++++++++++++++------- pkg/manager/impl/metrics_manager_test.go | 29 +----------- 2 files changed, 43 insertions(+), 42 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index e70fc52b7..8f3c13298 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -8,7 +8,6 @@ import ( "time" "github.com/flyteorg/flyteadmin/pkg/manager/interfaces" - "github.com/pkg/errors" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" @@ -24,11 +23,11 @@ import ( const RequestLimit uint32 = 50 var ( - nilDuration *duration.Duration = &duration.Duration{ + emptyDuration *duration.Duration = &duration.Duration{ Seconds: 0, Nanos: 0, } - nilTimestamp *timestamp.Timestamp = ×tamp.Timestamp{ + emptyTimestamp *timestamp.Timestamp = ×tamp.Timestamp{ Seconds: 0, Nanos: 0, } @@ -163,7 +162,7 
@@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, } else { // parse branchNode if len(nodeExecutions) != 1 { - // TODO @hamersaw throw error - branch nodes execute a single node + return fmt.Errorf("invalid branch node execution: expected 1 but found %d node execution(s)", len(nodeExecutions)) } var branchNodeExecution *admin.NodeExecution @@ -173,7 +172,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, node := getBranchNode(branchNodeExecution.Metadata.SpecNodeId, branchNode) if node != nil { - // TODO @hamersaw throw error - failed to parse node + return fmt.Errorf("failed to identify branch node final node definition") } // frontend overhead @@ -291,7 +290,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex // check if workflow has started startNode := nodeExecutions["start-node"] - if startNode.Closure.UpdatedAt == nil || reflect.DeepEqual(startNode.Closure.UpdatedAt, nilTimestamp) { + if startNode.Closure.UpdatedAt == nil || reflect.DeepEqual(startNode.Closure.UpdatedAt, emptyTimestamp) { spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) } else { @@ -327,7 +326,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex func (m *MetricsManager) parseGateNodeExecution(_ context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span) { // check if node has started yet - if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, nilTimestamp) { + if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, emptyTimestamp) { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) } else { @@ -336,7 +335,7 @@ func (m *MetricsManager) parseGateNodeExecution(_ context.Context, 
nodeExecution nodeExecution.Closure.StartedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) // check if plugin has completed yet - if nodeExecution.Closure.Duration == nil || reflect.DeepEqual(nodeExecution.Closure.Duration, nilDuration) { + if nodeExecution.Closure.Duration == nil || reflect.DeepEqual(nodeExecution.Closure.Duration, emptyDuration) { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_IDLE)) } else { @@ -424,10 +423,10 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * // handle subworkflow err = m.parseSubworkflowNodeExecution(ctx, nodeExecution, workflow.SubWorkflowRef, &spans, depth-1) default: - err = fmt.Errorf("unsupported node type %+v", target) + err = fmt.Errorf("failed to identify workflow node type for node: %+v", target) } default: - err = fmt.Errorf("unsupported node type %+v", target) + err = fmt.Errorf("failed to identify node type for node: %+v", target) } if err != nil { @@ -476,7 +475,8 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions } if node == nil { - return errors.New("failed to identify workflow node") // TODO @hamersaw - do gooder + return fmt.Errorf("failed to discover workflow node '%s' in workflow '%+v'", + specNodeID, compiledWorkflowClosure.Primary.Template.Id) } // parse node execution @@ -552,7 +552,7 @@ func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { spans := make([]*admin.Span, 0) // check if plugin has started yet - if taskExecution.Closure.StartedAt == nil || reflect.DeepEqual(taskExecution.Closure.StartedAt, nilTimestamp) { + if taskExecution.Closure.StartedAt == nil || reflect.DeepEqual(taskExecution.Closure.StartedAt, emptyTimestamp) { spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) } else { @@ -561,7 +561,7 @@ func 
parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) // check if plugin has completed yet - if taskExecution.Closure.Duration == nil || reflect.DeepEqual(taskExecution.Closure.Duration, nilDuration) { + if taskExecution.Closure.Duration == nil || reflect.DeepEqual(taskExecution.Closure.Duration, emptyDuration) { spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_RUNTIME)) } else { @@ -669,14 +669,40 @@ func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { - // retrieve node execution + // retrieve node execution, workflow execution, and workflow nodeRequest := admin.NodeExecutionGetRequest{Id: request.Id} nodeExecution, err := m.nodeExecutionManager.GetNodeExecution(ctx, nodeRequest) if err != nil { return nil, err } - span, err := m.parseNodeExecution(ctx, nodeExecution, nil, int(request.Depth)) // TODO @hamersaw can NOT pass nil for Node - FIX IMMEDIATELY + executionRequest := admin.WorkflowExecutionGetRequest{Id: nodeExecution.Id.ExecutionId} + execution, err := m.executionManager.GetExecution(ctx, executionRequest) + if err != nil { + return nil, err + } + + workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} + workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) + if err != nil { + return nil, err + } + + // get node definition from workflow + var node *core.Node + for _, n := range workflow.Closure.CompiledWorkflow.Primary.Template.Nodes { + if n.Id == nodeExecution.Metadata.SpecNodeId { + node = n + } + } + + if node == nil { + return nil, fmt.Errorf("failed to discover workflow node '%s' in workflow '%+v'", + nodeExecution.Metadata.SpecNodeId, 
workflow.Closure.CompiledWorkflow.Primary.Template.Id) + } + + // parse node execution + span, err := m.parseNodeExecution(ctx, nodeExecution, node, int(request.Depth)) if err != nil { return nil, err } diff --git a/pkg/manager/impl/metrics_manager_test.go b/pkg/manager/impl/metrics_manager_test.go index e1c466a83..f83d79c97 100644 --- a/pkg/manager/impl/metrics_manager_test.go +++ b/pkg/manager/impl/metrics_manager_test.go @@ -18,23 +18,14 @@ import ( ) var ( - emptyTimestamp = ×tamp.Timestamp{ - Seconds: 0, + baseDuration = &duration.Duration{ + Seconds: 400, Nanos: 0, } baseTimestamp = ×tamp.Timestamp{ Seconds: 643852800, Nanos: 0, } - - emptyDuration = &duration.Duration{ - Seconds: 0, - Nanos: 0, - } - baseDuration = &duration.Duration{ - Seconds: 400, - Nanos: 0, - } ) func addTimestamp(ts *timestamp.Timestamp, seconds int64) *timestamp.Timestamp { @@ -117,10 +108,6 @@ func parseSpansInfo(spans []*admin.Span) (map[admin.CategoricalSpanInfo_Category return categoryDurations, referenceCount } -func TestGetLatestUpstreamNodeExecution(t *testing.T) { - // TODO @hamersaw -} - func TestParseBranchNodeExecution(t *testing.T) { tests := []struct { name string @@ -455,10 +442,6 @@ func TestParseDynamicNodeExecution(t *testing.T) { } } -func TestParseExecution(t *testing.T) { - // TODO @hamersaw -} - func TestParseGateNodeExecution(t *testing.T) { tests := []struct { name string @@ -686,14 +669,6 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { } } -func TestParseNodeExecution(t *testing.T) { - // TODO @hamersaw -} - -func TestParseNodeExecutions(t *testing.T) { - // TODO @hamersaw -} - func TestParseSubworkflowNodeExecution(t *testing.T) { tests := []struct { name string From dd46a55e6ba01f1d589e45acdd8b31521783c3a2 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Thu, 23 Feb 2023 11:11:38 -0600 Subject: [PATCH 15/27] added docs Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 37 +++++++++++++++++++++++++++-- 1 file changed, 35 
insertions(+), 2 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 8f3c13298..aad06c191 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -37,6 +37,7 @@ type metrics struct { Scope promutils.Scope } +// MetricsManager handles computation of workflow, node, and task execution metrics. type MetricsManager struct { workflowManager interfaces.WorkflowInterface executionManager interfaces.ExecutionInterface @@ -45,6 +46,7 @@ type MetricsManager struct { metrics metrics } +// createCategoricalSpan returns a Span defined by the provided arguments. func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category admin.CategoricalSpanInfo_Category) *admin.Span { return &admin.Span{ StartTime: startTime, @@ -57,6 +59,7 @@ func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category adm } } +// getBranchNode searches the provided BranchNode definition for the Node identified by nodeID. func getBranchNode(nodeID string, branchNode *core.BranchNode) *core.Node { if branchNode.IfElse.Case.ThenNode.Id == nodeID { return branchNode.IfElse.Case.ThenNode @@ -77,6 +80,9 @@ func getBranchNode(nodeID string, branchNode *core.BranchNode) *core.Node { return nil } +// getLatestUpstreamNodeExecution returns the NodeExecution with the latest UpdatedAt timestamp that is an upstream +// dependency of the provided nodeID. This is useful for computing the duration between when a node is first available +// for scheduling and when it is actually scheduled. 
func (m *MetricsManager) getLatestUpstreamNodeExecution(nodeID string, upstreamNodeIds map[string]*core.ConnectionSet_IdList, nodeExecutions map[string]*admin.NodeExecution) *admin.NodeExecution { @@ -100,6 +106,7 @@ func (m *MetricsManager) getLatestUpstreamNodeExecution(nodeID string, upstreamN return nodeExecution } +// getNodeExecutions queries the nodeExecutionManager for NodeExecutions adhering to the specified request. func (m *MetricsManager) getNodeExecutions(ctx context.Context, request admin.NodeExecutionListRequest) (map[string]*admin.NodeExecution, error) { nodeExecutions := make(map[string]*admin.NodeExecution) for { @@ -122,6 +129,7 @@ func (m *MetricsManager) getNodeExecutions(ctx context.Context, request admin.No return nodeExecutions, nil } +// getTaskExecutions queries the taskExecutionManager for TaskExecutions adhering to the specified request. func (m *MetricsManager) getTaskExecutions(ctx context.Context, request admin.TaskExecutionListRequest) ([]*admin.TaskExecution, error) { taskExecutions := make([]*admin.TaskExecution, 0) for { @@ -142,6 +150,8 @@ func (m *MetricsManager) getTaskExecutions(ctx context.Context, request admin.Ta return taskExecutions, nil } +// parseBranchNodeExecution partitions the BranchNode execution into a collection of Categorical and Reference Spans +// which are appended to the provided spans argument. func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, branchNode *core.BranchNode, spans *[]*admin.Span, depth int) error { @@ -197,6 +207,8 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, return nil } +// parseDynamicNodeExecution partitions the DynamicNode execution into a collection of Categorical and Reference Spans +// which are appended to the provided spans argument. 
func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ NodeExecutionId: nodeExecution.Id, @@ -263,6 +275,8 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec return nil } +// parseExecution partitions the workflow execution into a collection of Categorical and Reference Spans which are +// returned as a hierarchical breakdown of the workflow execution. func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Execution, depth int) (*admin.Span, error) { referenceSpan := &admin.ReferenceSpanInfo{ Id: &admin.ReferenceSpanInfo_WorkflowId{ @@ -324,6 +338,8 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex }, nil } +// parseGateNodeExecution partitions the GateNode execution into a collection of Categorical and Reference Spans +// which are appended to the provided spans argument. func (m *MetricsManager) parseGateNodeExecution(_ context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span) { // check if node has started yet if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, emptyTimestamp) { @@ -351,6 +367,8 @@ func (m *MetricsManager) parseGateNodeExecution(_ context.Context, nodeExecution } } +// parseLaunchPlanNodeExecution partitions the LaunchPlanNode execution into a collection of Categorical and Reference +// Spans which are appended to the provided spans argument. 
func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { // check if workflow started yet workflowNode := nodeExecution.Closure.GetWorkflowNodeMetadata() @@ -387,6 +405,8 @@ func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeE return nil } +// parseNodeExecution partitions the node execution into a collection of Categorical and Reference Spans which are +// returned as a hierarchical breakdown of the node execution. func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, node *core.Node, depth int) (*admin.Span, error) { referenceSpan := &admin.ReferenceSpanInfo{ Id: &admin.ReferenceSpanInfo_NodeId{ @@ -445,6 +465,8 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * }, nil } +// parseNodeExecutions partitions the node executions into a collection of Categorical and Reference Spans which are +// appended to the provided spans argument. func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions map[string]*admin.NodeExecution, compiledWorkflowClosure *core.CompiledWorkflowClosure, spans *[]*admin.Span, depth int) error { @@ -501,6 +523,8 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions return nil } +// parseSubworkflowNodeExecutions partitions the SubworkflowNode execution into a collection of Categorical and +// Reference Spans which are appended to the provided spans argument. 
func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, identifier *core.Identifier, spans *[]*admin.Span, depth int) error { @@ -548,6 +572,8 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, return nil } +// parseTaskExecution partitions the task execution into a collection of Categorical and Reference Spans which are +// returned as a hierarchical breakdown of the task execution. func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { spans := make([]*admin.Span, 0) @@ -592,6 +618,8 @@ func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { } } +// parseTaskExecutions partitions the task executions into a collection of Categorical and Reference Spans which are +// appended to the provided spans argument. func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { // sort task executions sort.Slice(taskExecutions, func(i, j int) bool { @@ -613,6 +641,8 @@ func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin. } } +// parseTaskNodeExecutions partitions the TaskNode execution into a collection of Categorical and Reference Spans which +// are appended to the provided spans argument. func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { // retrieve task executions taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ @@ -646,7 +676,8 @@ func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecuti return nil } -// TODO @hamersaw - docs +// GetExecutionMetrics returns a Span hierarchically breaking down the workflow execution into a collection of +// Categorical and Reference Spans. 
func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, request admin.WorkflowExecutionGetMetricsRequest) (*admin.WorkflowExecutionGetMetricsResponse, error) { @@ -665,7 +696,8 @@ func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, return &admin.WorkflowExecutionGetMetricsResponse{Span: span}, nil } -// TODO @hamersaw docs +// GetNodeExecutionMetrics returns a Span hierarchically breaking down the node execution into a collection of +// Categorical and Reference Spans. func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { @@ -710,6 +742,7 @@ func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, return &admin.NodeExecutionGetMetricsResponse{Span: span}, nil } +// NewMetricsManager returns a new MetricsManager constructed with the provided arguments. func NewMetricsManager( workflowManager interfaces.WorkflowInterface, executionManager interfaces.ExecutionInterface, From b5ebd32390b3e214fe348dc3a81cdc4988359c7c Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Thu, 23 Feb 2023 11:13:26 -0600 Subject: [PATCH 16/27] updated flyteidl dependency Signed-off-by: Daniel Rammer --- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 4ccc4804f..ce9f40d89 100644 --- a/go.mod +++ b/go.mod @@ -209,4 +209,4 @@ require ( replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 -replace github.com/flyteorg/flyteidl => ../flyteidl +replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.8-0.20230221163116-ad6ed0375a8b diff --git a/go.sum b/go.sum index 51dfb9abe..fb171bb0f 100644 --- a/go.sum +++ b/go.sum @@ -308,6 +308,8 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= 
github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/flyteorg/flyteidl v1.3.8-0.20230221163116-ad6ed0375a8b h1:utn+F1yUK3UCjdBNEEQpCn4cBTrOx5h3Hq2M2S7zyMw= +github.com/flyteorg/flyteidl v1.3.8-0.20230221163116-ad6ed0375a8b/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= github.com/flyteorg/flytepropeller v1.1.51 h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= From 7a4b3926ba9104366b0749ce07752c8096935871 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Thu, 23 Feb 2023 11:26:51 -0600 Subject: [PATCH 17/27] fixed unit tests Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index aad06c191..42fe99fda 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -181,8 +181,9 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, } node := getBranchNode(branchNodeExecution.Metadata.SpecNodeId, branchNode) - if node != nil { - return fmt.Errorf("failed to identify branch node final node definition") + if node == nil { + return fmt.Errorf("failed to identify branch node final node definition for nodeID '%s' and branchNode '%+v'", + branchNodeExecution.Metadata.SpecNodeId, branchNode) } // frontend overhead From 4797f9b5aec00f05a5499fd0e13704579414d4c6 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 24 Feb 2023 09:14:14 -0600 Subject: [PATCH 18/27] changed to local flyteidl for testing Signed-off-by: Daniel Rammer --- go.mod | 2 +- go.sum | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/go.mod b/go.mod index
ce9f40d89..4ccc4804f 100644 --- a/go.mod +++ b/go.mod @@ -209,4 +209,4 @@ require ( replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 -replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.8-0.20230221163116-ad6ed0375a8b +replace github.com/flyteorg/flyteidl => ../flyteidl diff --git a/go.sum b/go.sum index fb171bb0f..51dfb9abe 100644 --- a/go.sum +++ b/go.sum @@ -308,8 +308,6 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flyteorg/flyteidl v1.3.8-0.20230221163116-ad6ed0375a8b h1:utn+F1yUK3UCjdBNEEQpCn4cBTrOx5h3Hq2M2S7zyMw= -github.com/flyteorg/flyteidl v1.3.8-0.20230221163116-ad6ed0375a8b/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= github.com/flyteorg/flytepropeller v1.1.51 h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= From 40d9a0a162476c29c0c63063deff0ecd8d467ecc Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 24 Feb 2023 09:29:28 -0600 Subject: [PATCH 19/27] using task event reported_at timestamps for updated_at in task execution models Signed-off-by: Daniel Rammer --- .../transformers/task_execution.go | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/pkg/repositories/transformers/task_execution.go b/pkg/repositories/transformers/task_execution.go index 0a09f0828..77ac25db0 100644 --- a/pkg/repositories/transformers/task_execution.go +++ b/pkg/repositories/transformers/task_execution.go @@ -139,9 +139,14 @@ func CreateTaskExecutionModel(ctx 
context.Context, input CreateTaskExecutionMode }) } + reportedAt := input.Request.Event.ReportedAt + if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0){ + reportedAt = input.Request.Event.OccurredAt + } + closure := &admin.TaskExecutionClosure{ Phase: input.Request.Event.Phase, - UpdatedAt: input.Request.Event.OccurredAt, + UpdatedAt: reportedAt, CreatedAt: input.Request.Event.OccurredAt, Logs: input.Request.Event.Logs, CustomInfo: input.Request.Event.CustomInfo, @@ -181,7 +186,11 @@ func CreateTaskExecutionModel(ctx context.Context, input CreateTaskExecutionMode return nil, errors.NewFlyteAdminErrorf(codes.Internal, "failed to read event timestamp") } taskExecution.TaskExecutionCreatedAt = &taskExecutionCreatedAt - taskExecution.TaskExecutionUpdatedAt = &taskExecutionCreatedAt + taskExecutionUpdatedAt, err := ptypes.Timestamp(reportedAt) + if err != nil { + return nil, errors.NewFlyteAdminErrorf(codes.Internal, "failed to read event reported_at timestamp") + } + taskExecution.TaskExecutionUpdatedAt = &taskExecutionUpdatedAt return taskExecution, nil } @@ -359,7 +368,11 @@ func UpdateTaskExecutionModel(ctx context.Context, request *admin.TaskExecutionE taskExecutionModel.Phase = request.Event.Phase.String() taskExecutionModel.PhaseVersion = request.Event.PhaseVersion taskExecutionClosure.Phase = request.Event.Phase - taskExecutionClosure.UpdatedAt = request.Event.OccurredAt + reportedAt := request.Event.ReportedAt + if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0){ + reportedAt = request.Event.OccurredAt + } + taskExecutionClosure.UpdatedAt = reportedAt taskExecutionClosure.Logs = mergeLogs(taskExecutionClosure.Logs, request.Event.Logs) if len(request.Event.Reason) > 0 { taskExecutionClosure.Reason = request.Event.Reason @@ -391,7 +404,7 @@ func UpdateTaskExecutionModel(ctx context.Context, request *admin.TaskExecutionE codes.Internal, "failed to marshal task execution closure with error: %v", err) } 
taskExecutionModel.Closure = marshaledClosure - updatedAt, err := ptypes.Timestamp(request.Event.OccurredAt) + updatedAt, err := ptypes.Timestamp(reportedAt) if err != nil { return errors.NewFlyteAdminErrorf(codes.Internal, "failed to parse updated at timestamp") } From a99cb8729492c2c837138cd563d9855e9aee86d3 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 28 Feb 2023 08:16:52 -0600 Subject: [PATCH 20/27] updated flyteidl Signed-off-by: Daniel Rammer --- go.mod | 2 +- go.sum | 2 ++ pkg/manager/impl/metrics_manager.go | 46 ------------------------ pkg/manager/impl/metrics_manager_test.go | 12 +++++++ pkg/rpc/adminservice/node_execution.go | 18 ---------- 5 files changed, 15 insertions(+), 65 deletions(-) diff --git a/go.mod b/go.mod index 4ccc4804f..3e72c44fc 100644 --- a/go.mod +++ b/go.mod @@ -209,4 +209,4 @@ require ( replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 -replace github.com/flyteorg/flyteidl => ../flyteidl +replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.9-0.20230228140239-67e37c1d0989 diff --git a/go.sum b/go.sum index 51dfb9abe..113ffd488 100644 --- a/go.sum +++ b/go.sum @@ -308,6 +308,8 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= +github.com/flyteorg/flyteidl v1.3.9-0.20230228140239-67e37c1d0989 h1:Eu8ONcBps1luw9bEj+dnLDzj1BaPeqliivXXXymWZl0= +github.com/flyteorg/flyteidl v1.3.9-0.20230228140239-67e37c1d0989/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= 
github.com/flyteorg/flytepropeller v1.1.51 h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 42fe99fda..6b874658f 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -697,52 +697,6 @@ func (m *MetricsManager) GetExecutionMetrics(ctx context.Context, return &admin.WorkflowExecutionGetMetricsResponse{Span: span}, nil } -// GetNodeExecutionMetrics returns a Span hierarchically breaking down the node execution into a collection of -// Categorical and Reference Spans. -func (m *MetricsManager) GetNodeExecutionMetrics(ctx context.Context, - request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { - - // retrieve node execution, workflow execution, and workflow - nodeRequest := admin.NodeExecutionGetRequest{Id: request.Id} - nodeExecution, err := m.nodeExecutionManager.GetNodeExecution(ctx, nodeRequest) - if err != nil { - return nil, err - } - - executionRequest := admin.WorkflowExecutionGetRequest{Id: nodeExecution.Id.ExecutionId} - execution, err := m.executionManager.GetExecution(ctx, executionRequest) - if err != nil { - return nil, err - } - - workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} - workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) - if err != nil { - return nil, err - } - - // get node definition from workflow - var node *core.Node - for _, n := range workflow.Closure.CompiledWorkflow.Primary.Template.Nodes { - if n.Id == nodeExecution.Metadata.SpecNodeId { - node = n - } - } - - if node == nil { - return nil, fmt.Errorf("failed to discover workflow node '%s' in workflow '%+v'", - nodeExecution.Metadata.SpecNodeId, workflow.Closure.CompiledWorkflow.Primary.Template.Id) - } - - // parse node execution - span, err := m.parseNodeExecution(ctx, nodeExecution, node, int(request.Depth)) - if err != nil { - return nil, err - } - - return 
&admin.NodeExecutionGetMetricsResponse{Span: span}, nil -} - // NewMetricsManager returns a new MetricsManager constructed with the provided arguments. func NewMetricsManager( workflowManager interfaces.WorkflowInterface, diff --git a/pkg/manager/impl/metrics_manager_test.go b/pkg/manager/impl/metrics_manager_test.go index f83d79c97..64e41c98e 100644 --- a/pkg/manager/impl/metrics_manager_test.go +++ b/pkg/manager/impl/metrics_manager_test.go @@ -214,6 +214,18 @@ func TestParseBranchNodeExecution(t *testing.T) { IfElse: &core.IfElseBlock{ Case: &core.IfBlock{ ThenNode: &core.Node{ + Id: "bar", + }, + }, + Other: []*core.IfBlock{ + &core.IfBlock{ + ThenNode: &core.Node{ + Id: "baz", + }, + }, + }, + Default: &core.IfElseBlock_ElseNode{ + &core.Node{ Id: "foo", Target: &core.Node_TaskNode{}, }, diff --git a/pkg/rpc/adminservice/node_execution.go b/pkg/rpc/adminservice/node_execution.go index f4a008d83..b4cabedf3 100644 --- a/pkg/rpc/adminservice/node_execution.go +++ b/pkg/rpc/adminservice/node_execution.go @@ -108,21 +108,3 @@ func (m *AdminService) GetNodeExecutionData( m.Metrics.nodeExecutionEndpointMetrics.getData.Success() return response, nil } - -func (m *AdminService) GetNodeExecutionMetrics( - ctx context.Context, request *admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { - defer m.interceptPanic(ctx, request) - if request == nil { - return nil, status.Errorf(codes.InvalidArgument, "Incorrect request, nil requests not allowed") - } - var response *admin.NodeExecutionGetMetricsResponse - var err error - m.Metrics.nodeExecutionEndpointMetrics.getMetrics.Time(func() { - response, err = m.MetricsManager.GetNodeExecutionMetrics(ctx, *request) - }) - if err != nil { - return nil, util.TransformAndRecordError(err, &m.Metrics.nodeExecutionEndpointMetrics.getMetrics) - } - m.Metrics.nodeExecutionEndpointMetrics.getMetrics.Success() - return response, nil -} From 9d83a0ada178b97b5c428763bd92acc2d0210a12 Mon Sep 17 00:00:00 2001 
From: Daniel Rammer Date: Tue, 28 Feb 2023 08:38:47 -0600 Subject: [PATCH 21/27] fixed lint issues Signed-off-by: Daniel Rammer --- go.mod | 2 +- go.sum | 4 +- pkg/manager/impl/metrics_manager_test.go | 6 +-- pkg/manager/interfaces/metrics.go | 2 - pkg/manager/mocks/metrics_interface.go | 41 ------------------- .../transformers/task_execution.go | 4 +- 6 files changed, 8 insertions(+), 51 deletions(-) diff --git a/go.mod b/go.mod index 3e72c44fc..c29096ffd 100644 --- a/go.mod +++ b/go.mod @@ -209,4 +209,4 @@ require ( replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 -replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.9-0.20230228140239-67e37c1d0989 +replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.9-0.20230228143134-87a99debc95f diff --git a/go.sum b/go.sum index 113ffd488..de9b2eb0b 100644 --- a/go.sum +++ b/go.sum @@ -308,8 +308,8 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flyteorg/flyteidl v1.3.9-0.20230228140239-67e37c1d0989 h1:Eu8ONcBps1luw9bEj+dnLDzj1BaPeqliivXXXymWZl0= -github.com/flyteorg/flyteidl v1.3.9-0.20230228140239-67e37c1d0989/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= +github.com/flyteorg/flyteidl v1.3.9-0.20230228143134-87a99debc95f h1:47Z3knqAUnjBMDA3cdWtrIXuLlNsBKya8TS2pvZXqd8= +github.com/flyteorg/flyteidl v1.3.9-0.20230228143134-87a99debc95f/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= github.com/flyteorg/flytepropeller v1.1.51 
h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= diff --git a/pkg/manager/impl/metrics_manager_test.go b/pkg/manager/impl/metrics_manager_test.go index 64e41c98e..abc0c60fa 100644 --- a/pkg/manager/impl/metrics_manager_test.go +++ b/pkg/manager/impl/metrics_manager_test.go @@ -214,18 +214,18 @@ func TestParseBranchNodeExecution(t *testing.T) { IfElse: &core.IfElseBlock{ Case: &core.IfBlock{ ThenNode: &core.Node{ - Id: "bar", + Id: "bar", }, }, Other: []*core.IfBlock{ &core.IfBlock{ ThenNode: &core.Node{ - Id: "baz", + Id: "baz", }, }, }, Default: &core.IfElseBlock_ElseNode{ - &core.Node{ + ElseNode: &core.Node{ Id: "foo", Target: &core.Node_TaskNode{}, }, diff --git a/pkg/manager/interfaces/metrics.go b/pkg/manager/interfaces/metrics.go index 84d9c9100..d726cdc99 100644 --- a/pkg/manager/interfaces/metrics.go +++ b/pkg/manager/interfaces/metrics.go @@ -12,6 +12,4 @@ import ( type MetricsInterface interface { GetExecutionMetrics(ctx context.Context, request admin.WorkflowExecutionGetMetricsRequest) ( *admin.WorkflowExecutionGetMetricsResponse, error) - GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) ( - *admin.NodeExecutionGetMetricsResponse, error) } diff --git a/pkg/manager/mocks/metrics_interface.go b/pkg/manager/mocks/metrics_interface.go index b29423d51..2e292593e 100644 --- a/pkg/manager/mocks/metrics_interface.go +++ b/pkg/manager/mocks/metrics_interface.go @@ -55,44 +55,3 @@ func (_m *MetricsInterface) GetExecutionMetrics(ctx context.Context, request adm return r0, r1 } - -type MetricsInterface_GetNodeExecutionMetrics struct { - *mock.Call -} - -func (_m MetricsInterface_GetNodeExecutionMetrics) Return(_a0 *admin.NodeExecutionGetMetricsResponse, _a1 error) *MetricsInterface_GetNodeExecutionMetrics { - return &MetricsInterface_GetNodeExecutionMetrics{Call: _m.Call.Return(_a0, _a1)} -} - -func (_m *MetricsInterface) OnGetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) 
*MetricsInterface_GetNodeExecutionMetrics { - c_call := _m.On("GetNodeExecutionMetrics", ctx, request) - return &MetricsInterface_GetNodeExecutionMetrics{Call: c_call} -} - -func (_m *MetricsInterface) OnGetNodeExecutionMetricsMatch(matchers ...interface{}) *MetricsInterface_GetNodeExecutionMetrics { - c_call := _m.On("GetNodeExecutionMetrics", matchers...) - return &MetricsInterface_GetNodeExecutionMetrics{Call: c_call} -} - -// GetNodeExecutionMetrics provides a mock function with given fields: ctx, request -func (_m *MetricsInterface) GetNodeExecutionMetrics(ctx context.Context, request admin.NodeExecutionGetMetricsRequest) (*admin.NodeExecutionGetMetricsResponse, error) { - ret := _m.Called(ctx, request) - - var r0 *admin.NodeExecutionGetMetricsResponse - if rf, ok := ret.Get(0).(func(context.Context, admin.NodeExecutionGetMetricsRequest) *admin.NodeExecutionGetMetricsResponse); ok { - r0 = rf(ctx, request) - } else { - if ret.Get(0) != nil { - r0 = ret.Get(0).(*admin.NodeExecutionGetMetricsResponse) - } - } - - var r1 error - if rf, ok := ret.Get(1).(func(context.Context, admin.NodeExecutionGetMetricsRequest) error); ok { - r1 = rf(ctx, request) - } else { - r1 = ret.Error(1) - } - - return r0, r1 -} diff --git a/pkg/repositories/transformers/task_execution.go b/pkg/repositories/transformers/task_execution.go index 77ac25db0..8c18f17ac 100644 --- a/pkg/repositories/transformers/task_execution.go +++ b/pkg/repositories/transformers/task_execution.go @@ -140,7 +140,7 @@ func CreateTaskExecutionModel(ctx context.Context, input CreateTaskExecutionMode } reportedAt := input.Request.Event.ReportedAt - if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0){ + if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0) { reportedAt = input.Request.Event.OccurredAt } @@ -369,7 +369,7 @@ func UpdateTaskExecutionModel(ctx context.Context, request *admin.TaskExecutionE taskExecutionModel.PhaseVersion = request.Event.PhaseVersion 
taskExecutionClosure.Phase = request.Event.Phase reportedAt := request.Event.ReportedAt - if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0){ + if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0) { reportedAt = request.Event.OccurredAt } taskExecutionClosure.UpdatedAt = reportedAt From fc97d72a6cbc3bde3b8e20c630769c179397b572 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Thu, 2 Mar 2023 12:16:26 -0600 Subject: [PATCH 22/27] using reported_at for node executions Signed-off-by: Daniel Rammer --- go.mod | 2 +- go.sum | 4 ++-- .../transformers/node_execution.go | 21 +++++++++++++++---- 3 files changed, 20 insertions(+), 7 deletions(-) diff --git a/go.mod b/go.mod index c29096ffd..66ae60d43 100644 --- a/go.mod +++ b/go.mod @@ -209,4 +209,4 @@ require ( replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 -replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.9-0.20230228143134-87a99debc95f +replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.9-0.20230302174150-9aae14fc0b45 diff --git a/go.sum b/go.sum index de9b2eb0b..bc28b4ebf 100644 --- a/go.sum +++ b/go.sum @@ -308,8 +308,8 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flyteorg/flyteidl v1.3.9-0.20230228143134-87a99debc95f h1:47Z3knqAUnjBMDA3cdWtrIXuLlNsBKya8TS2pvZXqd8= -github.com/flyteorg/flyteidl v1.3.9-0.20230228143134-87a99debc95f/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= +github.com/flyteorg/flyteidl v1.3.9-0.20230302174150-9aae14fc0b45 h1:kBjiXqRETsA5C5p0o/YwRhflLFRvakXKDZVzjtgdIE4= +github.com/flyteorg/flyteidl 
v1.3.9-0.20230302174150-9aae14fc0b45/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= github.com/flyteorg/flytepropeller v1.1.51 h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= diff --git a/pkg/repositories/transformers/node_execution.go b/pkg/repositories/transformers/node_execution.go index 83f9acbaf..da30fbe48 100644 --- a/pkg/repositories/transformers/node_execution.go +++ b/pkg/repositories/transformers/node_execution.go @@ -115,10 +115,15 @@ func CreateNodeExecutionModel(ctx context.Context, input ToNodeExecutionModelInp Phase: input.Request.Event.Phase.String(), } + reportedAt := input.Request.Event.ReportedAt + if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0) { + reportedAt = input.Request.Event.OccurredAt + } + closure := admin.NodeExecutionClosure{ Phase: input.Request.Event.Phase, CreatedAt: input.Request.Event.OccurredAt, - UpdatedAt: input.Request.Event.OccurredAt, + UpdatedAt: reportedAt, } nodeExecutionMetadata := admin.NodeExecutionMetaData{ @@ -161,7 +166,11 @@ func CreateNodeExecutionModel(ctx context.Context, input ToNodeExecutionModelInp return nil, errors.NewFlyteAdminErrorf(codes.Internal, "failed to read event timestamp") } nodeExecution.NodeExecutionCreatedAt = &nodeExecutionCreatedAt - nodeExecution.NodeExecutionUpdatedAt = &nodeExecutionCreatedAt + nodeExecutionUpdatedAt, err := ptypes.Timestamp(reportedAt) + if err != nil { + return nil, errors.NewFlyteAdminErrorf(codes.Internal, "failed to read event reported_at timestamp") + } + nodeExecution.NodeExecutionUpdatedAt = &nodeExecutionUpdatedAt if input.Request.Event.ParentTaskMetadata != nil { nodeExecution.ParentTaskExecutionID = input.ParentTaskExecutionID } @@ -195,7 +204,11 @@ func UpdateNodeExecutionModel( } nodeExecutionModel.Phase = request.Event.Phase.String() 
nodeExecutionClosure.Phase = request.Event.Phase - nodeExecutionClosure.UpdatedAt = request.Event.OccurredAt + reportedAt := request.Event.ReportedAt + if reportedAt == nil || (reportedAt.Seconds == 0 && reportedAt.Nanos == 0) { + reportedAt = request.Event.OccurredAt + } + nodeExecutionClosure.UpdatedAt = reportedAt if request.Event.Phase == core.NodeExecution_RUNNING { err := addNodeRunningState(request, nodeExecutionModel, &nodeExecutionClosure) @@ -242,7 +255,7 @@ func UpdateNodeExecutionModel( } nodeExecutionModel.Closure = marshaledClosure - updatedAt, err := ptypes.Timestamp(request.Event.OccurredAt) + updatedAt, err := ptypes.Timestamp(reportedAt) if err != nil { return errors.NewFlyteAdminErrorf(codes.Internal, "failed to parse updated at timestamp") } From 46f06914292b10cd54bd24b8ec6a253d563b5914 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Mar 2023 11:31:38 -0500 Subject: [PATCH 23/27] updated flyteidl Signed-off-by: Daniel Rammer --- go.mod | 2 +- go.sum | 4 +- pkg/manager/impl/metrics_manager.go | 64 ++++++++++++++--------------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/go.mod b/go.mod index e04d2f185..2d905d3df 100644 --- a/go.mod +++ b/go.mod @@ -209,4 +209,4 @@ require ( replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 -replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.9-0.20230302174150-9aae14fc0b45 +replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.13-0.20230317162424-18e93aafcc55 diff --git a/go.sum b/go.sum index f82b93109..86f6f9630 100644 --- a/go.sum +++ b/go.sum @@ -308,8 +308,8 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod 
h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flyteorg/flyteidl v1.3.9-0.20230302174150-9aae14fc0b45 h1:kBjiXqRETsA5C5p0o/YwRhflLFRvakXKDZVzjtgdIE4= -github.com/flyteorg/flyteidl v1.3.9-0.20230302174150-9aae14fc0b45/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= +github.com/flyteorg/flyteidl v1.3.13-0.20230317162424-18e93aafcc55 h1:DtFdzztrUcgqULvf9xTH0ixzC3eJElHsW8ILuyRYqnI= +github.com/flyteorg/flyteidl v1.3.13-0.20230317162424-18e93aafcc55/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= github.com/flyteorg/flytepropeller v1.1.51 h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 6b874658f..3cd6fd6b8 100644 --- a/pkg/manager/impl/metrics_manager.go +++ b/pkg/manager/impl/metrics_manager.go @@ -53,7 +53,7 @@ func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category adm EndTime: endTime, Info: &admin.Span_Category{ Category: &admin.CategoricalSpanInfo{ - Category: category, + Category: category.String(), }, }, } @@ -168,7 +168,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, // check if the node started if len(nodeExecutions) == 0 { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) } else { // parse branchNode if len(nodeExecutions) != 1 { @@ -188,7 +188,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, // frontend overhead *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - branchNodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + 
branchNodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) // node execution nodeExecutionSpan, err := m.parseNodeExecution(ctx, branchNodeExecution, node, depth) @@ -201,7 +201,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, // backened overhead if !nodeExecution.Closure.UpdatedAt.AsTime().Before(branchNodeExecution.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(branchNodeExecution.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) } } @@ -222,11 +222,11 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec // if no task executions then everything is execution overhead if len(taskExecutions) == 0 { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) } else { // frontend overhead *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) // task execution(s) parseTaskExecutions(taskExecutions, spans, depth) @@ -244,13 +244,13 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec if len(nodeExecutions) == 0 { if !nodeExecution.Closure.UpdatedAt.AsTime().Before(lastTask.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(lastTask.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_RESET)) } } else { // between task execution(s) and node execution(s) overhead startNode := nodeExecutions["start-node"] *spans = 
append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_RESET)) // node execution(s) getDataRequest := admin.NodeExecutionGetDataRequest{Id: nodeExecution.Id} @@ -268,7 +268,7 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) } } } @@ -307,11 +307,11 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex startNode := nodeExecutions["start-node"] if startNode.Closure.UpdatedAt == nil || reflect.DeepEqual(startNode.Closure.UpdatedAt, emptyTimestamp) { spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, - execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_WORKFLOW_SETUP)) } else { // compute frontend overhead spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_WORKFLOW_SETUP)) // iterate over nodes and compute overhead if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, &spans, depth-1); err != nil { @@ -323,7 +323,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if 
latestUpstreamNode != nil && !execution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { spans = append(spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_WORKFLOW_TEARDOWN)) } } @@ -345,25 +345,25 @@ func (m *MetricsManager) parseGateNodeExecution(_ context.Context, nodeExecution // check if node has started yet if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, emptyTimestamp) { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) } else { // frontend overhead *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.StartedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.StartedAt, admin.CategoricalSpanInfo_NODE_SETUP)) // check if plugin has completed yet if nodeExecution.Closure.Duration == nil || reflect.DeepEqual(nodeExecution.Closure.Duration, emptyDuration) { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_IDLE)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_IDLE)) } else { // idle time nodeEndTime := timestamppb.New(nodeExecution.Closure.StartedAt.AsTime().Add(nodeExecution.Closure.Duration.AsDuration())) *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, - nodeEndTime, admin.CategoricalSpanInfo_EXECUTION_IDLE)) + nodeEndTime, admin.CategoricalSpanInfo_NODE_IDLE)) // backend overhead *spans = append(*spans, createCategoricalSpan(nodeEndTime, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + 
nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) } } } @@ -375,7 +375,7 @@ func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeE workflowNode := nodeExecution.Closure.GetWorkflowNodeMetadata() if workflowNode == nil { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) } else { // retrieve execution executionRequest := admin.WorkflowExecutionGetRequest{Id: workflowNode.ExecutionId} @@ -386,7 +386,7 @@ func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeE // frontend overhead *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - execution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + execution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) // execution span, err := m.parseExecution(ctx, execution, depth) @@ -399,7 +399,7 @@ func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeE // backend overhead if !nodeExecution.Closure.UpdatedAt.AsTime().Before(execution.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(execution.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) } } @@ -542,12 +542,12 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, // check if the subworkflow started if len(nodeExecutions) == 0 { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) } else { // frontend overhead startNode := nodeExecutions["start-node"] *spans = append(*spans, 
createCategoricalSpan(nodeExecution.Closure.CreatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) // retrieve workflow workflowRequest := admin.ObjectGetRequest{Id: identifier} @@ -566,7 +566,7 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) } } @@ -581,26 +581,26 @@ func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { // check if plugin has started yet if taskExecution.Closure.StartedAt == nil || reflect.DeepEqual(taskExecution.Closure.StartedAt, emptyTimestamp) { spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, - taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_TASK_SETUP)) } else { // frontend overhead spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, - taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_TASK_SETUP)) // check if plugin has completed yet if taskExecution.Closure.Duration == nil || reflect.DeepEqual(taskExecution.Closure.Duration, emptyDuration) { spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, - taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_RUNTIME)) + taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_TASK_RUNTIME)) } else { // plugin execution taskEndTime := 
timestamppb.New(taskExecution.Closure.StartedAt.AsTime().Add(taskExecution.Closure.Duration.AsDuration())) spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, - taskEndTime, admin.CategoricalSpanInfo_PLUGIN_RUNTIME)) + taskEndTime, admin.CategoricalSpanInfo_TASK_RUNTIME)) // backend overhead if !taskExecution.Closure.UpdatedAt.AsTime().Before(taskEndTime.AsTime()) { spans = append(spans, createCategoricalSpan(taskEndTime, - taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_PLUGIN_OVERHEAD)) + taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_TASK_TEARDOWN)) } } } @@ -633,7 +633,7 @@ func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin. for index, taskExecution := range taskExecutions { if index > 0 { *spans = append(*spans, createCategoricalSpan(taskExecutions[index-1].Closure.UpdatedAt, - taskExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + taskExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_RESET)) } if depth != 0 { @@ -657,11 +657,11 @@ func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecuti // if no task executions then everything is execution overhead if len(taskExecutions) == 0 { *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) } else { // frontend overhead *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) // parse task executions parseTaskExecutions(taskExecutions, spans, depth) @@ -670,7 +670,7 @@ func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecuti lastTask := taskExecutions[len(taskExecutions)-1] if 
!nodeExecution.Closure.UpdatedAt.AsTime().Before(lastTask.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_EXECUTION_OVERHEAD)) + nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) } } From a81e9319b4a99b85f1fb215990700da68b6ea591 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Fri, 17 Mar 2023 12:23:33 -0500 Subject: [PATCH 24/27] fixes unit tests and lint issues Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager_test.go | 131 ++++++++++++----------- 1 file changed, 70 insertions(+), 61 deletions(-) diff --git a/pkg/manager/impl/metrics_manager_test.go b/pkg/manager/impl/metrics_manager_test.go index abc0c60fa..2d573111c 100644 --- a/pkg/manager/impl/metrics_manager_test.go +++ b/pkg/manager/impl/metrics_manager_test.go @@ -87,8 +87,8 @@ func getMockWorkflowManager(workflow *admin.Workflow) interfaces.WorkflowInterfa return &mockWorkflowManager } -func parseSpansInfo(spans []*admin.Span) (map[admin.CategoricalSpanInfo_Category][]int64, int) { - categoryDurations := make(map[admin.CategoricalSpanInfo_Category][]int64) +func parseSpansInfo(spans []*admin.Span) (map[string][]int64, int) { + categoryDurations := make(map[string][]int64) referenceCount := 0 for _, span := range spans { switch info := span.Info.(type) { @@ -113,7 +113,7 @@ func TestParseBranchNodeExecution(t *testing.T) { name string nodeExecution *admin.NodeExecution nodeExecutions []*admin.NodeExecution - categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 referenceCount int }{ { @@ -130,8 +130,8 @@ func TestParseBranchNodeExecution(t *testing.T) { }, }, nil, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, }, 0, }, @@ -161,8 
+161,8 @@ func TestParseBranchNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, }, 1, }, @@ -192,8 +192,9 @@ func TestParseBranchNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 20}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{20}, }, 1, }, @@ -251,7 +252,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { nodeExecution *admin.NodeExecution taskExecutions []*admin.TaskExecution nodeExecutions []*admin.NodeExecution - categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 referenceCount int }{ { @@ -266,8 +267,8 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, nil, nil, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, }, 0, }, @@ -295,8 +296,8 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, }, nil, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, }, 1, }, @@ -347,8 +348,9 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_RESET.String(): []int64{15}, }, 2, }, @@ -399,8 +401,10 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - 
admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15, 20}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_RESET.String(): []int64{15}, + admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{20}, }, 2, }, @@ -458,7 +462,7 @@ func TestParseGateNodeExecution(t *testing.T) { tests := []struct { name string nodeExecution *admin.NodeExecution - categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 }{ { "NotStarted", @@ -470,8 +474,8 @@ func TestParseGateNodeExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 5), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, }, }, { @@ -484,9 +488,9 @@ func TestParseGateNodeExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 15), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, - admin.CategoricalSpanInfo_EXECUTION_IDLE: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_IDLE.String(): []int64{5}, }, }, { @@ -499,9 +503,10 @@ func TestParseGateNodeExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 425), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, - admin.CategoricalSpanInfo_EXECUTION_IDLE: []int64{400}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_IDLE.String(): []int64{400}, + admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{15}, }, }, } @@ -527,7 +532,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { name string nodeExecution *admin.NodeExecution execution *admin.Execution - categoryDurations 
map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 referenceCount int }{ { @@ -541,8 +546,8 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { }, }, nil, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, }, 0, }, @@ -569,8 +574,8 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 15), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, }, 1, }, @@ -597,8 +602,9 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 425), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{15}, }, 1, }, @@ -686,7 +692,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { name string nodeExecution *admin.NodeExecution nodeExecutions []*admin.NodeExecution - categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 referenceCount int }{ { @@ -703,8 +709,8 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, }, nil, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, }, 0, }, @@ -745,8 +751,8 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, }, 1, }, @@ -787,8 
+793,9 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 20}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{20}, }, 1, }, @@ -849,7 +856,7 @@ func TestParseTaskExecution(t *testing.T) { tests := []struct { name string taskExecution *admin.TaskExecution - categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 }{ { "NotStarted", @@ -861,8 +868,8 @@ func TestParseTaskExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 5), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_TASK_SETUP.String(): []int64{5}, }, }, { @@ -875,9 +882,9 @@ func TestParseTaskExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 605), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5}, - admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{600}, + map[string][]int64{ + admin.CategoricalSpanInfo_TASK_SETUP.String(): []int64{5}, + admin.CategoricalSpanInfo_TASK_RUNTIME.String(): []int64{600}, }, }, { @@ -890,9 +897,10 @@ func TestParseTaskExecution(t *testing.T) { UpdatedAt: addTimestamp(baseTimestamp, 415), }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_PLUGIN_OVERHEAD: []int64{5, 10}, - admin.CategoricalSpanInfo_PLUGIN_RUNTIME: []int64{400}, + map[string][]int64{ + admin.CategoricalSpanInfo_TASK_SETUP.String(): []int64{5}, + admin.CategoricalSpanInfo_TASK_RUNTIME.String(): []int64{400}, + admin.CategoricalSpanInfo_TASK_TEARDOWN.String(): []int64{10}, }, }, } @@ -916,7 +924,7 @@ func TestParseTaskExecutions(t *testing.T) { tests := []struct { name string taskExecutions []*admin.TaskExecution - 
categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 referenceCount int }{ { @@ -931,7 +939,7 @@ func TestParseTaskExecutions(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{}, + map[string][]int64{}, 1, }, { @@ -954,8 +962,8 @@ func TestParseTaskExecutions(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{20}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_RESET.String(): []int64{20}, }, 2, }, @@ -980,7 +988,7 @@ func TestParseTaskNodeExecution(t *testing.T) { name string nodeExecution *admin.NodeExecution taskExecutions []*admin.TaskExecution - categoryDurations map[admin.CategoricalSpanInfo_Category][]int64 + categoryDurations map[string][]int64 referenceCount int }{ { @@ -994,8 +1002,8 @@ func TestParseTaskNodeExecution(t *testing.T) { }, }, nil, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{5}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, }, 0, }, @@ -1019,8 +1027,8 @@ func TestParseTaskNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, }, 1, }, @@ -1044,8 +1052,9 @@ func TestParseTaskNodeExecution(t *testing.T) { }, }, }, - map[admin.CategoricalSpanInfo_Category][]int64{ - admin.CategoricalSpanInfo_EXECUTION_OVERHEAD: []int64{10, 15}, + map[string][]int64{ + admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{15}, }, 1, }, From c8d206a9c7bece786ec236eeb7a90554d0f23eca Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Tue, 21 Mar 2023 13:27:49 -0500 Subject: [PATCH 25/27] updated flyteidl Signed-off-by: Daniel Rammer --- go.mod | 2 +- go.sum | 4 +- 
pkg/manager/impl/metrics_manager.go | 210 ++++++++++------------ pkg/manager/impl/metrics_manager_test.go | 212 +++++++++++------------ 4 files changed, 196 insertions(+), 232 deletions(-) diff --git a/go.mod b/go.mod index 2d905d3df..3c865485b 100644 --- a/go.mod +++ b/go.mod @@ -209,4 +209,4 @@ require ( replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 -replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.13-0.20230317162424-18e93aafcc55 +replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.13-0.20230321181500-b7fd869e65be diff --git a/go.sum b/go.sum index 86f6f9630..61e3e0d93 100644 --- a/go.sum +++ b/go.sum @@ -308,8 +308,8 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flyteorg/flyteidl v1.3.13-0.20230317162424-18e93aafcc55 h1:DtFdzztrUcgqULvf9xTH0ixzC3eJElHsW8ILuyRYqnI= -github.com/flyteorg/flyteidl v1.3.13-0.20230317162424-18e93aafcc55/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= +github.com/flyteorg/flyteidl v1.3.13-0.20230321181500-b7fd869e65be h1:zGGUhPqIa+6wD9pPlV28i37zXHQgu7feF4a9kJ4mDuo= +github.com/flyteorg/flyteidl v1.3.13-0.20230321181500-b7fd869e65be/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.20 h1:8ZGN2c0iaZa3d/UmN2VYozLBRhthAIO48aD5g8Wly7s= github.com/flyteorg/flyteplugins v1.0.20/go.mod h1:ZbZVBxEWh8Icj1AgfNKg0uPzHHGd9twa4eWcY2Yt6xE= github.com/flyteorg/flytepropeller v1.1.51 h1:ITPH2Fqx+/1hKBFnfb6Rawws3VbEJ3tQ/1tQXSIXvcQ= diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 3cd6fd6b8..1b9a0be18 100644 --- a/pkg/manager/impl/metrics_manager.go +++ 
b/pkg/manager/impl/metrics_manager.go @@ -20,7 +20,20 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" ) -const RequestLimit uint32 = 50 +const ( + RequestLimit uint32 = 50 + + nodeIdle = "NODE_IDLE" + nodeReset = "NODE_RESET" + nodeSetup = "NODE_SETUP" + nodeTeardown = "NODE_TEARDOWN" + nodeTransition = "NODE_TRANSITION" + taskRuntime = "TASK_RUNTIME" + taskSetup = "TASK_SETUP" + taskTeardown = "TASK_TEARDOWN" + workflowSetup = "WORKFLOW_SETUP" + workflowTeardown = "WORKFLOW_TEARDOWN" +) var ( emptyDuration *duration.Duration = &duration.Duration{ @@ -46,15 +59,13 @@ type MetricsManager struct { metrics metrics } -// createCategoricalSpan returns a Span defined by the provided arguments. -func createCategoricalSpan(startTime, endTime *timestamp.Timestamp, category admin.CategoricalSpanInfo_Category) *admin.Span { - return &admin.Span{ +// createOperationSpan returns a Span defined by the provided arguments. +func createOperationSpan(startTime, endTime *timestamp.Timestamp, operation string) *core.Span { + return &core.Span{ StartTime: startTime, EndTime: endTime, - Info: &admin.Span_Category{ - Category: &admin.CategoricalSpanInfo{ - Category: category.String(), - }, + Id: &core.Span_OperationId{ + OperationId: operation, }, } } @@ -153,7 +164,7 @@ func (m *MetricsManager) getTaskExecutions(ctx context.Context, request admin.Ta // parseBranchNodeExecution partitions the BranchNode execution into a collection of Categorical and Reference Spans // which are appended to the provided spans argument. 
func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, - nodeExecution *admin.NodeExecution, branchNode *core.BranchNode, spans *[]*admin.Span, depth int) error { + nodeExecution *admin.NodeExecution, branchNode *core.BranchNode, spans *[]*core.Span, depth int) error { // retrieve node execution(s) nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ @@ -167,8 +178,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, // check if the node started if len(nodeExecutions) == 0 { - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, nodeSetup)) } else { // parse branchNode if len(nodeExecutions) != 1 { @@ -187,8 +197,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, } // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - branchNodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, branchNodeExecution.Closure.CreatedAt, nodeSetup)) // node execution nodeExecutionSpan, err := m.parseNodeExecution(ctx, branchNodeExecution, node, depth) @@ -200,8 +209,8 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx context.Context, // backened overhead if !nodeExecution.Closure.UpdatedAt.AsTime().Before(branchNodeExecution.Closure.UpdatedAt.AsTime()) { - *spans = append(*spans, createCategoricalSpan(branchNodeExecution.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) + *spans = append(*spans, createOperationSpan(branchNodeExecution.Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, nodeTeardown)) } } @@ -210,7 +219,7 @@ func (m *MetricsManager) parseBranchNodeExecution(ctx 
context.Context, // parseDynamicNodeExecution partitions the DynamicNode execution into a collection of Categorical and Reference Spans // which are appended to the provided spans argument. -func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { +func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*core.Span, depth int) error { taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ NodeExecutionId: nodeExecution.Id, Limit: RequestLimit, @@ -221,12 +230,10 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec // if no task executions then everything is execution overhead if len(taskExecutions) == 0 { - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, nodeSetup)) } else { // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, taskExecutions[0].Closure.CreatedAt, nodeSetup)) // task execution(s) parseTaskExecutions(taskExecutions, spans, depth) @@ -243,14 +250,13 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec lastTask := taskExecutions[len(taskExecutions)-1] if len(nodeExecutions) == 0 { if !nodeExecution.Closure.UpdatedAt.AsTime().Before(lastTask.Closure.UpdatedAt.AsTime()) { - *spans = append(*spans, createCategoricalSpan(lastTask.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_RESET)) + *spans = append(*spans, createOperationSpan(lastTask.Closure.UpdatedAt, 
nodeExecution.Closure.UpdatedAt, nodeReset)) } } else { // between task execution(s) and node execution(s) overhead startNode := nodeExecutions["start-node"] - *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_RESET)) + *spans = append(*spans, createOperationSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + startNode.Closure.UpdatedAt, nodeReset)) // node execution(s) getDataRequest := admin.NodeExecutionGetDataRequest{Id: nodeExecution.Id} @@ -267,8 +273,7 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { - *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) + *spans = append(*spans, createOperationSpan(latestUpstreamNode.Closure.UpdatedAt, nodeExecution.Closure.UpdatedAt, nodeTeardown)) } } } @@ -278,16 +283,9 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec // parseExecution partitions the workflow execution into a collection of Categorical and Reference Spans which are // returned as a hierarchical breakdown of the workflow execution. 
-func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Execution, depth int) (*admin.Span, error) { - referenceSpan := &admin.ReferenceSpanInfo{ - Id: &admin.ReferenceSpanInfo_WorkflowId{ - WorkflowId: execution.Id, - }, - } - +func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Execution, depth int) (*core.Span, error) { + spans := make([]*core.Span, 0) if depth != 0 { - spans := make([]*admin.Span, 0) - // retrieve workflow and node executions workflowRequest := admin.ObjectGetRequest{Id: execution.Closure.WorkflowId} workflow, err := m.workflowManager.GetWorkflow(ctx, workflowRequest) @@ -306,12 +304,10 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex // check if workflow has started startNode := nodeExecutions["start-node"] if startNode.Closure.UpdatedAt == nil || reflect.DeepEqual(startNode.Closure.UpdatedAt, emptyTimestamp) { - spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, - execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_WORKFLOW_SETUP)) + spans = append(spans, createOperationSpan(execution.Closure.CreatedAt, execution.Closure.UpdatedAt, workflowSetup)) } else { // compute frontend overhead - spans = append(spans, createCategoricalSpan(execution.Closure.CreatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_WORKFLOW_SETUP)) + spans = append(spans, createOperationSpan(execution.Closure.CreatedAt, startNode.Closure.UpdatedAt, workflowSetup)) // iterate over nodes and compute overhead if err := m.parseNodeExecutions(ctx, nodeExecutions, workflow.Closure.CompiledWorkflow, &spans, depth-1); err != nil { @@ -322,60 +318,54 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && 
!execution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { - spans = append(spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - execution.Closure.UpdatedAt, admin.CategoricalSpanInfo_WORKFLOW_TEARDOWN)) + spans = append(spans, createOperationSpan(latestUpstreamNode.Closure.UpdatedAt, + execution.Closure.UpdatedAt, workflowTeardown)) } } - - referenceSpan.Spans = spans } - return &admin.Span{ + return &core.Span{ StartTime: execution.Closure.CreatedAt, EndTime: execution.Closure.UpdatedAt, - Info: &admin.Span_Reference{ - Reference: referenceSpan, + Id: &core.Span_WorkflowId{ + WorkflowId: execution.Id, }, + Spans: spans, }, nil } // parseGateNodeExecution partitions the GateNode execution into a collection of Categorical and Reference Spans // which are appended to the provided spans argument. -func (m *MetricsManager) parseGateNodeExecution(_ context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span) { +func (m *MetricsManager) parseGateNodeExecution(_ context.Context, nodeExecution *admin.NodeExecution, spans *[]*core.Span) { // check if node has started yet if nodeExecution.Closure.StartedAt == nil || reflect.DeepEqual(nodeExecution.Closure.StartedAt, emptyTimestamp) { - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, nodeSetup)) } else { // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.StartedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.StartedAt, nodeSetup)) // check if plugin has completed yet if nodeExecution.Closure.Duration == nil || reflect.DeepEqual(nodeExecution.Closure.Duration, emptyDuration) { - 
*spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_IDLE)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.StartedAt, + nodeExecution.Closure.UpdatedAt, nodeIdle)) } else { // idle time nodeEndTime := timestamppb.New(nodeExecution.Closure.StartedAt.AsTime().Add(nodeExecution.Closure.Duration.AsDuration())) - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.StartedAt, - nodeEndTime, admin.CategoricalSpanInfo_NODE_IDLE)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.StartedAt, nodeEndTime, nodeIdle)) // backend overhead - *spans = append(*spans, createCategoricalSpan(nodeEndTime, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) + *spans = append(*spans, createOperationSpan(nodeEndTime, nodeExecution.Closure.UpdatedAt, nodeTeardown)) } } } // parseLaunchPlanNodeExecution partitions the LaunchPlanNode execution into a collection of Categorical and Reference // Spans which are appended to the provided spans argument. 
-func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { +func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*core.Span, depth int) error { // check if workflow started yet workflowNode := nodeExecution.Closure.GetWorkflowNodeMetadata() if workflowNode == nil { - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, nodeSetup)) } else { // retrieve execution executionRequest := admin.WorkflowExecutionGetRequest{Id: workflowNode.ExecutionId} @@ -385,8 +375,7 @@ func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeE } // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - execution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, execution.Closure.CreatedAt, nodeSetup)) // execution span, err := m.parseExecution(ctx, execution, depth) @@ -398,8 +387,7 @@ func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeE // backend overhead if !nodeExecution.Closure.UpdatedAt.AsTime().Before(execution.Closure.UpdatedAt.AsTime()) { - *spans = append(*spans, createCategoricalSpan(execution.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) + *spans = append(*spans, createOperationSpan(execution.Closure.UpdatedAt, nodeExecution.Closure.UpdatedAt, nodeTeardown)) } } @@ -408,15 +396,9 @@ func (m *MetricsManager) parseLaunchPlanNodeExecution(ctx context.Context, nodeE // parseNodeExecution partitions the node execution into a collection of Categorical and Reference Spans which are // 
returned as a hierarchical breakdown of the node execution. -func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, node *core.Node, depth int) (*admin.Span, error) { - referenceSpan := &admin.ReferenceSpanInfo{ - Id: &admin.ReferenceSpanInfo_NodeId{ - NodeId: nodeExecution.Id, - }, - } - +func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, node *core.Node, depth int) (*core.Span, error) { + spans := make([]*core.Span, 0) if depth != 0 { - spans := make([]*admin.Span, 0) // parse node var err error @@ -453,23 +435,22 @@ func (m *MetricsManager) parseNodeExecution(ctx context.Context, nodeExecution * if err != nil { return nil, err } - - referenceSpan.Spans = spans } - return &admin.Span{ + return &core.Span{ StartTime: nodeExecution.Closure.CreatedAt, EndTime: nodeExecution.Closure.UpdatedAt, - Info: &admin.Span_Reference{ - Reference: referenceSpan, + Id: &core.Span_NodeId{ + NodeId: nodeExecution.Id, }, + Spans: spans, }, nil } // parseNodeExecutions partitions the node executions into a collection of Categorical and Reference Spans which are // appended to the provided spans argument. 
func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions map[string]*admin.NodeExecution, - compiledWorkflowClosure *core.CompiledWorkflowClosure, spans *[]*admin.Span, depth int) error { + compiledWorkflowClosure *core.CompiledWorkflowClosure, spans *[]*core.Span, depth int) error { // sort node executions sortedNodeExecutions := make([]*admin.NodeExecution, 0, len(nodeExecutions)) @@ -508,14 +489,12 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions return err } - // prepend nodeExecution spans with NODE_TRANSITION time - if referenceSpan, ok := nodeExecutionSpan.Info.(*admin.Span_Reference); ok { - latestUpstreamNode := m.getLatestUpstreamNodeExecution(specNodeID, - compiledWorkflowClosure.Primary.Connections.Upstream, nodeExecutions) - if latestUpstreamNode != nil { - referenceSpan.Reference.Spans = append([]*admin.Span{createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_TRANSITION)}, referenceSpan.Reference.Spans...) - } + // prepend nodeExecution spans with node transition time + latestUpstreamNode := m.getLatestUpstreamNodeExecution(specNodeID, + compiledWorkflowClosure.Primary.Connections.Upstream, nodeExecutions) + if latestUpstreamNode != nil { + nodeExecutionSpan.Spans = append([]*core.Span{createOperationSpan(latestUpstreamNode.Closure.UpdatedAt, + nodeExecution.Closure.CreatedAt, nodeTransition)}, nodeExecutionSpan.Spans...) } *spans = append(*spans, nodeExecutionSpan) @@ -527,7 +506,7 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions // parseSubworkflowNodeExecutions partitions the SubworkflowNode execution into a collection of Categorical and // Reference Spans which are appended to the provided spans argument. 
func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, - nodeExecution *admin.NodeExecution, identifier *core.Identifier, spans *[]*admin.Span, depth int) error { + nodeExecution *admin.NodeExecution, identifier *core.Identifier, spans *[]*core.Span, depth int) error { // retrieve node execution(s) nodeExecutions, err := m.getNodeExecutions(ctx, admin.NodeExecutionListRequest{ @@ -541,13 +520,11 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, // check if the subworkflow started if len(nodeExecutions) == 0 { - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, nodeSetup)) } else { // frontend overhead startNode := nodeExecutions["start-node"] - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - startNode.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, startNode.Closure.UpdatedAt, nodeSetup)) // retrieve workflow workflowRequest := admin.ObjectGetRequest{Id: identifier} @@ -565,8 +542,7 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { - *spans = append(*spans, createCategoricalSpan(latestUpstreamNode.Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) + *spans = append(*spans, createOperationSpan(latestUpstreamNode.Closure.UpdatedAt, nodeExecution.Closure.UpdatedAt, nodeTeardown)) } } @@ -575,53 +551,44 @@ func (m *MetricsManager) 
parseSubworkflowNodeExecution(ctx context.Context, // parseTaskExecution partitions the task execution into a collection of Categorical and Reference Spans which are // returned as a hierarchical breakdown of the task execution. -func parseTaskExecution(taskExecution *admin.TaskExecution) *admin.Span { - spans := make([]*admin.Span, 0) +func parseTaskExecution(taskExecution *admin.TaskExecution) *core.Span { + spans := make([]*core.Span, 0) // check if plugin has started yet if taskExecution.Closure.StartedAt == nil || reflect.DeepEqual(taskExecution.Closure.StartedAt, emptyTimestamp) { - spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, - taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_TASK_SETUP)) + spans = append(spans, createOperationSpan(taskExecution.Closure.CreatedAt, taskExecution.Closure.UpdatedAt, taskSetup)) } else { // frontend overhead - spans = append(spans, createCategoricalSpan(taskExecution.Closure.CreatedAt, - taskExecution.Closure.StartedAt, admin.CategoricalSpanInfo_TASK_SETUP)) + spans = append(spans, createOperationSpan(taskExecution.Closure.CreatedAt, taskExecution.Closure.StartedAt, taskSetup)) // check if plugin has completed yet if taskExecution.Closure.Duration == nil || reflect.DeepEqual(taskExecution.Closure.Duration, emptyDuration) { - spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, - taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_TASK_RUNTIME)) + spans = append(spans, createOperationSpan(taskExecution.Closure.StartedAt, taskExecution.Closure.UpdatedAt, taskRuntime)) } else { // plugin execution taskEndTime := timestamppb.New(taskExecution.Closure.StartedAt.AsTime().Add(taskExecution.Closure.Duration.AsDuration())) - spans = append(spans, createCategoricalSpan(taskExecution.Closure.StartedAt, - taskEndTime, admin.CategoricalSpanInfo_TASK_RUNTIME)) + spans = append(spans, createOperationSpan(taskExecution.Closure.StartedAt, taskEndTime, taskRuntime)) // 
backend overhead if !taskExecution.Closure.UpdatedAt.AsTime().Before(taskEndTime.AsTime()) { - spans = append(spans, createCategoricalSpan(taskEndTime, - taskExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_TASK_TEARDOWN)) + spans = append(spans, createOperationSpan(taskEndTime, taskExecution.Closure.UpdatedAt, taskTeardown)) } } } - return &admin.Span{ + return &core.Span{ StartTime: taskExecution.Closure.CreatedAt, EndTime: taskExecution.Closure.UpdatedAt, - Info: &admin.Span_Reference{ - Reference: &admin.ReferenceSpanInfo{ - Id: &admin.ReferenceSpanInfo_TaskId{ - TaskId: taskExecution.Id, - }, - Spans: spans, - }, + Id: &core.Span_TaskId{ + TaskId: taskExecution.Id, }, + Spans: spans, } } // parseTaskExecutions partitions the task executions into a collection of Categorical and Reference Spans which are // appended to the provided spans argument. -func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin.Span, depth int) { +func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*core.Span, depth int) { // sort task executions sort.Slice(taskExecutions, func(i, j int) bool { x := taskExecutions[i].Closure.CreatedAt.AsTime() @@ -632,8 +599,7 @@ func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin. // iterate over task executions for index, taskExecution := range taskExecutions { if index > 0 { - *spans = append(*spans, createCategoricalSpan(taskExecutions[index-1].Closure.UpdatedAt, - taskExecution.Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_RESET)) + *spans = append(*spans, createOperationSpan(taskExecutions[index-1].Closure.UpdatedAt, taskExecution.Closure.CreatedAt, nodeReset)) } if depth != 0 { @@ -644,7 +610,7 @@ func parseTaskExecutions(taskExecutions []*admin.TaskExecution, spans *[]*admin. // parseTaskNodeExecutions partitions the TaskNode execution into a collection of Categorical and Reference Spans which // are appended to the provided spans argument. 
-func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*admin.Span, depth int) error { +func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecution *admin.NodeExecution, spans *[]*core.Span, depth int) error { // retrieve task executions taskExecutions, err := m.getTaskExecutions(ctx, admin.TaskExecutionListRequest{ NodeExecutionId: nodeExecution.Id, @@ -656,12 +622,10 @@ func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecuti // if no task executions then everything is execution overhead if len(taskExecutions) == 0 { - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, nodeSetup)) } else { // frontend overhead - *spans = append(*spans, createCategoricalSpan(nodeExecution.Closure.CreatedAt, - taskExecutions[0].Closure.CreatedAt, admin.CategoricalSpanInfo_NODE_SETUP)) + *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, taskExecutions[0].Closure.CreatedAt, nodeSetup)) // parse task executions parseTaskExecutions(taskExecutions, spans, depth) @@ -669,8 +633,8 @@ func (m *MetricsManager) parseTaskNodeExecution(ctx context.Context, nodeExecuti // backend overhead lastTask := taskExecutions[len(taskExecutions)-1] if !nodeExecution.Closure.UpdatedAt.AsTime().Before(lastTask.Closure.UpdatedAt.AsTime()) { - *spans = append(*spans, createCategoricalSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, - nodeExecution.Closure.UpdatedAt, admin.CategoricalSpanInfo_NODE_TEARDOWN)) + *spans = append(*spans, createOperationSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, + nodeExecution.Closure.UpdatedAt, nodeTeardown)) } } diff --git a/pkg/manager/impl/metrics_manager_test.go 
b/pkg/manager/impl/metrics_manager_test.go index 2d573111c..2958285b8 100644 --- a/pkg/manager/impl/metrics_manager_test.go +++ b/pkg/manager/impl/metrics_manager_test.go @@ -87,34 +87,34 @@ func getMockWorkflowManager(workflow *admin.Workflow) interfaces.WorkflowInterfa return &mockWorkflowManager } -func parseSpansInfo(spans []*admin.Span) (map[string][]int64, int) { - categoryDurations := make(map[string][]int64) +func parseSpans(spans []*core.Span) (map[string][]int64, int) { + operationDurations := make(map[string][]int64) referenceCount := 0 for _, span := range spans { - switch info := span.Info.(type) { - case *admin.Span_Category: - category := info.Category.Category + switch id := span.Id.(type) { + case *core.Span_OperationId: + operationID := id.OperationId duration := span.EndTime.Seconds - span.StartTime.Seconds - if array, exists := categoryDurations[category]; exists { - categoryDurations[category] = append(array, duration) + if array, exists := operationDurations[operationID]; exists { + operationDurations[operationID] = append(array, duration) } else { - categoryDurations[category] = []int64{duration} + operationDurations[operationID] = []int64{duration} } - case *admin.Span_Reference: + default: referenceCount++ } } - return categoryDurations, referenceCount + return operationDurations, referenceCount } func TestParseBranchNodeExecution(t *testing.T) { tests := []struct { - name string - nodeExecution *admin.NodeExecution - nodeExecutions []*admin.NodeExecution - categoryDurations map[string][]int64 - referenceCount int + name string + nodeExecution *admin.NodeExecution + nodeExecutions []*admin.NodeExecution + operationDurations map[string][]int64 + referenceCount int }{ { "NotStarted", @@ -131,7 +131,7 @@ func TestParseBranchNodeExecution(t *testing.T) { }, nil, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, + nodeSetup: []int64{5}, }, 0, }, @@ -162,7 +162,7 @@ func TestParseBranchNodeExecution(t *testing.T) { 
}, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + nodeSetup: []int64{10}, }, 1, }, @@ -193,8 +193,8 @@ func TestParseBranchNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{20}, + nodeSetup: []int64{10}, + nodeTeardown: []int64{20}, }, 1, }, @@ -234,13 +234,13 @@ func TestParseBranchNodeExecution(t *testing.T) { }, } - spans := make([]*admin.Span, 0) + spans := make([]*core.Span, 0) err := metricsManager.parseBranchNodeExecution(context.TODO(), test.nodeExecution, branchNode, &spans, -1) assert.Nil(t, err) // validate spans - categoryDurations, referenceCount := parseSpansInfo(spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, referenceCount := parseSpans(spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) assert.Equal(t, test.referenceCount, referenceCount) }) } @@ -248,12 +248,12 @@ func TestParseBranchNodeExecution(t *testing.T) { func TestParseDynamicNodeExecution(t *testing.T) { tests := []struct { - name string - nodeExecution *admin.NodeExecution - taskExecutions []*admin.TaskExecution - nodeExecutions []*admin.NodeExecution - categoryDurations map[string][]int64 - referenceCount int + name string + nodeExecution *admin.NodeExecution + taskExecutions []*admin.TaskExecution + nodeExecutions []*admin.NodeExecution + operationDurations map[string][]int64 + referenceCount int }{ { "NotStarted", @@ -268,7 +268,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { nil, nil, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, + nodeSetup: []int64{5}, }, 0, }, @@ -297,7 +297,7 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, nil, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + nodeSetup: []int64{10}, }, 1, }, @@ -349,8 +349,8 @@ func 
TestParseDynamicNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_RESET.String(): []int64{15}, + nodeSetup: []int64{10}, + nodeReset: []int64{15}, }, 2, }, @@ -402,9 +402,9 @@ func TestParseDynamicNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_RESET.String(): []int64{15}, - admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{20}, + nodeSetup: []int64{10}, + nodeReset: []int64{15}, + nodeTeardown: []int64{20}, }, 2, }, @@ -446,13 +446,13 @@ func TestParseDynamicNodeExecution(t *testing.T) { } // parse node execution - spans := make([]*admin.Span, 0) + spans := make([]*core.Span, 0) err := metricsManager.parseDynamicNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) assert.Nil(t, err) // validate spans - categoryDurations, referenceCount := parseSpansInfo(spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, referenceCount := parseSpans(spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) assert.Equal(t, test.referenceCount, referenceCount) }) } @@ -460,9 +460,9 @@ func TestParseDynamicNodeExecution(t *testing.T) { func TestParseGateNodeExecution(t *testing.T) { tests := []struct { - name string - nodeExecution *admin.NodeExecution - categoryDurations map[string][]int64 + name string + nodeExecution *admin.NodeExecution + operationDurations map[string][]int64 }{ { "NotStarted", @@ -475,7 +475,7 @@ func TestParseGateNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, + nodeSetup: []int64{5}, }, }, { @@ -489,8 +489,8 @@ func TestParseGateNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_IDLE.String(): 
[]int64{5}, + nodeSetup: []int64{10}, + nodeIdle: []int64{5}, }, }, { @@ -504,9 +504,9 @@ func TestParseGateNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_IDLE.String(): []int64{400}, - admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{15}, + nodeSetup: []int64{10}, + nodeIdle: []int64{400}, + nodeTeardown: []int64{15}, }, }, } @@ -517,23 +517,23 @@ func TestParseGateNodeExecution(t *testing.T) { metricsManager := MetricsManager{} // parse node execution - spans := make([]*admin.Span, 0) + spans := make([]*core.Span, 0) metricsManager.parseGateNodeExecution(context.TODO(), test.nodeExecution, &spans) // validate spans - categoryDurations, _ := parseSpansInfo(spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, _ := parseSpans(spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) }) } } func TestParseLaunchPlanNodeExecution(t *testing.T) { tests := []struct { - name string - nodeExecution *admin.NodeExecution - execution *admin.Execution - categoryDurations map[string][]int64 - referenceCount int + name string + nodeExecution *admin.NodeExecution + execution *admin.Execution + operationDurations map[string][]int64 + referenceCount int }{ { "NotStarted", @@ -547,7 +547,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { }, nil, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, + nodeSetup: []int64{5}, }, 0, }, @@ -575,7 +575,7 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + nodeSetup: []int64{10}, }, 1, }, @@ -603,8 +603,8 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{15}, + 
nodeSetup: []int64{10}, + nodeTeardown: []int64{15}, }, 1, }, @@ -675,13 +675,13 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { } // parse node execution - spans := make([]*admin.Span, 0) + spans := make([]*core.Span, 0) err := metricsManager.parseLaunchPlanNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) assert.Nil(t, err) // validate spans - categoryDurations, referenceCount := parseSpansInfo(spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, referenceCount := parseSpans(spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) assert.Equal(t, test.referenceCount, referenceCount) }) } @@ -689,11 +689,11 @@ func TestParseLaunchPlanNodeExecution(t *testing.T) { func TestParseSubworkflowNodeExecution(t *testing.T) { tests := []struct { - name string - nodeExecution *admin.NodeExecution - nodeExecutions []*admin.NodeExecution - categoryDurations map[string][]int64 - referenceCount int + name string + nodeExecution *admin.NodeExecution + nodeExecutions []*admin.NodeExecution + operationDurations map[string][]int64 + referenceCount int }{ { "NotStarted", @@ -710,7 +710,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, nil, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, + nodeSetup: []int64{5}, }, 0, }, @@ -752,7 +752,7 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + nodeSetup: []int64{10}, }, 1, }, @@ -794,8 +794,8 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{20}, + nodeSetup: []int64{10}, + nodeTeardown: []int64{20}, }, 1, }, @@ -840,13 +840,13 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { } // parse node execution - spans := make([]*admin.Span, 
0) + spans := make([]*core.Span, 0) err := metricsManager.parseSubworkflowNodeExecution(context.TODO(), test.nodeExecution, &core.Identifier{}, &spans, -1) assert.Nil(t, err) // validate spans - categoryDurations, referenceCount := parseSpansInfo(spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, referenceCount := parseSpans(spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) assert.Equal(t, test.referenceCount, referenceCount) }) } @@ -854,9 +854,9 @@ func TestParseSubworkflowNodeExecution(t *testing.T) { func TestParseTaskExecution(t *testing.T) { tests := []struct { - name string - taskExecution *admin.TaskExecution - categoryDurations map[string][]int64 + name string + taskExecution *admin.TaskExecution + operationDurations map[string][]int64 }{ { "NotStarted", @@ -869,7 +869,7 @@ func TestParseTaskExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_TASK_SETUP.String(): []int64{5}, + taskSetup: []int64{5}, }, }, { @@ -883,8 +883,8 @@ func TestParseTaskExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_TASK_SETUP.String(): []int64{5}, - admin.CategoricalSpanInfo_TASK_RUNTIME.String(): []int64{600}, + taskSetup: []int64{5}, + taskRuntime: []int64{600}, }, }, { @@ -898,9 +898,9 @@ func TestParseTaskExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_TASK_SETUP.String(): []int64{5}, - admin.CategoricalSpanInfo_TASK_RUNTIME.String(): []int64{400}, - admin.CategoricalSpanInfo_TASK_TEARDOWN.String(): []int64{10}, + taskSetup: []int64{5}, + taskRuntime: []int64{400}, + taskTeardown: []int64{10}, }, }, } @@ -909,12 +909,12 @@ func TestParseTaskExecution(t *testing.T) { t.Run(test.name, func(t *testing.T) { // parse task execution span := parseTaskExecution(test.taskExecution) - spanReference, ok := span.Info.(*admin.Span_Reference) + _, ok := span.Id.(*core.Span_TaskId) assert.True(t, ok) // 
validate spans - categoryDurations, referenceCount := parseSpansInfo(spanReference.Reference.Spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, referenceCount := parseSpans(span.Spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) assert.Equal(t, 0, referenceCount) }) } @@ -922,10 +922,10 @@ func TestParseTaskExecution(t *testing.T) { func TestParseTaskExecutions(t *testing.T) { tests := []struct { - name string - taskExecutions []*admin.TaskExecution - categoryDurations map[string][]int64 - referenceCount int + name string + taskExecutions []*admin.TaskExecution + operationDurations map[string][]int64 + referenceCount int }{ { "SingleAttempt", @@ -963,7 +963,7 @@ func TestParseTaskExecutions(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_RESET.String(): []int64{20}, + nodeReset: []int64{20}, }, 2, }, @@ -972,12 +972,12 @@ func TestParseTaskExecutions(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { // parse task executions - spans := make([]*admin.Span, 0) + spans := make([]*core.Span, 0) parseTaskExecutions(test.taskExecutions, &spans, -1) // validate spans - categoryDurations, referenceCount := parseSpansInfo(spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, referenceCount := parseSpans(spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) assert.Equal(t, test.referenceCount, referenceCount) }) } @@ -985,11 +985,11 @@ func TestParseTaskExecutions(t *testing.T) { func TestParseTaskNodeExecution(t *testing.T) { tests := []struct { - name string - nodeExecution *admin.NodeExecution - taskExecutions []*admin.TaskExecution - categoryDurations map[string][]int64 - referenceCount int + name string + nodeExecution *admin.NodeExecution + taskExecutions []*admin.TaskExecution + operationDurations map[string][]int64 + 
referenceCount int }{ { "NotStarted", @@ -1003,7 +1003,7 @@ func TestParseTaskNodeExecution(t *testing.T) { }, nil, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{5}, + nodeSetup: []int64{5}, }, 0, }, @@ -1028,7 +1028,7 @@ func TestParseTaskNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, + nodeSetup: []int64{10}, }, 1, }, @@ -1053,8 +1053,8 @@ func TestParseTaskNodeExecution(t *testing.T) { }, }, map[string][]int64{ - admin.CategoricalSpanInfo_NODE_SETUP.String(): []int64{10}, - admin.CategoricalSpanInfo_NODE_TEARDOWN.String(): []int64{15}, + nodeSetup: []int64{10}, + nodeTeardown: []int64{15}, }, 1, }, @@ -1069,13 +1069,13 @@ func TestParseTaskNodeExecution(t *testing.T) { } // parse node execution - spans := make([]*admin.Span, 0) + spans := make([]*core.Span, 0) err := metricsManager.parseTaskNodeExecution(context.TODO(), test.nodeExecution, &spans, -1) assert.Nil(t, err) // validate spans - categoryDurations, referenceCount := parseSpansInfo(spans) - assert.True(t, reflect.DeepEqual(test.categoryDurations, categoryDurations)) + operationDurations, referenceCount := parseSpans(spans) + assert.True(t, reflect.DeepEqual(test.operationDurations, operationDurations)) assert.Equal(t, test.referenceCount, referenceCount) }) } From 047f18f210ef286e619f2062ee5c4987b43f9e38 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Wed, 22 Mar 2023 18:33:12 -0500 Subject: [PATCH 26/27] bumped flyteidl deps Signed-off-by: Daniel Rammer --- go.mod | 4 +--- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/go.mod b/go.mod index 4d962503c..6710d33bb 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/cloudevents/sdk-go/v2 v2.8.0 github.com/coreos/go-oidc v2.2.1+incompatible github.com/evanphx/json-patch v4.12.0+incompatible - github.com/flyteorg/flyteidl v1.3.13 + github.com/flyteorg/flyteidl v1.3.14 github.com/flyteorg/flyteplugins 
v1.0.40 github.com/flyteorg/flytepropeller v1.1.70 github.com/flyteorg/flytestdlib v1.0.15 @@ -209,5 +209,3 @@ require ( ) replace github.com/robfig/cron/v3 => github.com/unionai/cron/v3 v3.0.2-0.20210825070134-bfc34418fe84 - -replace github.com/flyteorg/flyteidl => github.com/flyteorg/flyteidl v1.3.13-0.20230321181500-b7fd869e65be diff --git a/go.sum b/go.sum index 8c1757520..f2fe7c70b 100644 --- a/go.sum +++ b/go.sum @@ -312,8 +312,8 @@ github.com/fatih/structs v1.0.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga github.com/fatih/structs v1.1.0/go.mod h1:9NiDSp5zOcgEDl+j00MP/WkGVPOlPRLejGD8Ga6PJ7M= github.com/felixge/httpsnoop v1.0.1 h1:lvB5Jl89CsZtGIWuTcDM1E/vkVs49/Ml7JJe07l8SPQ= github.com/felixge/httpsnoop v1.0.1/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= -github.com/flyteorg/flyteidl v1.3.13-0.20230321181500-b7fd869e65be h1:zGGUhPqIa+6wD9pPlV28i37zXHQgu7feF4a9kJ4mDuo= -github.com/flyteorg/flyteidl v1.3.13-0.20230321181500-b7fd869e65be/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= +github.com/flyteorg/flyteidl v1.3.14 h1:o5M0g/r6pXTPu5PEurbYxbQmuOu3hqqsaI2M6uvK0N8= +github.com/flyteorg/flyteidl v1.3.14/go.mod h1:Pkt2skI1LiHs/2ZoekBnyPhuGOFMiuul6HHcKGZBsbM= github.com/flyteorg/flyteplugins v1.0.40 h1:RTsYingqmqr13qBbi4CB2ArXDHNHUOkAF+HTLJQiQ/s= github.com/flyteorg/flyteplugins v1.0.40/go.mod h1:qyUPqVspLcLGJpKxVwHDWf+kBpOGuItOxCaF6zAmDio= github.com/flyteorg/flytepropeller v1.1.70 h1:/d1qqz13rdVADM85ST70eerAdBstJJz9UUB/mNSZi0w= From a4a2330224ebc1feb78a8d3f05327ad83ae354a0 Mon Sep 17 00:00:00 2001 From: Daniel Rammer Date: Mon, 27 Mar 2023 11:57:27 -0500 Subject: [PATCH 27/27] using consts for start-node and end-node Signed-off-by: Daniel Rammer --- pkg/manager/impl/metrics_manager.go | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/manager/impl/metrics_manager.go b/pkg/manager/impl/metrics_manager.go index 1b9a0be18..a6d010b1e 100644 --- a/pkg/manager/impl/metrics_manager.go +++ 
b/pkg/manager/impl/metrics_manager.go @@ -12,6 +12,8 @@ import ( "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/admin" "github.com/flyteorg/flyteidl/gen/pb-go/flyteidl/core" + "github.com/flyteorg/flytepropeller/pkg/apis/flyteworkflow/v1alpha1" + "github.com/flyteorg/flytestdlib/promutils" "github.com/golang/protobuf/ptypes/duration" @@ -254,7 +256,7 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec } } else { // between task execution(s) and node execution(s) overhead - startNode := nodeExecutions["start-node"] + startNode := nodeExecutions[v1alpha1.StartNodeID] *spans = append(*spans, createOperationSpan(taskExecutions[len(taskExecutions)-1].Closure.UpdatedAt, startNode.Closure.UpdatedAt, nodeReset)) @@ -270,7 +272,7 @@ func (m *MetricsManager) parseDynamicNodeExecution(ctx context.Context, nodeExec } // backened overhead - latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", + latestUpstreamNode := m.getLatestUpstreamNodeExecution(v1alpha1.EndNodeID, nodeExecutionData.DynamicWorkflow.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createOperationSpan(latestUpstreamNode.Closure.UpdatedAt, nodeExecution.Closure.UpdatedAt, nodeTeardown)) @@ -302,7 +304,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex } // check if workflow has started - startNode := nodeExecutions["start-node"] + startNode := nodeExecutions[v1alpha1.StartNodeID] if startNode.Closure.UpdatedAt == nil || reflect.DeepEqual(startNode.Closure.UpdatedAt, emptyTimestamp) { spans = append(spans, createOperationSpan(execution.Closure.CreatedAt, execution.Closure.UpdatedAt, workflowSetup)) } else { @@ -315,7 +317,7 @@ func (m *MetricsManager) parseExecution(ctx context.Context, execution *admin.Ex } // compute backend overhead - latestUpstreamNode := 
m.getLatestUpstreamNodeExecution("end-node", + latestUpstreamNode := m.getLatestUpstreamNodeExecution(v1alpha1.EndNodeID, workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && !execution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { spans = append(spans, createOperationSpan(latestUpstreamNode.Closure.UpdatedAt, @@ -466,7 +468,7 @@ func (m *MetricsManager) parseNodeExecutions(ctx context.Context, nodeExecutions // iterate over sorted node executions for _, nodeExecution := range sortedNodeExecutions { specNodeID := nodeExecution.Metadata.SpecNodeId - if specNodeID == "start-node" || specNodeID == "end-node" { + if specNodeID == v1alpha1.StartNodeID || specNodeID == v1alpha1.EndNodeID { continue } @@ -523,7 +525,7 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, nodeExecution.Closure.UpdatedAt, nodeSetup)) } else { // frontend overhead - startNode := nodeExecutions["start-node"] + startNode := nodeExecutions[v1alpha1.StartNodeID] *spans = append(*spans, createOperationSpan(nodeExecution.Closure.CreatedAt, startNode.Closure.UpdatedAt, nodeSetup)) // retrieve workflow @@ -539,7 +541,7 @@ func (m *MetricsManager) parseSubworkflowNodeExecution(ctx context.Context, } // backened overhead - latestUpstreamNode := m.getLatestUpstreamNodeExecution("end-node", + latestUpstreamNode := m.getLatestUpstreamNodeExecution(v1alpha1.EndNodeID, workflow.Closure.CompiledWorkflow.Primary.Connections.Upstream, nodeExecutions) if latestUpstreamNode != nil && !nodeExecution.Closure.UpdatedAt.AsTime().Before(latestUpstreamNode.Closure.UpdatedAt.AsTime()) { *spans = append(*spans, createOperationSpan(latestUpstreamNode.Closure.UpdatedAt, nodeExecution.Closure.UpdatedAt, nodeTeardown))