Skip to content

Commit

Permalink
Added fast register functionality (flyteorg#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
yindia authored and austin362667 committed May 7, 2024
1 parent 12c3228 commit d5e52e0
Show file tree
Hide file tree
Showing 22 changed files with 611 additions and 122 deletions.
2 changes: 2 additions & 0 deletions flytectl/.github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: "0"
- uses: actions/cache@v2
with:
path: |
Expand Down
1 change: 1 addition & 0 deletions flytectl/cmd/config/subcommand/register/files_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ type FilesConfig struct {
AssumableIamRole string `json:"assumableIamRole" pflag:", custom assumable iam auth role to register launch plans with."`
K8ServiceAccount string `json:"k8ServiceAccount" pflag:", custom kubernetes service account auth role to register launch plans with."`
OutputLocationPrefix string `json:"outputLocationPrefix" pflag:", custom output location prefix for offloaded types (files/schemas)."`
SourceUploadPath string `json:"sourceUploadPath" pflag:", Location for source code in storage."`
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions flytectl/cmd/config/subcommand/register/filesconfig_flags_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

39 changes: 1 addition & 38 deletions flytectl/cmd/config/subcommand/sandbox/config_flags.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions flytectl/cmd/config/subcommand/sandbox/config_flags_test.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions flytectl/cmd/config/subcommand/sandbox/sandbox_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ var (
DefaultConfig = &Config{}
)

// Config represents the config parameters exposed for the `sandbox` command.
//Config
type Config struct {
SourcesPath string `json:"sourcesPath" pflag:",Path to your source code path where flyte workflows and tasks are."`
Source string `json:"source" pflag:", Path of your source code"`
}
8 changes: 5 additions & 3 deletions flytectl/cmd/register/examples.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,19 +19,21 @@ Registers all latest flytesnacks example
Usage
`
)

var (
githubOrg = "flyteorg"
githubRepository = "flytesnacks"
archive = true
snackReleaseURL = "https://github.com/flyteorg/flytesnacks/releases/download/%s/flytesnacks-%s.tgz"
flyteManifest = "https://github.com/flyteorg/flytesnacks/releases/download/%s/flyte_tests_manifest.json"
)

func registerExamplesFunc(ctx context.Context, args []string, cmdCtx cmdCore.CommandContext) error {
flytesnacks, tag, err := getFlyteTestManifest()
flytesnacks, tag, err := getFlyteTestManifest(githubOrg, githubRepository)
if err != nil {
return err
}
rconfig.DefaultFilesConfig.Archive = archive
rconfig.DefaultFilesConfig.Archive = true
for _, v := range flytesnacks {
args := []string{
fmt.Sprintf(snackReleaseURL, tag, v.Name),
Expand Down
26 changes: 26 additions & 0 deletions flytectl/cmd/register/examples_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package register

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestRegisterExamplesFunc(t *testing.T) {
setup()
registerFilesSetup()
args = []string{""}
err := registerExamplesFunc(ctx, args, cmdCtx)
assert.NotNil(t, err)
}
func TestRegisterExamplesFuncErr(t *testing.T) {
setup()
registerFilesSetup()
githubRepository = "testingsnacks"
args = []string{""}

err := registerExamplesFunc(ctx, args, cmdCtx)
// TODO (Yuvraj) make test to success after fixing flytesnacks bug
assert.NotNil(t, err)
githubRepository = "flytesnacks"
}
60 changes: 49 additions & 11 deletions flytectl/cmd/register/files.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package register
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"

rconfig "github.com/flyteorg/flytectl/cmd/config/subcommand/register"
cmdCore "github.com/flyteorg/flytectl/cmd/core"
Expand All @@ -19,7 +21,18 @@ If there are already registered entities with v1 version then the command will f
::
bin/flytectl register file _pb_output/* -d development -p flytesnacks
There is no difference between registration and fast registration, In fast registration, the input provided by the user is fast serialized proto that is generated by pyflyte. If Flytectl finds any source code in users's input then it will consider registration as fast registration. Flytectl finds input file by searching an archive file whose name starts with fast and has .tar.gz extension When the user runs pyflyte with --fast flag then pyflyte creates serialize proto and it also archive create source code archive file in the same directory.
SourceUploadPath is an optional flag. By default, flytectl will create SourceUploadPath from your storage config. In case of s3 flytectl will upload code base in s3://{{DEFINE_BUCKET_IN_STORAGE_CONFIG}}/fast/{{VERSION}}-fast{{MD5_CREATED_BY_PYFLYTE}.tar.gz}.
::
bin/flytectl register file _pb_output/* -d development -p flytesnacks -v v2
In case of fast registration, If the SourceUploadPath flag is defined then In this case flytectl will not use the default directory for uploading the source code, it will override the destination path on the registration
::
bin/flytectl register file _pb_output/* -d development -p flytesnacks -v v2 --SourceUploadPath="s3://dummy/fast"
Using archive file.Currently supported are .tgz and .tar extension files and can be local or remote file served through http/https.
Use --archive flag.
Expand All @@ -33,8 +46,7 @@ Using local tgz file.
bin/flytectl register files _pb_output.tgz -d development -p flytesnacks --archive
If you want to continue executing registration on other files ignoring the errors including version conflicts then pass in
the continueOnError flag.
If you want to continue executing registration on other files ignoring the errors including version conflicts then pass in the continueOnError flag.
::
Expand Down Expand Up @@ -73,27 +85,53 @@ Override Output location prefix during registration.
::
bin/flytectl register file _pb_output/* -d development -p flytesnacks --continueOnError -v v2 -l "s3://dummy/prefix"
Usage
`
sourceCodeExtension = ".tar.gz"
)

func registerFromFilesFunc(ctx context.Context, args []string, cmdCtx cmdCore.CommandContext) error {
return Register(ctx, args, cmdCtx)
}

func Register(ctx context.Context, args []string, cmdCtx cmdCore.CommandContext) error {
dataRefs, tmpDir, _err := getSortedFileList(ctx, args)
if _err != nil {
logger.Errorf(ctx, "error while un-archiving files in tmp dir due to %v", _err)
return _err
var _err error
var dataRefs []string

// getSerializeOutputFiles will return you all proto and source code compress file in sorted order
dataRefs, tmpDir, err := getSerializeOutputFiles(ctx, args)
if err != nil {
logger.Errorf(ctx, "error while un-archiving files in tmp dir due to %v", err)
return err
}
logger.Infof(ctx, "Parsing file... Total(%v)", len(dataRefs))

// It will segregate serialize output files in valid proto,Invalid files if have any and source code(In case of fast serialize input files)
sourceCode, validProto, InvalidFiles := segregateSourceAndProtos(dataRefs)

// If any invalid files provide in input then through an error
if len(InvalidFiles) > 0 {
return fmt.Errorf("input package have some invalid files. try to run pyflyte package again %v", InvalidFiles)
}

// In case of fast serialize input upload source code to destination bucket
var sourceCodeName string
if len(sourceCode) > 0 {
logger.Infof(ctx, "Fast Registration detected")
_, sourceCodeName = filepath.Split(sourceCode)
if err = uploadFastRegisterArtifact(ctx, sourceCode, sourceCodeName, rconfig.DefaultFilesConfig.Version); err != nil {
return fmt.Errorf("please check your Storage Config. It failed while uploading the source code. %v", err)
}
logger.Infof(ctx, "Source code successfully uploaded %v/%v ", rconfig.DefaultFilesConfig.SourceUploadPath, sourceCodeName)
}
logger.Infof(ctx, "Parsing files... Total(%v)", len(dataRefs))
fastFail := !rconfig.DefaultFilesConfig.ContinueOnError

var registerResults []Result
for i := 0; i < len(dataRefs) && !(fastFail && _err != nil); i++ {
registerResults, _err = registerFile(ctx, dataRefs[i], registerResults, cmdCtx)
fastFail := rconfig.DefaultFilesConfig.ContinueOnError
for i := 0; i < len(validProto) && !(fastFail && _err != nil); i++ {
registerResults, _err = registerFile(ctx, validProto[i], sourceCodeName, registerResults, cmdCtx)
}

payload, _ := json.Marshal(registerResults)
registerPrinter := printer.Printer{}
_ = registerPrinter.JSONToTable(payload, projectColumns)
Expand Down
Loading

0 comments on commit d5e52e0

Please sign in to comment.