Skip to content

Commit

Permalink
Initialize Imara project
Browse files Browse the repository at this point in the history
This project showcases GenAI application development in Google Cloud.
  • Loading branch information
jerop committed Feb 23, 2024
0 parents commit 752c445
Show file tree
Hide file tree
Showing 31 changed files with 41,410 additions and 0 deletions.
103 changes: 103 additions & 0 deletions .cloudbuild/cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Default values for the user-defined substitutions referenced by the steps.
substitutions:
  # Destination passed to `gsutil cp` when the data directory is copied.
  _GCS_BUCKET: 'gs://rlhf-artifacts'
  # Registry name passed to registry.upload and used in the template URI.
  _PIPELINE_REGISTRY: 'rlhf-pipelines'
  # Region passed to registry.upload and used as the template URI host prefix.
  _REGION: 'us-central1'

steps:
# Install (or upgrade) the packages listed in requirements.txt.
# NOTE(review): --user targets the per-user site directory, presumably so the
# packages remain importable in the later python:3.11 steps; confirm.
- id: install requirements
  name: python:3.11
  entrypoint: python
  args: ['-m', 'pip', 'install', '-U', '-r', 'requirements.txt', '--user']
# Run the project's tests with pytest, invoked as a module.
- id: run unit tests
  name: python:3.11
  entrypoint: python
  args: ['-m', 'pytest']
# Recursively copy the local `data` directory to the _GCS_BUCKET destination.
- id: copy data to bucket
  name: gcr.io/cloud-builders/gsutil
  entrypoint: gsutil
  args: ['cp', '-r', 'data', '${_GCS_BUCKET}']
# Compile the rlhf.tune pipeline function with the KFP compiler and write the
# result to metadata.COMPILED_PIPELINE_PATH.
- id: compile tuning pipeline
  name: python:3.11
  entrypoint: python
  args:
  - -c
  - |
    from kfp.compiler import Compiler
    from pkg.tuner import rlhf, metadata

    pipeline_compiler = Compiler()
    pipeline_compiler.compile(
        pipeline_func=rlhf.tune,
        package_path=metadata.COMPILED_PIPELINE_PATH,
    )
# Hand the compiled pipeline file to pkg.tuner.registry.upload together with
# the build's project, region and pipeline registry name.
- id: upload tuning pipeline
  name: python:3.11
  entrypoint: python
  args:
  - -c
  - |
    from pkg.tuner import registry, metadata

    # The quoted placeholders are filled in by Cloud Build before Python runs.
    upload_kwargs = dict(
        project_id='${PROJECT_ID}',
        region='${_REGION}',
        pipeline_registry='${_PIPELINE_REGISTRY}',
        compiled_pipeline_path=metadata.COMPILED_PIPELINE_PATH,
    )
    registry.upload(**upload_kwargs)
# Create and run a Vertex AI PipelineJob from the `latest` rlhf-tune-pipeline
# template in the pipeline registry.
- id: tune foundation model
  name: python:3.11
  entrypoint: python
  args:
  - -c
  - |
    import google.cloud.aiplatform as aiplatform
    from pkg.tuner import steps, metadata, parameters

    REWARD_MODEL_TRAIN_STEPS = steps.get_reward_model_train_steps()
    REINFORCEMENT_LEARNING_TRAIN_STEPS = steps.get_reinforcement_learning_train_steps()
    PIPELINE_TEMPLATE_URI = 'https://${_REGION}-kfp.pkg.dev/${PROJECT_ID}/${_PIPELINE_REGISTRY}/rlhf-tune-pipeline/latest'

    job = aiplatform.PipelineJob(
        display_name=metadata.MODEL_DISPLAY_NAME,
        pipeline_root=metadata.PIPELINE_ROOT,
        template_path=PIPELINE_TEMPLATE_URI,
        parameter_values=parameters.get_values(
            preference_dataset=metadata.PREFERENCE_DATASET,
            prompt_dataset=metadata.PROMPT_DATASET,
            eval_dataset=metadata.EVALUATION_DATASET,
            reward_model_train_steps=REWARD_MODEL_TRAIN_STEPS,
            reinforcement_learning_train_steps=REINFORCEMENT_LEARNING_TRAIN_STEPS,
        ),
        # Pin the job to the same project and region the template URI points
        # at, so overriding _REGION moves the job together with the template
        # instead of leaving it in the SDK's default location.
        project='${PROJECT_ID}',
        location='${_REGION}',
    )
    job.run()
# TODO: add Cloud Deploy steps.

# Time limit for the whole build: 86400 seconds, i.e. 24 hours.
timeout: '86400s'
18 changes: 18 additions & 0 deletions .github/workflows/hello.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Demo workflow: on every push, print details about the run, check out the
# repository and list its files.
name: GitHub Actions Demo
run-name: ${{ github.actor }} is testing out GitHub Actions 🚀
on: [push]
# Least privilege: the job only reads the repository contents.
permissions:
  contents: read
jobs:
  Explore-GitHub-Actions:
    runs-on: ubuntu-latest
    steps:
      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
      - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by GitHub!"
      # Branch names are user-controlled, so the ref is read from the runner's
      # GITHUB_REF environment variable rather than expanded into the script
      # text, where shell metacharacters in the name would be executed.
      - run: echo "🔎 The name of your branch is $GITHUB_REF and your repository is ${{ github.repository }}."
      - name: Check out repository code
        uses: actions/checkout@v4
      - run: echo "💡 The ${{ github.repository }} repository has been cloned to the runner."
      - run: echo "🖥️ The workflow is now ready to test your code on the runner."
      - name: List files in the repository
        # Quoted environment variable: safe against spaces in the path.
        run: |
          ls "$GITHUB_WORKSPACE"
      - run: echo "🍏 This job's status is ${{ job.status }}."
160 changes: 160 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a Python script from a template
# before PyInstaller builds the exe, so as to inject the date and other info into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
Loading

0 comments on commit 752c445

Please sign in to comment.