name: AI Chatbot Evaluation

on:
  # push:
  #   branches:
  #     - main
  # pull_request:
  #   branches:
  #     - main
  workflow_dispatch:
    inputs:
      input_testset_id:
        description: 'Testset ID'
        required: true
      scoring_config_id:
        description: 'Scoring Config ID'
        required: true
  repository_dispatch:
    types: [start-evaluation]

permissions:
  contents: read

jobs:
  evaluation-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python 3.11
        uses: actions/setup-python@v3
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Set PR testset and scoring config
        if: github.event_name == 'push' || github.event_name == 'pull_request'
        run: |
          echo "DEFAULT_TESTSET_ID=214" >> $GITHUB_ENV
          echo "DEFAULT_SCORING_CONFIG_ID=59" >> $GITHUB_ENV

      - name: Run test
        env:
          # API keys
          SCORECARD_API_KEY: ${{ secrets.SCORECARD_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Astra DB credentials
          ASTRA_DB_API_ENDPOINT: ${{ secrets.ASTRA_DB_API_ENDPOINT }}
          ASTRA_DB_APPLICATION_TOKEN: ${{ secrets.ASTRA_DB_APPLICATION_TOKEN }}
          ASTRA_DB_TABLE_NAME: ${{ secrets.ASTRA_DB_TABLE_NAME }}
          # Testset and Scoring Config IDs, resolved in order of precedence:
          # 1. Inputs from a manual trigger (workflow_dispatch), if provided.
          # 2. Values sent by an external trigger (repository_dispatch).
          # 3. The default values exported to the environment above.
          INPUT_TESTSET_ID: ${{ github.event.inputs.input_testset_id || github.event.client_payload.input_testset_id || env.DEFAULT_TESTSET_ID }}
          SCORING_CONFIG_ID: ${{ github.event.inputs.scoring_config_id || github.event.client_payload.scoring_config_id || env.DEFAULT_SCORING_CONFIG_ID }}
        run: python run_tests.py
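
The repository_dispatch trigger above lets an external system start this workflow through GitHub's documented REST endpoint (POST /repos/{owner}/{repo}/dispatches). Below is a minimal sketch of such a request, assuming the requests package and a token with repo scope in GITHUB_TOKEN; the owner and repository names are placeholders.

import os
import requests

OWNER = "your-org"   # placeholder: repository owner
REPO = "your-repo"   # placeholder: repository name

resp = requests.post(
    f"https://api.github.com/repos/{OWNER}/{REPO}/dispatches",
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",
    },
    json={
        # Must match the `types` filter under `repository_dispatch`.
        "event_type": "start-evaluation",
        # Surfaces in the workflow as github.event.client_payload.*
        "client_payload": {
            "input_testset_id": "214",
            "scoring_config_id": "59",
        },
    },
    timeout=30,
)
resp.raise_for_status()  # the endpoint returns 204 No Content on success

Because a repository_dispatch event carries no workflow_dispatch inputs, the env expressions in the "Run test" step fall through to client_payload, so the IDs sent here are the ones the evaluation runs against.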
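
The final step only exports environment variables and invokes run_tests.py, whose contents are not shown in this file. The following is a hypothetical sketch of how such a script might consume the two resolved IDs; the helper name and error handling are illustrative, not the actual implementation.

import os
import sys

def read_required(name: str) -> str:
    """Read an environment variable, exiting with a clear error if unset."""
    value = os.environ.get(name, "").strip()
    if not value:
        sys.exit(f"Missing required environment variable: {name}")
    return value

def main() -> None:
    # These match the names exported under `env` in the "Run test" step.
    testset_id = read_required("INPUT_TESTSET_ID")
    scoring_config_id = read_required("SCORING_CONFIG_ID")
    print(f"Running testset {testset_id} with scoring config {scoring_config_id}")
    # ... load the testset, query the chatbot, and score the results here ...

if __name__ == "__main__":
    main()

Failing fast on a missing variable keeps a misconfigured trigger (for example, a dispatch payload with a typoed key) from silently running against empty IDs.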