compatibility Test coverage (#1057)
adityabharadwaj198 authored Dec 31, 2024
1 parent dab8a08 commit e7d04cc
Showing 50 changed files with 3,634 additions and 828 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/backwards_compatibility_marqo_execution.yml
@@ -111,7 +111,7 @@ jobs:
# Step to install dependencies from requirements.txt
- name: Install Dependencies
run: |
-          pip install -r tests/backwards_compatibility_tests/requirements.txt
+          pip install -r tests/compatibility_tests/requirements.txt
# Step to configure AWS credentials
- name: Configure AWS credentials
@@ -136,7 +136,7 @@ jobs:
MODE: ${{ inputs.mode || github.event.inputs.mode }}
run: |
export PYTHONPATH=${{ github.workspace }}:$PYTHONPATH
-          python tests/backwards_compatibility_tests/compatibility_test_runner.py \
+          python tests/compatibility_tests/compatibility_test_runner.py \
--mode "$MODE" \
--from_version "$FROM_VERSION" \
--to_version "$TO_VERSION" \
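The runner is driven entirely by the three flags shown above. A minimal sketch of the CLI surface those flags imply, using argparse; the flag semantics are inferred from the workflow invocation, and the real compatibility_test_runner.py may parse or use them differently:

# Hypothetical sketch of the CLI implied by the workflow flags above; the
# actual compatibility_test_runner.py may differ.
import argparse

def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Run Marqo compatibility tests")
    parser.add_argument("--mode", required=True,
                        help="test mode passed in by the workflow")
    parser.add_argument("--from_version", required=True,
                        help="Marqo version used to prepare the test data")
    parser.add_argument("--to_version", required=True,
                        help="Marqo version the assertions run against")
    return parser.parse_args()

if __name__ == "__main__":
    args = parse_args()
    print(f"mode={args.mode}, from={args.from_version}, to={args.to_version}")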
@@ -113,15 +113,15 @@ jobs:
if [ "${{ github.event_name }}" == "workflow_dispatch" ] && [ -n "${{ github.event.inputs.to_version }}" ]; then
VERSION="${{ github.event.inputs.to_version }}"
else
-            VERSION=$(python tests/backwards_compatibility_tests/scripts/determine_to_version.py ${{ github.sha }})
+            VERSION=$(python tests/compatibility_tests/scripts/determine_to_version.py ${{ github.sha }})
fi
echo "to_version=${VERSION}" >> $GITHUB_OUTPUT
# Step to generate the list of versions to test
      - name: Generate version list # generates the from_version list and exposes it as a list in the step output
id: generate-versions
run: |
# Run the Python script and capture its output
-          VERSION_LIST=$(python tests/backwards_compatibility_tests/scripts/generate_versions.py ${{ steps.get-to-version.outputs.to_version }} ${{ env.MAX_VERSIONS_TO_TEST }})
+          VERSION_LIST=$(python tests/compatibility_tests/scripts/generate_versions.py ${{ steps.get-to-version.outputs.to_version }} ${{ env.MAX_VERSIONS_TO_TEST }})
echo "list=${VERSION_LIST}" >> $GITHUB_OUTPUT
# Step to display the versions to test
      - name: Display versions
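generate_versions.py receives the to_version and MAX_VERSIONS_TO_TEST and must print the from_version list that the workflow captures into its output. A sketch of that contract, assuming semantic-version strings and JSON output; the real script may source candidate versions differently (for example, from git tags):

# Hypothetical contract for generate_versions.py: given a to_version and a
# maximum count, print a JSON list of earlier versions to use as from_version.
import json
import sys

def generate_versions(to_version: str, max_versions: int) -> list:
    major, minor = (int(p) for p in to_version.split(".")[:2])
    # Walk back through prior minor releases of the same major line.
    stop = max(minor - 1 - max_versions, -1)
    return [f"{major}.{m}.0" for m in range(minor - 1, stop, -1)]

if __name__ == "__main__":
    print(json.dumps(generate_versions(sys.argv[1], int(sys.argv[2]))))

For example, generate_versions("2.13.0", 3) returns ["2.12.0", "2.11.0", "2.10.0"].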
743 changes: 0 additions & 743 deletions tests/backwards_compatibility_tests/compatibility_test_runner.py

This file was deleted.

4 changes: 0 additions & 4 deletions tests/backwards_compatibility_tests/requirements.txt

This file was deleted.

File renamed without changes.
@@ -0,0 +1 @@

Empty file.
@@ -0,0 +1,131 @@
import traceback

import pytest
from tests.compatibility_tests.base_test_case.base_compatibility_test import BaseCompatibilityTestCase

@pytest.mark.marqo_version('2.12.0')
class TestAddDocumentsv2_12(BaseCompatibilityTestCase):
structured_index_name = "test_add_doc_api_structured_index_2_12_0"

indexes_to_test_on = [
{
"indexName": structured_index_name,
"type": "structured",
"vectorNumericType": "float",
"model": "open_clip/ViT-B-32/laion2b_s34b_b79k",
"normalizeEmbeddings": True,
"textPreprocessing": {
"splitLength": 2,
"splitOverlap": 0,
"splitMethod": "sentence",
},
"imagePreprocessing": {"patchMethod": None},
"allFields": [
{"name": "image_field", "type": "image_pointer"},
{"name": "video_field_1", "type": "video_pointer"}, #TODO: write this example for video_pointer and audio_pointers
{"name": "audio_field_1", "type": "audio_pointer"},
{"name": "text_field_3", "type": "text", "features": ["lexical_search"]},
],
"tensorFields": ["video_field_1", "audio_field_1", "image_field", "text_field_3"],
"annParameters": {
"spaceType": "prenormalized-angular",
"parameters": {"efConstruction": 512, "m": 16},
}
}]

documents = [
        # These documents cannot be added right now because the model open_clip/ViT-B-32/laion2b_s34b_b79k
        # does not support audio or video. Switching to a model such as LanguageBind/Video_V1.5_FT_Audio_FT_Image
        # fails with: marqo.api.exceptions.ModelCacheManagementError: ModelCacheManagementError: You are trying
        # to load a model with size = `8` into device = `cpu`, which is larger than the device threshold = `1.6`.
        # Marqo CANNOT find enough space for the model. Please change the threshold by adjusting the environment
        # variables. Since these tests currently run on a non-GPU machine, these documents are skipped for now.
# {
# "video_field_1": "https://marqo-k400-video-test-dataset.s3.amazonaws.com/videos/---QUuC4vJs_000084_000094.mp4",
# "_id": "1"
# },
# {
# "audio_field_1": "https://marqo-ecs-50-audio-test-dataset.s3.amazonaws.com/audios/marqo-audio-test.mp3",
# "_id": "2"
# },
{
"image_field": "https://raw.githubusercontent.com/marqo-ai/marqo-api-tests/mainline/assets/ai_hippo_realistic.png",
"_id": "3"
},
{
"text_field_3": "hello there Padawan. Today you will begin your training to be a Jedi",
"_id": "4"
},
]

@classmethod
def tearDownClass(cls) -> None:
cls.indexes_to_delete = [index['indexName'] for index in cls.indexes_to_test_on]
super().tearDownClass()

@classmethod
def setUpClass(cls) -> None:
cls.indexes_to_delete = [index['indexName'] for index in cls.indexes_to_test_on]
super().setUpClass()

def prepare(self):
self.create_indexes(self.indexes_to_test_on)

errors = [] # Collect errors to report them at the end

for index in self.indexes_to_test_on:
            self.logger.info(
                f"Feeding documents to index {index.get('indexName')} in test case: {self.__class__.__name__}")
try:
                if index.get('type') == 'structured':
                    self.client.index(index_name=index['indexName']).add_documents(documents=self.documents)
except Exception as e:
errors.append((index, traceback.format_exc()))

all_results = {}

for index in self.indexes_to_test_on:
            self.logger.debug(f'Retrieving documents from {index.get("indexName")}')
index_name = index['indexName']
all_results[index_name] = {}

for doc in self.documents:
try:
doc_id = doc['_id']
all_results[index_name][doc_id] = self.client.index(index_name).get_document(doc_id)
except Exception as e:
errors.append((index, traceback.format_exc()))

if errors:
failure_message = "\n".join([
f"Failure in index {idx}, {error}"
for idx, error in errors
])
            self.logger.error(f"Some prepare steps failed:\n{failure_message}\nThe corresponding test for each affected index is expected to fail.")
self.save_results_to_file(all_results)

def test_add_doc(self):
self.logger.info(f"Running test_add_doc on {self.__class__.__name__}")
stored_results = self.load_results_from_file()
        test_failures = []  # collects subtest failures (assertion errors or any other exceptions)

for index in self.indexes_to_test_on:
index_name = index['indexName']
for doc in self.documents:
doc_id = doc['_id']
try:
with self.subTest(index=index_name, doc_id=doc_id):
expected_doc = stored_results[index_name][doc_id]
self.logger.debug(f"Printing expected doc {expected_doc}")
actual_doc = self.client.index(index_name).get_document(doc_id)
self.assertEqual(expected_doc, actual_doc)

except Exception as e:
test_failures.append((index_name, doc_id, traceback.format_exc()))

# After all subtests, raise a comprehensive failure if any occurred
if test_failures:
failure_message = "\n".join([
f"Failure in index {idx}, doc_id {doc_id}: {error}"
for idx, doc_id, error in test_failures
])
self.fail(f"Some subtests failed:\n{failure_message}")
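Both test classes depend on save_results_to_file and load_results_from_file, which BaseCompatibilityTestCase is assumed to provide. A plausible sketch of those helpers, assuming one JSON file per test class; the actual base class may persist results elsewhere or in another format:

# Plausible sketch of the persistence helpers assumed from
# BaseCompatibilityTestCase; the real base class may differ.
import json
from pathlib import Path

class ResultsPersistenceMixin:
    results_dir = Path("/tmp/marqo_compatibility_results")  # assumed location

    @classmethod
    def _results_path(cls) -> Path:
        # One JSON file per test class, so prepare() (run against from_version)
        # and the test phase (run against to_version) can share state even
        # across separate processes.
        return cls.results_dir / f"{cls.__name__}.json"

    def save_results_to_file(self, results: dict) -> None:
        self.results_dir.mkdir(parents=True, exist_ok=True)
        self._results_path().write_text(json.dumps(results, indent=2))

    def load_results_from_file(self) -> dict:
        return json.loads(self._results_path().read_text())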
@@ -0,0 +1,158 @@
import traceback

import pytest
from tests.compatibility_tests.base_test_case.base_compatibility_test import BaseCompatibilityTestCase

@pytest.mark.marqo_version('2.2.0')
class TestAddDocumentsv2_2(BaseCompatibilityTestCase):
structured_index_name = "test_add_doc_api_structured_index_2_2_0"

indexes_to_test_on = [
{
"indexName": structured_index_name,
"type": "structured",
"vectorNumericType": "float",
"model": "open_clip/ViT-B-32/laion2b_s34b_b79k",
"normalizeEmbeddings": True,
"textPreprocessing": {
"splitLength": 2,
"splitOverlap": 0,
"splitMethod": "sentence",
},
"imagePreprocessing": {"patchMethod": None},
"allFields": [
{"name": "text_field", "type": "text", "features": ["lexical_search"]},
{"name": "caption", "type": "text", "features": ["lexical_search", "filter"]},
{"name": "tags", "type": "array<text>", "features": ["filter"]},
{"name": "image_field", "type": "image_pointer"},
{"name": "my_int", "type": "int", "features": ["score_modifier"]},
                # this field combines the image_field and text_field above into a multimodal combination.
{
"name": "multimodal_field",
"type": "multimodal_combination",
"dependentFields": {"image_field": 0.9, "text_field": 0.1},
},
{"name": "boolean_field", "type": "bool"},
{"name": "float_field_1", "type": "float"},
{"name": "array_int_field_1", "type": "array<int>"},
{"name": "array_float_field_1", "type": "array<float>"},
{"name": "array_long_field_1", "type": "array<long>"},
{"name": "array_double_field_1", "type": "array<double>"},
{"name": "long_field_1", "type": "long"},
{"name": "double_field_1", "type": "double"},
],
"tensorFields": ["multimodal_field"],
"annParameters": {
"spaceType": "prenormalized-angular",
"parameters": {"efConstruction": 512, "m": 16},
}
}]

text_docs = [{
"text_field": "The Travels of Marco Polo",
"caption": "A 13th-century travelogue describing the travels of Polo",
"tags": ["wow", "this", "is", "awesome"],
"my_int": 123,
"boolean_field": True,
"float_field_1": 1.23,
"array_int_field_1": [1, 2, 3],
"array_float_field_1": [1.23, 2.34, 3.45],
"array_long_field_1": [1234567890, 2345678901, 3456789012],
"array_double_field_1": [1.234567890, 2.345678901, 3.456789012],
"long_field_1": 1234567890,
"double_field_1": 1.234567890,

"_id": "article_602"
},
{
"text_field": "Extravehicular Mobility Unit (EMU)",
"caption": "The EMU is a spacesuit that provides environmental protection",
"tags": ["space", "EMU", "NASA", "astronaut"],
"my_int": 354,
"boolean_field": True,
"float_field_1": 1.56,
"array_int_field_1": [4, 5, 6],
"array_float_field_1": [1.14, 2.21, 3.31],
"array_long_field_1": [3456789012, 1234567890, 2345678901],
"array_double_field_1": [1.234567890, 2.345678901, 3.456789012],
"long_field_1": 1234567890,
"double_field_1": 1.234567890,
"_id": "article_603"
}]

@classmethod
def tearDownClass(cls) -> None:
cls.indexes_to_delete = [index['indexName'] for index in cls.indexes_to_test_on]
super().tearDownClass()

@classmethod
def setUpClass(cls) -> None:
cls.indexes_to_delete = [index['indexName'] for index in cls.indexes_to_test_on]
super().setUpClass()

def prepare(self):
self.logger.debug(f"Creating indexes {self.indexes_to_test_on} in test case: {self.__class__.__name__}")
self.create_indexes(self.indexes_to_test_on)

self.logger.debug(f'Feeding documents to {self.indexes_to_test_on}')

errors = [] # Collect errors to report them at the end

for index in self.indexes_to_test_on:
try:
                if index.get('type') == 'structured':
                    self.client.index(index_name=index['indexName']).add_documents(documents=self.text_docs)
                else:
                    # Unstructured fallback: tensor fields must name fields present in text_docs.
                    self.client.index(index_name=index['indexName']).add_documents(
                        documents=self.text_docs, tensor_fields=["text_field", "caption"])
except Exception as e:
errors.append((index, traceback.format_exc()))

all_results = {}

for index in self.indexes_to_test_on:
index_name = index['indexName']
all_results[index_name] = {}

for doc in self.text_docs:
try:
doc_id = doc['_id']
all_results[index_name][doc_id] = self.client.index(index_name).get_document(doc_id)
except Exception as e:
errors.append((index, traceback.format_exc()))

if errors:
failure_message = "\n".join([
f"Failure in index {idx}, {error}"
for idx, error in errors
])
            self.logger.error(f"Some prepare steps failed:\n{failure_message}\nThe corresponding test for each affected index is expected to fail.")
self.save_results_to_file(all_results)

def test_add_doc(self):
self.logger.info(f"Running test_add_doc on {self.__class__.__name__}")
stored_results = self.load_results_from_file()
        test_failures = []  # collects subtest failures (assertion errors or any other exceptions)

for index in self.indexes_to_test_on:
index_name = index['indexName']
for doc in self.text_docs:
doc_id = doc['_id']
try:
with self.subTest(index=index_name, doc_id=doc_id):
expected_doc = stored_results[index_name][doc_id]
self.logger.debug(f"Printing expected doc {expected_doc}")
actual_doc = self.client.index(index_name).get_document(doc_id)
self.assertEqual(expected_doc, actual_doc)

except Exception as e:
test_failures.append((index_name, doc_id, traceback.format_exc()))

# After all subtests, raise a comprehensive failure if any occurred
if test_failures:
failure_message = "\n".join([
f"Failure in index {idx}, doc_id {doc_id}: {error}"
for idx, doc_id, error in test_failures
])
self.fail(f"Some subtests failed:\n{failure_message}")
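Each test class is pinned to a minimum Marqo version via @pytest.mark.marqo_version. One way a conftest.py hook could enforce that marker is sketched below: skip any test whose minimum version is newer than the from_version under test. The --from-version option and the comparison logic are assumptions for illustration, not the suite's actual collection code:

# Hypothetical conftest.py hook enforcing the marqo_version marker.
import pytest
from packaging.version import Version

def pytest_collection_modifyitems(config, items):
    from_version = config.getoption("--from-version", default=None)  # assumed option
    if from_version is None:
        return
    for item in items:
        marker = item.get_closest_marker("marqo_version")
        if marker and Version(marker.args[0]) > Version(from_version):
            # The tested feature did not exist in from_version, so skip.
            item.add_marker(pytest.mark.skip(
                reason=f"requires Marqo >= {marker.args[0]}"))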