"""Generate synthetic usage (load) against a Pinecone index.

Each iteration upserts a random batch of vectors, fetches a random sample of
the ids upserted so far, runs ten random-vector queries and finally deletes a
random sample of ids.

Configuration is read from environment variables:

* ``PINECONE_API_KEY`` -- API key handed to the client.
* ``INDEX_NAME`` -- index to use; it is created when not already listed.
* ``ITERATIONS`` -- number of iterations to run (parsed with ``int``).
* ``CLOUD`` / ``REGION`` -- serverless spec; read only when the index has to
  be created.
* ``GITHUB_OUTPUT`` -- file appended to by ``write_gh_output``.

Provenance: added as ``scripts/generate_usage.py`` in commit 7bcde685
("Initial load generation script", Jen Hamon, 2024-01-12).
"""

import os
import random
import string


def read_env_var(name):
    """Return the value of environment variable *name*.

    Raises:
        Exception: if the variable is not set.
    """
    value = os.environ.get(name)
    if value is None:
        raise Exception(f'Environment variable {name} is not set')
    return value


def random_string(length):
    """Return *length* random lowercase ASCII letters as a string."""
    return ''.join(random.choices(string.ascii_lowercase, k=length))


def random_embedding_values(dimension=2):
    """Return a list of *dimension* random floats drawn from ``[0.0, 1.0)``."""
    return [random.random() for _ in range(dimension)]


def write_gh_output(name, value):
    """Append a ``name=value`` line to the file named by ``GITHUB_OUTPUT``.

    Raises:
        KeyError: if ``GITHUB_OUTPUT`` is not set in the environment.
    """
    with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
        print(f'{name}={value}', file=fh)


DIMENSION = 1536  # common for openai embeddings


def create_index_if_not_exists(pc, index_name):
    """Create a serverless cosine index called *index_name* if none is listed.

    ``CLOUD`` and ``REGION`` are read from the environment only when the index
    actually has to be created.
    """
    if index_name in pc.list_indexes().names():
        return

    print(f'Index {index_name} does not exist, creating it')
    pc.create_index(
        name=index_name,
        metric='cosine',
        dimension=DIMENSION,
        spec={
            'serverless': {
                'cloud': read_env_var('CLOUD'),
                'region': read_env_var('REGION'),
            }
        },
    )


# Ids this script has upserted and not yet deleted.
upserted_ids = set()


def random_vectors(count):
    """Return *count* random vector records (id, values, metadata)."""
    return [
        {
            'id': random_string(10),
            'values': random_embedding_values(DIMENSION),
            'metadata': {
                'genre': random.choice(['action', 'comedy', 'drama']),
                'runtime': random.randint(60, 120),
            },
        }
        for _ in range(count)
    ]


def sample_ids(ids, max_count):
    """Return a random sample of between 1 and *max_count* distinct ids.

    The sample size is capped at ``len(ids)`` so a small pool cannot trigger
    "Sample larger than population". An empty pool (or ``max_count < 1``)
    yields an empty list.
    """
    # random.sample() requires a sequence; passing a set raises TypeError on
    # Python 3.11+. Sorting gives a deterministic order for seeded runs.
    population = sorted(ids)
    if not population or max_count < 1:
        return []
    upper = min(max_count, len(population))
    return random.sample(population, k=random.randint(1, upper))


def run_iteration(index):
    """Run one upsert / fetch / query / delete cycle against *index*."""
    # Upsert some vectors
    vector_list = random_vectors(random.randint(1, 100))
    index.upsert(vectors=vector_list)
    print(f'Upserted {len(vector_list)} vectors')

    # Only record the ids once the upsert call has returned without raising.
    upserted_ids.update(v['id'] for v in vector_list)

    # Fetch some vectors
    ids_to_fetch = sample_ids(upserted_ids, 20)
    print(f'Fetching {len(ids_to_fetch)} vectors')
    index.fetch(ids=ids_to_fetch)

    # Query some vectors
    print('Querying 10 times')
    for _ in range(10):
        # Query by vector values
        index.query(vector=random_embedding_values(DIMENSION), top_k=10)

    # Delete some vectors
    print('Deleting some vectors')
    ids_to_delete = sample_ids(upserted_ids, 10)
    index.delete(ids=ids_to_delete)

    # Forget deleted ids so later iterations do not fetch or delete them again.
    upserted_ids.difference_update(ids_to_delete)


def main():
    """Connect to Pinecone and run ``ITERATIONS`` load-generation cycles."""
    # Imported here rather than at module level so the helpers above stay
    # importable on machines that do not have the Pinecone SDK installed.
    from pinecone.grpc import PineconeGRPC

    pc = PineconeGRPC(api_key=read_env_var('PINECONE_API_KEY'))
    index_name = read_env_var('INDEX_NAME')
    iterations = int(read_env_var('ITERATIONS'))

    create_index_if_not_exists(pc, index_name)

    index = pc.Index(name=index_name)
    for _ in range(iterations):
        try:
            run_iteration(index)
        except Exception as e:
            # Deliberately best-effort: one failed request should not stop
            # the remaining iterations from generating load.
            print(f'Exception: {e}')


if __name__ == '__main__':
    main()