Skip to content

Commit

Permalink
Remove deprecated queries option in query() (#272)
Browse files Browse the repository at this point in the history
## Problem

There was an argument on the `query()` method for passing multiple
queries at once. This has long been deprecated in the API and SDK, so
it's time to remove it.
 
## Solution

- Clean up the `queries` option to both `Index#query` and
`IndexGRPC#query`.
- Remove code no longer needed to interpret the many forms these query
options could take
- Cleanup and simplify response parsing logic
- Remove related tests passing `queries` kwarg

## Type of Change

- [x] Breaking change (fix or feature that would cause existing
functionality to not work as expected)

## Test Plan

Recently added integration test suite gives high confidence queries
still work after this change.
  • Loading branch information
jhamon authored Jan 13, 2024
1 parent 5370b09 commit d86dd1b
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 161 deletions.
41 changes: 7 additions & 34 deletions pinecone/data/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
FetchResponse,
QueryRequest,
QueryResponse,
QueryVector,
RpcStatus,
ScoredVector,
SingleQueryResults,
Expand All @@ -33,7 +32,6 @@
"FetchResponse",
"QueryRequest",
"QueryResponse",
"QueryVector",
"RpcStatus",
"ScoredVector",
"SingleQueryResults",
Expand All @@ -60,12 +58,8 @@
"async_req",
)

def parse_query_response(response: QueryResponse, unary_query: bool):
if unary_query:
response._data_store.pop("results", None)
else:
response._data_store.pop("matches", None)
response._data_store.pop("namespace", None)
def parse_query_response(response: QueryResponse):
response._data_store.pop("results", None)
return response

class Index():
Expand Down Expand Up @@ -324,7 +318,6 @@ def query(
top_k: int,
vector: Optional[List[float]] = None,
id: Optional[str] = None,
queries: Optional[Union[List[QueryVector], List[Tuple]]] = None,
namespace: Optional[str] = None,
filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
include_values: Optional[bool] = None,
Expand All @@ -351,13 +344,10 @@ def query(
Args:
vector (List[float]): The query vector. This should be the same length as the dimension of the index
being queried. Each `query()` request can contain only one of the parameters
`queries`, `id` or `vector`.. [optional]
`id` or `vector`.. [optional]
id (str): The unique ID of the vector to be used as a query vector.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
queries ([QueryVector]): DEPRECATED. The query vectors.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
`vector` or `id`. [optional]
top_k (int): The number of results to return for each query. Must be an integer greater than 1.
namespace (str): The namespace to fetch vectors from.
If not specified, the default namespace is used. [optional]
Expand All @@ -371,35 +361,19 @@ def query(
sparse_vector: (Union[SparseValues, Dict[str, Union[List[float], List[int]]]]): sparse values of the query vector.
Expected to be either a SparseValues object or a dict of the form:
{'indices': List[int], 'values': List[float]}, where the lists each have the same length.
Keyword Args:
Supports OpenAPI client keyword arguments. See pinecone.core.client.models.QueryRequest for more details.
Returns: QueryResponse object which contains the list of the closest vectors as ScoredVector objects,
and namespace name.
"""

def _query_transform(item):
if isinstance(item, QueryVector):
return item
if isinstance(item, tuple):
values, filter = fix_tuple_length(item, 2)
if filter is None:
return QueryVector(values=values, _check_type=_check_type)
else:
return QueryVector(values=values, filter=filter, _check_type=_check_type)
if isinstance(item, Iterable):
return QueryVector(values=item, _check_type=_check_type)
raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}")

_check_type = kwargs.pop("_check_type", False)
queries = list(map(_query_transform, queries)) if queries is not None else None

sparse_vector = self._parse_sparse_values_arg(sparse_vector)
args_dict = self._parse_non_empty_args(
[
("vector", vector),
("id", id),
("queries", queries),
("queries", None),
("top_k", top_k),
("namespace", namespace),
("filter", filter),
Expand All @@ -416,8 +390,7 @@ def _query_transform(item):
),
**{k: v for k, v in kwargs.items() if k in _OPENAPI_ENDPOINT_PARAMS},
)
unary_query = True if vector is not None or id else False
return parse_query_response(response, unary_query)
return parse_query_response(response)

@validate_and_convert_errors
def update(
Expand Down
2 changes: 0 additions & 2 deletions pinecone/grpc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,7 @@

from pinecone.core.grpc.protos.vector_service_pb2 import (
Vector as GRPCVector,
QueryVector as GRPCQueryVector,
SparseValues as GRPCSparseValues,
Vector,
QueryVector,
SparseValues
)
25 changes: 3 additions & 22 deletions pinecone/grpc/index_grpc.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import logging
import numbers
from typing import Optional, Dict, Iterable, Union, List, Tuple, Any, TypedDict, cast
from collections.abc import Mapping
from typing import Optional, Dict, Union, List, Tuple, Any, TypedDict, cast

from google.protobuf import json_format

Expand Down Expand Up @@ -31,7 +29,6 @@
)
from pinecone import Vector as NonGRPCVector
from pinecone.core.grpc.protos.vector_service_pb2_grpc import VectorServiceStub
from pinecone.utils import fix_tuple_length
from .base import GRPCIndexBase
from .future import PineconeGrpcFuture

Expand Down Expand Up @@ -293,7 +290,6 @@ def query(
self,
vector: Optional[List[float]] = None,
id: Optional[str] = None,
queries: Optional[Union[List[GRPCQueryVector], List[Tuple]]] = None,
namespace: Optional[str] = None,
top_k: Optional[int] = None,
filter: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
Expand Down Expand Up @@ -323,9 +319,6 @@ def query(
id (str): The unique ID of the vector to be used as a query vector.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
queries ([GRPCQueryVector]): DEPRECATED. The query vectors.
Each `query()` request can contain only one of the parameters
`queries`, `vector`, or `id`.. [optional]
top_k (int): The number of results to return for each query. Must be an integer greater than 1.
namespace (str): The namespace to fetch vectors from.
If not specified, the default namespace is used. [optional]
Expand All @@ -344,18 +337,7 @@ def query(
and namespace name.
"""

def _query_transform(item):
if isinstance(item, GRPCQueryVector):
return item
if isinstance(item, tuple):
values, filter = fix_tuple_length(item, 2)
filter = dict_to_proto_struct(filter)
return GRPCQueryVector(values=values, filter=filter)
if isinstance(item, Iterable):
return GRPCQueryVector(values=item)
raise ValueError(f"Invalid query vector value passed: cannot interpret type {type(item)}")

queries = list(map(_query_transform, queries)) if queries is not None else None
queries = None

if filter is not None:
filter_struct = dict_to_proto_struct(filter)
Expand All @@ -382,8 +364,7 @@ def _query_transform(item):
timeout = kwargs.pop("timeout", None)
response = self._wrap_grpc_call(self.stub.Query, request, timeout=timeout)
json_response = json_format.MessageToDict(response)
unary_query = True if vector is not None or id else False
return parse_query_response(json_response, unary_query, _check_type=False)
return parse_query_response(json_response, _check_type=False)

def update(
self,
Expand Down
66 changes: 14 additions & 52 deletions pinecone/grpc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,21 @@

from google.protobuf.struct_pb2 import Struct

def _generate_request_id() -> str:
return str(uuid.uuid4())

from pinecone.core.client.models import (
Vector as _Vector,
Usage,
ScoredVector,
SparseValues,
FetchResponse,
SingleQueryResults,
QueryResponse,
DescribeIndexStatsResponse,
NamespaceSummary,
)

from typing import NamedTuple, Optional

class QueryResponseKwargs(NamedTuple):
check_type: bool
namespace: Optional[str]
matches: Optional[list]
results: Optional[list]
usage: Usage

def _generate_request_id() -> str:
return str(uuid.uuid4())
from typing import Optional

def dict_to_proto_struct(d: Optional[dict]) -> "Struct":
if not d:
Expand Down Expand Up @@ -54,13 +46,11 @@ def parse_fetch_response(response: dict):
metadata=vec.get("metadata", None),
_check_type=False,
)

usage = parse_usage(response)

return FetchResponse(
vectors=vd,
namespace=namespace,
usage=usage,
usage=parse_usage(response),
_check_type=False
)

Expand All @@ -69,26 +59,8 @@ def parse_usage(response):
return Usage(read_units=int(u.get("readUnits", 0)))


def parse_query_response(response: dict, unary_query: bool, _check_type: bool = False):
res = []

# TODO: consider deleting this deprecated case
for match in response.get("results", []):
namespace = match.get("namespace", "")
m = []
if "matches" in match:
for item in match["matches"]:
sc = ScoredVector(
id=item["id"],
score=item.get("score", 0.0),
values=item.get("values", []),
sparse_values=parse_sparse_values(item.get("sparseValues")),
metadata=item.get("metadata", None),
)
m.append(sc)
res.append(SingleQueryResults(matches=m, namespace=namespace))

m = []
def parse_query_response(response: dict, _check_type: bool = False):
matches = []
for item in response.get("matches", []):
sc = ScoredVector(
id=item["id"],
Expand All @@ -98,24 +70,14 @@ def parse_query_response(response: dict, unary_query: bool, _check_type: bool =
metadata=item.get("metadata", None),
_check_type=_check_type,
)
m.append(sc)

if unary_query:
namespace = response.get("namespace", "")
matches = m
results = []
else:
namespace = ""
matches = []
results = res
matches.append(sc)

usage = parse_usage(response)


kw = QueryResponseKwargs(_check_type, namespace, matches, results, usage)
kw_dict = kw._asdict()
kw_dict["_check_type"] = kw.check_type
return QueryResponse(**kw._asdict())
return QueryResponse(
namespace=response.get("namespace", ""),
matches=matches,
usage = parse_usage(response),
_check_type=_check_type
)


def parse_stats_response(response: dict):
Expand Down
1 change: 0 additions & 1 deletion pinecone/grpc/vector_factory_grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

from pinecone.core.grpc.protos.vector_service_pb2 import (
Vector as GRPCVector,
QueryVector as GRPCQueryVector,
SparseValues as GRPCSparseValues,
)
from pinecone import (
Expand Down
22 changes: 0 additions & 22 deletions tests/unit/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,28 +363,6 @@ def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker):
pinecone.QueryRequest(top_k=10, vector=self.vals1, filter=self.filter1, namespace="ns")
)

def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker):
mocker.patch.object(self.index._vector_api, "query", autospec=True)
self.index.query(top_k=10, queries=[(self.vals1,), (self.vals2,)])
self.index._vector_api.query.assert_called_once_with(
pinecone.QueryRequest(
top_k=10, queries=[pinecone.QueryVector(values=self.vals1), pinecone.QueryVector(values=self.vals2)]
)
)

def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker):
mocker.patch.object(self.index._vector_api, "query", autospec=True)
self.index.query(top_k=10, queries=[(self.vals1, self.filter1), (self.vals2, self.filter2)])
self.index._vector_api.query.assert_called_once_with(
pinecone.QueryRequest(
top_k=10,
queries=[
pinecone.QueryVector(values=self.vals1, filter=self.filter1),
pinecone.QueryVector(values=self.vals2, filter=self.filter2),
],
)
)

def test_query_byVecId_queryByVecId(self, mocker):
mocker.patch.object(self.index._vector_api, "query", autospec=True)
self.index.query(top_k=10, id="vec1", include_metadata=True, include_values=False)
Expand Down
28 changes: 0 additions & 28 deletions tests/unit_grpc/test_grpc_index_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
from pinecone.grpc import GRPCIndex
from pinecone.core.grpc.protos.vector_service_pb2 import (
QueryRequest,
QueryVector,
)
from pinecone.grpc.utils import dict_to_proto_struct


class TestGrpcIndexQuery:
def setup_method(self):
self.config = Config(api_key="test-api-key", host="foo")
Expand All @@ -36,32 +34,6 @@ def test_query_byVectorWithFilter_queryVectorWithFilter(self, mocker, vals1, fil
timeout=10,
)

def test_query_byTuplesNoFilter_queryVectorsNoFilter(self, mocker, vals1, vals2):
mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True)
self.index.query(top_k=10, queries=[(vals1,), (vals2,)])
self.index._wrap_grpc_call.assert_called_once_with(
self.index.stub.Query,
QueryRequest(
queries=[QueryVector(values=vals1, filter={}), QueryVector(values=vals2, filter={})], top_k=10
),
timeout=None,
)

def test_query_byTuplesWithFilter_queryVectorsWithFilter(self, mocker, vals1, vals2, filter1, filter2):
mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True)
self.index.query(top_k=10, queries=[(vals1, filter1), (vals2, filter2)])
self.index._wrap_grpc_call.assert_called_once_with(
self.index.stub.Query,
QueryRequest(
queries=[
QueryVector(values=vals1, filter=dict_to_proto_struct(filter1)),
QueryVector(values=vals2, filter=dict_to_proto_struct(filter2)),
],
top_k=10,
),
timeout=None,
)

def test_query_byVecId_queryByVecId(self, mocker):
mocker.patch.object(self.index, "_wrap_grpc_call", autospec=True)
self.index.query(top_k=10, id="vec1", include_metadata=True, include_values=False)
Expand Down

0 comments on commit d86dd1b

Please sign in to comment.