Methods
API (Application)
Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.
Downstream applications can extend this base template to add/modify functionality.
Source code in txtai/api/base.py
```python
class API(Application):
    """
    Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.

    Downstream applications can extend this base template to add/modify functionality.
    """

    def __init__(self, config, loaddata=True):
        super().__init__(config, loaddata)

        # Embeddings cluster
        self.cluster = None
        if self.config.get("cluster"):
            self.cluster = Cluster(self.config["cluster"])

    # pylint: disable=W0221
    def search(self, query, limit=None, request=None):
        # When search is invoked via the API, limit is set from the request
        # When search is invoked directly, limit is set using the method parameter
        limit = self.limit(request.query_params.get("limit") if request and hasattr(request, "query_params") else limit)

        if self.cluster:
            return self.cluster.search(query, limit)

        return super().search(query, limit)

    def batchsearch(self, queries, limit=None):
        if self.cluster:
            return self.cluster.batchsearch(queries, self.limit(limit))

        return super().batchsearch(queries, limit)

    def add(self, documents):
        """
        Adds a batch of documents for indexing.

        Downstream applications can override this method to also store full documents in an external system.

        Args:
            documents: list of {id: value, text: value}

        Returns:
            unmodified input documents
        """

        if self.cluster:
            self.cluster.add(documents)
        else:
            super().add(documents)

        return documents

    def index(self):
        """
        Builds an embeddings index for previously batched documents.
        """

        if self.cluster:
            self.cluster.index()
        else:
            super().index()

    def upsert(self):
        """
        Runs an embeddings upsert operation for previously batched documents.
        """

        if self.cluster:
            self.cluster.upsert()
        else:
            super().upsert()

    def delete(self, ids):
        """
        Deletes from an embeddings index. Returns list of ids deleted.

        Args:
            ids: list of ids to delete

        Returns:
            ids deleted
        """

        if self.cluster:
            return self.cluster.delete(ids)

        return super().delete(ids)

    def count(self):
        """
        Total number of elements in this embeddings index.

        Returns:
            number of elements in embeddings index
        """

        if self.cluster:
            return self.cluster.count()

        return super().count()

    def limit(self, limit):
        """
        Parses the number of results to return from the request. Allows range of 1-250, with a default of 10.

        Args:
            limit: limit parameter

        Returns:
            bounded limit
        """

        # Return between 1 and 250 results, defaults to 10
        return max(1, min(250, int(limit) if limit else 10))
```
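The class is normally constructed by the txtai API service from a YAML configuration, but it can also be instantiated directly. The sketch below is illustrative only: the model path and shard URLs are placeholder values, and it assumes (as with txtai Applications) that the constructor accepts a configuration dictionary.

```python
from txtai.api import API

# Placeholder configuration: model path and shard URLs are examples, not defaults
config = {
    "writable": True,  # allow index updates
    "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"},
    # Optional: fan requests out to other txtai API instances
    # "cluster": {"shards": ["http://127.0.0.1:8001", "http://127.0.0.1:8002"]},
}

api = API(config)
```

When a cluster block is present, search, add, index and related calls are routed through the Cluster instance instead of the local embeddings index, as the methods below show.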
add(self, documents)
Adds a batch of documents for indexing.
Downstream applications can override this method to also store full documents in an external system.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| documents | | list of {id: value, text: value} | required |

Returns:

| Type | Description |
| --- | --- |
| | unmodified input documents |
Source code in txtai/api/base.py
```python
def add(self, documents):
    """
    Adds a batch of documents for indexing.

    Downstream applications can override this method to also store full documents in an external system.

    Args:
        documents: list of {id: value, text: value}

    Returns:
        unmodified input documents
    """

    if self.cluster:
        self.cluster.add(documents)
    else:
        super().add(documents)

    return documents
```
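As the docstring notes, downstream applications can override add to also persist the full documents to an external system. A minimal sketch of that pattern; `CustomAPI` and its `store` helper are hypothetical names, not part of txtai.

```python
from txtai.api import API

class CustomAPI(API):
    def add(self, documents):
        # Materialize the input in case it's a generator
        documents = list(documents)

        # Hypothetical hook: write {id, text} records to an external store
        self.store(documents)

        # Delegate to the base implementation (cluster-aware or local)
        return super().add(documents)

    def store(self, documents):
        # Placeholder for a database / object store write
        pass
```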
batchsearch(self, queries, limit=None)
Finds documents in the embeddings model most similar to the input queries. Returns a list of {id: value, score: value} sorted by highest score per query, where id is the document id in the embeddings model.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| queries | | queries text | required |
| limit | | maximum results | None |

Returns:

| Type | Description |
| --- | --- |
| | list of {id: value, score: value} per query |
Source code in txtai/api/base.py
```python
def batchsearch(self, queries, limit=None):
    if self.cluster:
        return self.cluster.batchsearch(queries, self.limit(limit))

    return super().batchsearch(queries, limit)
```
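A usage sketch with a placeholder model path and toy documents; same constructor assumptions as the sketch near the top of this page.

```python
from txtai.api import API

api = API({"writable": True, "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"}})

api.add([{"id": 0, "text": "US tops 5 million confirmed virus cases"},
         {"id": 1, "text": "Maine man wins $1M from $25 lottery ticket"}])
api.index()

# One result list per query, each entry shaped like {id: value, score: value}
for results in api.batchsearch(["health", "lottery"], limit=1):
    print(results)
```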
count(self)
Total number of elements in this embeddings index.
Returns:

| Type | Description |
| --- | --- |
| | number of elements in embeddings index |
Source code in txtai/api/base.py
```python
def count(self):
    """
    Total number of elements in this embeddings index.

    Returns:
        number of elements in embeddings index
    """

    if self.cluster:
        return self.cluster.count()

    return super().count()
```
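A short sketch (placeholder model path): after indexing two documents, count reports two elements.

```python
from txtai.api import API

api = API({"writable": True, "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"}})

api.add([{"id": 0, "text": "first document"}, {"id": 1, "text": "second document"}])
api.index()

# Number of elements currently in the embeddings index
print(api.count())  # 2
```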
delete(self, ids)
Deletes from an embeddings index. Returns list of ids deleted.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| ids | | list of ids to delete | required |

Returns:

| Type | Description |
| --- | --- |
| | ids deleted |
Source code in txtai/api/base.py
```python
def delete(self, ids):
    """
    Deletes from an embeddings index. Returns list of ids deleted.

    Args:
        ids: list of ids to delete

    Returns:
        ids deleted
    """

    if self.cluster:
        return self.cluster.delete(ids)

    return super().delete(ids)
```
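The returned list identifies which ids were actually removed, which callers can use to confirm the operation. A sketch with placeholder values:

```python
from txtai.api import API

api = API({"writable": True, "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"}})

api.add([{"id": 0, "text": "keep this document"}, {"id": 1, "text": "remove this document"}])
api.index()

# Returns the ids that were found and deleted
print(api.delete([1]))   # [1]
print(api.count())       # 1
```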
index(self)
Builds an embeddings index for previously batched documents.
Source code in txtai/api/base.py
```python
def index(self):
    """
    Builds an embeddings index for previously batched documents.
    """

    if self.cluster:
        self.cluster.index()
    else:
        super().index()
```
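add() only batches documents; index() builds the searchable embeddings index from that batch. A minimal sketch of the two-step flow (placeholder model path):

```python
from txtai.api import API

api = API({"writable": True, "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"}})

# Step 1: batch documents
api.add([{"id": 0, "text": "txtai builds embeddings indexes for semantic search"}])

# Step 2: build the index from the batched documents
api.index()

print(api.search("semantic search", limit=1))
```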
search(self, query, limit=None, request=None)
Finds documents in the embeddings model most similar to the input query. Returns a list of {id: value, score: value} sorted by highest score, where id is the document id in the embeddings model.
Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| query | | query text | required |
| limit | | maximum results, used if request is None | None |
| request | | original API request; when set, limit is read from its query parameters | None |

Returns:

| Type | Description |
| --- | --- |
| | list of {id: value, score: value} |
Source code in txtai/api/base.py
```python
def search(self, query, limit=None, request=None):
    # When search is invoked via the API, limit is set from the request
    # When search is invoked directly, limit is set using the method parameter
    limit = self.limit(request.query_params.get("limit") if request and hasattr(request, "query_params") else limit)

    if self.cluster:
        return self.cluster.search(query, limit)

    return super().search(query, limit)
```
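When search is invoked through the HTTP API, limit comes from the request's query parameters; when invoked directly, the limit argument is used. In both cases limit() clamps it to the 1-250 range with a default of 10. A direct-call sketch (placeholder model path and data):

```python
from txtai.api import API

api = API({"writable": True, "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"}})

api.add([{"id": 0, "text": "US tops 5 million confirmed virus cases"}])
api.index()

# limit passed as a method parameter, clamped to the 1-250 range
print(api.search("public health story", limit=1))

# No limit given: defaults to 10 results (or fewer if the index is smaller)
print(api.search("public health story"))
```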
upsert(self)
Runs an embeddings upsert operation for previously batched documents.
Source code in txtai/api/base.py
```python
def upsert(self):
    """
    Runs an embeddings upsert operation for previously batched documents.
    """

    if self.cluster:
        self.cluster.upsert()
    else:
        super().upsert()
```
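upsert() also consumes previously batched documents, but applies them to an existing index, updating matching ids and inserting new ones rather than rebuilding from scratch. A sketch with placeholder values:

```python
from txtai.api import API

api = API({"writable": True, "embeddings": {"path": "sentence-transformers/nli-mpnet-base-v2"}})

# Initial build
api.add([{"id": 0, "text": "original text"}])
api.index()

# Batch an update for id 0 plus a new document, then apply with upsert
api.add([{"id": 0, "text": "revised text"}, {"id": 1, "text": "new document"}])
api.upsert()

print(api.count())  # expected: 2
```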