- Methods
  - add(documents)
  - batchexplain(queries, texts=None, limit=10)
  - batchsimilarity(queries, texts)
  - batchtransform(texts)
  - count()
  - delete(ids)
  - explain(query, texts=None, limit=10)
  - extract(queue, texts=None)
  - index()
  - label(text, labels)
  - pipeline(name, args)
  - reindex(config, function=None)
  - similarity(query, texts)
  - transform(text)
  - upsert()
  - wait()
  - workflow(name, elements)
API
Bases: Application
Base API template. The API is an extended txtai application, adding the ability to cluster API instances together.
Downstream applications can extend this base template to add/modify functionality.
Source code in txtai/api/base.py
```python
class API(Application):
    """
    Base API template. The API is an extended txtai application, adding the ability
    to cluster API instances together.

    Downstream applications can extend this base template to add/modify functionality.
    """

    def __init__(self, config, loaddata=True):
        super().__init__(config, loaddata)

        # Embeddings cluster
        self.cluster = None
        if self.config.get("cluster"):
            self.cluster = Cluster(self.config["cluster"])

    # pylint: disable=W0221
    def search(self, query, limit=None, weights=None, index=None, request=None):
        # When search is invoked via the API, limit is set from the request
        # When search is invoked directly, limit is set using the method parameter
        limit = self.limit(request.query_params.get("limit") if request and hasattr(request, "query_params") else limit)
        weights = self.weights(request.query_params.get("weights") if request and hasattr(request, "query_params") else weights)
        index = request.query_params.get("index") if request and hasattr(request, "query_params") else index

        if self.cluster:
            return self.cluster.search(query, limit, weights, index)

        return super().search(query, limit, weights, index)

    def batchsearch(self, queries, limit=None, weights=None, index=None):
        if self.cluster:
            return self.cluster.batchsearch(queries, self.limit(limit), weights, index)

        return super().batchsearch(queries, limit, weights, index)

    def add(self, documents):
        """
        Adds a batch of documents for indexing.

        Downstream applications can override this method to also store full
        documents in an external system.

        Args:
            documents: list of {id: value, text: value}

        Returns:
            unmodified input documents
        """

        if self.cluster:
            self.cluster.add(documents)
        else:
            super().add(documents)

        return documents

    def index(self):
        """
        Builds an embeddings index for previously batched documents.
        """

        if self.cluster:
            self.cluster.index()
        else:
            super().index()

    def upsert(self):
        """
        Runs an embeddings upsert operation for previously batched documents.
        """

        if self.cluster:
            self.cluster.upsert()
        else:
            super().upsert()

    def delete(self, ids):
        """
        Deletes from an embeddings index. Returns list of ids deleted.

        Args:
            ids: list of ids to delete

        Returns:
            ids deleted
        """

        if self.cluster:
            return self.cluster.delete(ids)

        return super().delete(ids)

    def reindex(self, config, function=None):
        """
        Recreates this embeddings index using config. This method only works if
        document content storage is enabled.

        Args:
            config: new config
            function: optional function to prepare content for indexing
        """

        if self.cluster:
            self.cluster.reindex(config, function)
        else:
            super().reindex(config, function)

    def count(self):
        """
        Total number of elements in this embeddings index.

        Returns:
            number of elements in embeddings index
        """

        if self.cluster:
            return self.cluster.count()

        return super().count()

    def limit(self, limit):
        """
        Parses the number of results to return from the request. Allows range of
        1-250, with a default of 10.

        Args:
            limit: limit parameter

        Returns:
            bounded limit
        """

        # Return between 1 and 250 results, defaults to 10
        return max(1, min(250, int(limit) if limit else 10))

    def weights(self, weights):
        """
        Parses the weights parameter from the request.

        Args:
            weights: weights parameter

        Returns:
            weights
        """

        return float(weights) if weights else weights
```
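A minimal sketch of constructing the API directly from Python. The `cluster` block is an assumption about the expected config shape: the `shards` key and URLs are illustrative, inferred from the constructor above, not confirmed keys. Without a `cluster` entry, the instance behaves like a plain Application.

```python
from txtai.api import API

# Hypothetical cluster config: the "shards" key and URLs are
# illustrative assumptions, inferred from the constructor above
config = {
    "cluster": {
        "shards": ["http://127.0.0.1:8001", "http://127.0.0.1:8002"]
    }
}

api = API(config)

# With a cluster configured, calls fan out to the shard instances;
# otherwise they run against the local embeddings index
results = api.search("natural language query", limit=5)
```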
add(documents)
Adds a batch of documents for indexing.
Downstream applications can override this method to also store full documents in an external system.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| documents | | list of {id: value, text: value} | required |

Returns:

| Type | Description |
|---|---|
| | unmodified input documents |
Source code in txtai/api/base.py
```python
def add(self, documents):
    """
    Adds a batch of documents for indexing.

    Downstream applications can override this method to also store full
    documents in an external system.

    Args:
        documents: list of {id: value, text: value}

    Returns:
        unmodified input documents
    """

    if self.cluster:
        self.cluster.add(documents)
    else:
        super().add(documents)

    return documents
```
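A usage sketch with a plain Application (the API behaves the same without a cluster). The model path and sample documents are illustrative:

```python
from txtai.app import Application

# Writable embeddings index; the model path is an example
app = Application("""
writable: true
embeddings:
  path: sentence-transformers/nli-mpnet-base-v2
""")

# Batch documents, then build the index
app.add([
    {"id": 0, "text": "US tops 5 million confirmed virus cases"},
    {"id": 1, "text": "Beijing mobilises invasion craft along coast"}
])
app.index()

print(app.count())  # 2
```

Subsequent `add()` calls followed by `upsert()` append to an existing index instead of rebuilding it.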
batchexplain(queries, texts=None, limit=10)
Explains the importance of each input token in text for a list of queries.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| queries | | list of queries | required |
| texts | | optional list of text, otherwise runs search queries | None |
| limit | | optional limit if texts is None | 10 |

Returns:

| Type | Description |
|---|---|
| | list of dicts per input text per query, where higher token scores represent higher importance relative to the query |
Source code in txtai/app/base.py
```python
def batchexplain(self, queries, texts=None, limit=10):
    """
    Explains the importance of each input token in text for a list of queries.

    Args:
        queries: list of queries
        texts: optional list of text, otherwise runs search queries
        limit: optional limit if texts is None

    Returns:
        list of dicts per input text per query, where higher token scores
        represent higher importance relative to the query
    """

    if self.embeddings:
        with self.lock:
            return self.embeddings.batchexplain(queries, texts, limit)

    return None
```
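A quick sketch, continuing from the `add()` example above; passing texts directly avoids the need for a populated index (sample queries and texts are illustrative):

```python
results = app.batchexplain(
    ["virus cases", "ice shelf"],
    texts=[
        "US tops 5 million confirmed virus cases",
        "Canada's last fully intact ice shelf has suddenly collapsed"
    ]
)

# One list per query, one entry per text with per-token importance scores
print(results)
```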
batchsimilarity(queries, texts)
Computes the similarity between list of queries and list of text. Returns a list of {id: value, score: value} sorted by highest score per query, where id is the index in texts.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| queries | | list of queries | required |
| texts | | list of text | required |

Returns:

| Type | Description |
|---|---|
| | list of {id: value, score: value} per query |
Source code in txtai/app/base.py
```python
def batchsimilarity(self, queries, texts):
    """
    Computes the similarity between list of queries and list of text. Returns a list
    of {id: value, score: value} sorted by highest score per query, where id is the
    index in texts.

    Args:
        queries: list of queries
        texts: list of text

    Returns:
        list of {id: value, score: value} per query
    """

    # Use similarity instance if available otherwise fall back to embeddings model
    if "similarity" in self.pipelines:
        return [[{"id": uid, "score": float(score)} for uid, score in r] for r in self.pipelines["similarity"](queries, texts)]
    if self.embeddings:
        return [[{"id": uid, "score": float(score)} for uid, score in r] for r in self.embeddings.batchsimilarity(queries, texts)]

    return None
```
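A usage sketch, continuing from the `add()` example (queries and texts are illustrative):

```python
data = [
    "US tops 5 million confirmed virus cases",
    "Canada's last fully intact ice shelf has suddenly collapsed"
]

# One result list per query, sorted by descending score;
# id is the index into data
for scores in app.batchsimilarity(["health", "climate"], data):
    print(scores[0])
```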
batchtransform(texts)
Transforms list of text into embeddings arrays.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| texts | | list of text | required |

Returns:

| Type | Description |
|---|---|
| | embeddings arrays |
Source code in txtai/app/base.py
```python
def batchtransform(self, texts):
    """
    Transforms list of text into embeddings arrays.

    Args:
        texts: list of text

    Returns:
        embeddings arrays
    """

    if self.embeddings:
        documents = [(None, text, None) for text in texts]
        return [[float(x) for x in result] for result in self.embeddings.batchtransform(documents)]

    return None
```
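A quick sketch, continuing from the `add()` example:

```python
vectors = app.batchtransform(["first text", "second text"])

# One embeddings array (list of floats) per input text
print(len(vectors), len(vectors[0]))
```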
count()
Total number of elements in this embeddings index.
Returns:

| Type | Description |
|---|---|
| | number of elements in embeddings index |
Source code in txtai/api/base.py
```python
def count(self):
    """
    Total number of elements in this embeddings index.

    Returns:
        number of elements in embeddings index
    """

    if self.cluster:
        return self.cluster.count()

    return super().count()
```
delete(ids)
Deletes from an embeddings index. Returns list of ids deleted.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| ids | | list of ids to delete | required |

Returns:

| Type | Description |
|---|---|
| | ids deleted |
Source code in txtai/api/base.py
```python
def delete(self, ids):
    """
    Deletes from an embeddings index. Returns list of ids deleted.

    Args:
        ids: list of ids to delete

    Returns:
        ids deleted
    """

    if self.cluster:
        return self.cluster.delete(ids)

    return super().delete(ids)
```
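A quick sketch, continuing from the `add()` example above:

```python
# Delete by id, then confirm the index shrank
print(app.delete([0]))  # [0]
print(app.count())      # one less than before
```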
explain(query, texts=None, limit=10)
Explains the importance of each input token in text for a query.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| query | | query text | required |
| texts | | optional list of text, otherwise runs search query | None |
| limit | | optional limit if texts is None | 10 |

Returns:

| Type | Description |
|---|---|
| | list of dicts per input text, where higher token scores represent higher importance relative to the query |
Source code in txtai/app/base.py
```python
def explain(self, query, texts=None, limit=10):
    """
    Explains the importance of each input token in text for a query.

    Args:
        query: query text
        texts: optional list of text, otherwise runs search query
        limit: optional limit if texts is None

    Returns:
        list of dicts per input text, where higher token scores represent
        higher importance relative to the query
    """

    if self.embeddings:
        with self.lock:
            return self.embeddings.explain(query, texts, limit)

    return None
```
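A quick sketch, continuing from the `add()` example (the query and text are illustrative):

```python
results = app.explain(
    "virus cases",
    texts=["US tops 5 million confirmed virus cases"]
)

# One entry per input text with per-token importance scores
print(results)
```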
extract(queue, texts=None)
Extracts answers to input questions.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| queue | | list of {name: value, query: value, question: value, snippet: value} | required |
| texts | | optional list of text | None |

Returns:

| Type | Description |
|---|---|
| | list of {name: value, answer: value} |
Source code in txtai/app/base.py
```python
def extract(self, queue, texts=None):
    """
    Extracts answers to input questions.

    Args:
        queue: list of {name: value, query: value, question: value, snippet: value}
        texts: optional list of text

    Returns:
        list of {name: value, answer: value}
    """

    if self.embeddings and "extractor" in self.pipelines:
        # Get extractor instance
        extractor = self.pipelines["extractor"]

        # Run extractor and return results as dicts
        return extractor(queue, texts)

    return None
```
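A usage sketch, assuming the application config also defines an extractor pipeline; the model path, queue entries and text are illustrative:

```python
# Assumes an extractor pipeline in the config, e.g.
#   extractor:
#     path: distilbert-base-cased-distilled-squad
queue = [{
    "name": "cases",
    "query": "virus cases",
    "question": "How many virus cases",
    "snippet": False
}]

# Returns a list of {name: value, answer: value}
print(app.extract(queue, ["US tops 5 million confirmed virus cases"]))
```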
index()
Builds an embeddings index for previously batched documents.
Source code in txtai/api/base.py
```python
def index(self):
    """
    Builds an embeddings index for previously batched documents.
    """

    if self.cluster:
        self.cluster.index()
    else:
        super().index()
```
label(text, labels)
Applies a zero shot classifier to text using a list of labels. Returns a list of {id: value, score: value} sorted by highest score, where id is the index in labels.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| text | | text or list of text | required |
| labels | | list of labels | required |

Returns:

| Type | Description |
|---|---|
| | list of {id: value, score: value} per text element |
Source code in txtai/app/base.py
```python
def label(self, text, labels):
    """
    Applies a zero shot classifier to text using a list of labels. Returns a list of
    {id: value, score: value} sorted by highest score, where id is the index in labels.

    Args:
        text: text|list
        labels: list of labels

    Returns:
        list of {id: value, score: value} per text element
    """

    if "labels" in self.pipelines:
        # Text is a string
        if isinstance(text, str):
            return [{"id": uid, "score": float(score)} for uid, score in self.pipelines["labels"](text, labels)]

        # Text is a list
        return [[{"id": uid, "score": float(score)} for uid, score in result] for result in self.pipelines["labels"](text, labels)]

    return None
```
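A usage sketch, assuming a zero-shot labels pipeline is configured; the model path, text and labels are illustrative:

```python
# Assumes a labels pipeline in the config, e.g.
#   labels:
#     path: facebook/bart-large-mnli
results = app.label("I am happy with this product", ["positive", "negative"])

# Sorted by descending score; id is the index into labels
print(results)  # e.g. [{"id": 0, "score": 0.98}, {"id": 1, "score": 0.02}]
```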
pipeline(name, args)
Generic pipeline execution method.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| name | | pipeline name | required |
| args | | pipeline arguments | required |

Returns:

| Type | Description |
|---|---|
| | pipeline results |
Source code in txtai/app/base.py
```python
def pipeline(self, name, args):
    """
    Generic pipeline execution method.

    Args:
        name: pipeline name
        args: pipeline arguments

    Returns:
        pipeline results
    """

    if name in self.pipelines:
        return self.pipelines[name](*args)

    return None
```
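A usage sketch, assuming a summary pipeline is configured; the pipeline name, model path and input text are illustrative:

```python
# Assumes a summary pipeline in the config, e.g.
#   summary:
#     path: sshleifer/distilbart-cnn-12-6
# args is unpacked as positional arguments for the pipeline
summary = app.pipeline("summary", ("Text of a long article to condense ...",))
print(summary)
```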
reindex(config, function=None)
Recreates this embeddings index using config. This method only works if document content storage is enabled.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| config | | new config | required |
| function | | optional function to prepare content for indexing | None |
Source code in txtai/api/base.py
```python
def reindex(self, config, function=None):
    """
    Recreates this embeddings index using config. This method only works if
    document content storage is enabled.

    Args:
        config: new config
        function: optional function to prepare content for indexing
    """

    if self.cluster:
        self.cluster.reindex(config, function)
    else:
        super().reindex(config, function)
```
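A sketch of rebuilding an index with new settings; the model paths are illustrative and the original index must store content:

```python
# Requires the original index to have content storage enabled, e.g.
#   embeddings:
#     path: sentence-transformers/nli-mpnet-base-v2
#     content: true
# Rebuild the index with a different vector model
app.reindex({"path": "sentence-transformers/all-MiniLM-L6-v2"})
```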
similarity(query, texts)
Computes the similarity between query and list of text. Returns a list of {id: value, score: value} sorted by highest score, where id is the index in texts.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| query | | query text | required |
| texts | | list of text | required |

Returns:

| Type | Description |
|---|---|
| | list of {id: value, score: value} |
Source code in txtai/app/base.py
```python
def similarity(self, query, texts):
    """
    Computes the similarity between query and list of text. Returns a list of
    {id: value, score: value} sorted by highest score, where id is the index
    in texts.

    Args:
        query: query text
        texts: list of text

    Returns:
        list of {id: value, score: value}
    """

    # Use similarity instance if available otherwise fall back to embeddings model
    if "similarity" in self.pipelines:
        return [{"id": uid, "score": float(score)} for uid, score in self.pipelines["similarity"](query, texts)]
    if self.embeddings:
        return [{"id": uid, "score": float(score)} for uid, score in self.embeddings.similarity(query, texts)]

    return None
```
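A quick sketch, continuing from the `add()` example (the query and texts are illustrative):

```python
data = [
    "US tops 5 million confirmed virus cases",
    "Canada's last fully intact ice shelf has suddenly collapsed"
]

# Results sorted by descending score; id is the index into data
print(app.similarity("health", data))
```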
transform(text)
Transforms text into embeddings arrays.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| text | | input text | required |

Returns:

| Type | Description |
|---|---|
| | embeddings array |
Source code in txtai/app/base.py
```python
def transform(self, text):
    """
    Transforms text into embeddings arrays.

    Args:
        text: input text

    Returns:
        embeddings array
    """

    if self.embeddings:
        return [float(x) for x in self.embeddings.transform((None, text, None))]

    return None
```
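A quick sketch, continuing from the `add()` example:

```python
# Single text in, single embeddings array out
vector = app.transform("US tops 5 million confirmed virus cases")
print(len(vector))  # number of embeddings dimensions
```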
upsert()
Runs an embeddings upsert operation for previously batched documents.
Source code in txtai/api/base.py
```python
def upsert(self):
    """
    Runs an embeddings upsert operation for previously batched documents.
    """

    if self.cluster:
        self.cluster.upsert()
    else:
        super().upsert()
```
wait()
Closes threadpool and waits for completion.
Source code in txtai/app/base.py
```python
def wait(self):
    """
    Closes threadpool and waits for completion.
    """

    if self.pool:
        self.pool.close()
        self.pool.join()
        self.pool = None
```
workflow(name, elements)
Executes a workflow.
Parameters:

| Name | Type | Description | Default |
|---|---|---|---|
| name | | workflow name | required |
| elements | | elements to process | required |

Returns:

| Type | Description |
|---|---|
| | processed elements |
Source code in txtai/app/base.py
```python
def workflow(self, name, elements):
    """
    Executes a workflow.

    Args:
        name: workflow name
        elements: elements to process

    Returns:
        processed elements
    """

    if hasattr(elements, "__len__") and hasattr(elements, "__getitem__"):
        # Convert to tuples and return as a list since input is sized
        elements = [tuple(element) if isinstance(element, list) else element for element in elements]
    else:
        # Convert to tuples and return as a generator since input is not sized
        elements = (tuple(element) if isinstance(element, list) else element for element in elements)

    # Execute workflow
    return self.workflows[name](elements)
```
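A usage sketch, assuming a workflow is defined in the application config; the workflow name, task and input text are illustrative:

```python
# Assumes a workflow named "summarize" in the config, e.g.
#   workflow:
#     summarize:
#       tasks:
#         - action: summary
# Elements stream through the workflow tasks
for result in app.workflow("summarize", ["Text of a long article ..."]):
    print(result)
```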