Inference bucket aggregation
Inference bucket aggregation
A parent pipeline aggregation which loads a pre-trained model and performs inference on the collated result fields from the parent bucket aggregation.
To use the inference bucket aggregation, you need to have the same security privileges that are required for using the get trained models API.
Syntax
An `inference` aggregation looks like this in isolation:
{
"inference": {
"model_id": "a_model_for_inference",
"inference_config": {
"regression_config": {
"num_top_feature_importance_values": 2
}
},
"buckets_path": {
"avg_cost": "avg_agg",
"max_cost": "max_agg"
}
}
}
1. `model_id`: The unique identifier or alias for the trained model.
2. `inference_config`: The optional inference config which overrides the model’s default settings.
3. `buckets_path`: Map the value of `avg_agg` to the model’s input field `avg_cost`.
Table 63. inference
Parameters
Parameter Name | Description | Required | Default Value |
---|---|---|---|
`model_id` | The ID or alias for the trained model. | Required | - |
`inference_config` | Contains the inference type and its options. There are two types: `regression` and `classification`. | Optional | - |
`buckets_path` | Defines the paths to the input aggregations and maps the aggregation names to the field names expected by the model. See `buckets_path` Syntax for more details. | Required | - |
Configuration options for inference models
The `inference_config` setting is optional and usually isn’t required, as the pre-trained models come equipped with sensible defaults. In the context of aggregations, some options can be overridden for each of the two types of model.
Configuration options for regression models
num_top_feature_importance_values
(Optional, integer) Specifies the maximum number of feature importance values per document. By default, it is zero and no feature importance calculation occurs.
Configuration options for classification models
num_top_classes
(Optional, integer) Specifies the number of top class predictions to return. Defaults to 0.
num_top_feature_importance_values
(Optional, integer) Specifies the maximum number of feature importance values per document. Defaults to 0 which means no feature importance calculation occurs.
prediction_field_type
(Optional, string) Specifies the type of the predicted field to write. Valid values are: `string`, `number`, `boolean`. When `boolean` is provided, `1.0` is transformed to `true` and `0.0` to `false`.
Example
The following snippet aggregates a web log by `client_ip` and extracts a number of features via metric and bucket sub-aggregations as input to the inference aggregation, which is configured with a model trained to identify suspicious client IPs:
resp = client.search(
index="kibana_sample_data_logs",
size=0,
aggs={
"client_ip": {
"composite": {
"sources": [
{
"client_ip": {
"terms": {
"field": "clientip"
}
}
}
]
},
"aggs": {
"url_dc": {
"cardinality": {
"field": "url.keyword"
}
},
"bytes_sum": {
"sum": {
"field": "bytes"
}
},
"geo_src_dc": {
"cardinality": {
"field": "geo.src"
}
},
"geo_dest_dc": {
"cardinality": {
"field": "geo.dest"
}
},
"responses_total": {
"value_count": {
"field": "timestamp"
}
},
"success": {
"filter": {
"term": {
"response": "200"
}
}
},
"error404": {
"filter": {
"term": {
"response": "404"
}
}
},
"error503": {
"filter": {
"term": {
"response": "503"
}
}
},
"malicious_client_ip": {
"inference": {
"model_id": "malicious_clients_model",
"buckets_path": {
"response_count": "responses_total",
"url_dc": "url_dc",
"bytes_sum": "bytes_sum",
"geo_src_dc": "geo_src_dc",
"geo_dest_dc": "geo_dest_dc",
"success": "success._count",
"error404": "error404._count",
"error503": "error503._count"
}
}
}
}
}
},
)
print(resp)
response = client.search(
index: 'kibana_sample_data_logs',
body: {
size: 0,
aggregations: {
client_ip: {
composite: {
sources: [
{
client_ip: {
terms: {
field: 'clientip'
}
}
}
]
},
aggregations: {
url_dc: {
cardinality: {
field: 'url.keyword'
}
},
bytes_sum: {
sum: {
field: 'bytes'
}
},
geo_src_dc: {
cardinality: {
field: 'geo.src'
}
},
geo_dest_dc: {
cardinality: {
field: 'geo.dest'
}
},
responses_total: {
value_count: {
field: 'timestamp'
}
},
success: {
filter: {
term: {
response: '200'
}
}
},
"error404": {
filter: {
term: {
response: '404'
}
}
},
"error503": {
filter: {
term: {
response: '503'
}
}
},
malicious_client_ip: {
inference: {
model_id: 'malicious_clients_model',
buckets_path: {
response_count: 'responses_total',
url_dc: 'url_dc',
bytes_sum: 'bytes_sum',
geo_src_dc: 'geo_src_dc',
geo_dest_dc: 'geo_dest_dc',
success: 'success._count',
"error404": 'error404._count',
"error503": 'error503._count'
}
}
}
}
}
}
}
)
puts response
const response = await client.search({
index: "kibana_sample_data_logs",
size: 0,
aggs: {
client_ip: {
composite: {
sources: [
{
client_ip: {
terms: {
field: "clientip",
},
},
},
],
},
aggs: {
url_dc: {
cardinality: {
field: "url.keyword",
},
},
bytes_sum: {
sum: {
field: "bytes",
},
},
geo_src_dc: {
cardinality: {
field: "geo.src",
},
},
geo_dest_dc: {
cardinality: {
field: "geo.dest",
},
},
responses_total: {
value_count: {
field: "timestamp",
},
},
success: {
filter: {
term: {
response: "200",
},
},
},
error404: {
filter: {
term: {
response: "404",
},
},
},
error503: {
filter: {
term: {
response: "503",
},
},
},
malicious_client_ip: {
inference: {
model_id: "malicious_clients_model",
buckets_path: {
response_count: "responses_total",
url_dc: "url_dc",
bytes_sum: "bytes_sum",
geo_src_dc: "geo_src_dc",
geo_dest_dc: "geo_dest_dc",
success: "success._count",
error404: "error404._count",
error503: "error503._count",
},
},
},
},
},
},
});
console.log(response);
GET kibana_sample_data_logs/_search
{
"size": 0,
"aggs": {
"client_ip": {
"composite": {
"sources": [
{
"client_ip": {
"terms": {
"field": "clientip"
}
}
}
]
},
"aggs": {
"url_dc": {
"cardinality": {
"field": "url.keyword"
}
},
"bytes_sum": {
"sum": {
"field": "bytes"
}
},
"geo_src_dc": {
"cardinality": {
"field": "geo.src"
}
},
"geo_dest_dc": {
"cardinality": {
"field": "geo.dest"
}
},
"responses_total": {
"value_count": {
"field": "timestamp"
}
},
"success": {
"filter": {
"term": {
"response": "200"
}
}
},
"error404": {
"filter": {
"term": {
"response": "404"
}
}
},
"error503": {
"filter": {
"term": {
"response": "503"
}
}
},
"malicious_client_ip": {
"inference": {
"model_id": "malicious_clients_model",
"buckets_path": {
"response_count": "responses_total",
"url_dc": "url_dc",
"bytes_sum": "bytes_sum",
"geo_src_dc": "geo_src_dc",
"geo_dest_dc": "geo_dest_dc",
"success": "success._count",
"error404": "error404._count",
"error503": "error503._count"
}
}
}
}
}
}
}
1. A composite bucket aggregation that aggregates the data by `client_ip`.
2. A series of metrics and bucket sub-aggregations.
3. Inference bucket aggregation that specifies the trained model and maps the aggregation names to the model’s input fields.