Auto-interval date histogram aggregation
Auto-interval date histogram aggregation
A multi-bucket aggregation similar to the date histogram aggregation, except that instead of providing an interval to use as the width of each bucket, you provide a target number of buckets, and the bucket interval is chosen automatically to best achieve that target. The number of buckets returned will always be less than or equal to this target number.
The buckets field is optional, and will default to 10 buckets if not specified.
Requesting a target of 10 buckets.
resp = client.search(
index="sales",
size="0",
aggs={
"sales_over_time": {
"auto_date_histogram": {
"field": "date",
"buckets": 10
}
}
},
)
print(resp)
response = client.search(
index: 'sales',
size: 0,
body: {
aggregations: {
sales_over_time: {
auto_date_histogram: {
field: 'date',
buckets: 10
}
}
}
}
)
puts response
const response = await client.search({
index: "sales",
size: 0,
aggs: {
sales_over_time: {
auto_date_histogram: {
field: "date",
buckets: 10,
},
},
},
});
console.log(response);
POST /sales/_search?size=0
{
"aggs": {
"sales_over_time": {
"auto_date_histogram": {
"field": "date",
"buckets": 10
}
}
}
}
Keys
Internally, a date is represented as a 64-bit number representing a timestamp in milliseconds-since-the-epoch. These timestamps are returned as the bucket `key`s. The `key_as_string` is the same timestamp converted to a formatted date string using the format specified with the `format` parameter:
If no `format` is specified, the first date format specified in the field mapping is used.
resp = client.search(
index="sales",
size="0",
aggs={
"sales_over_time": {
"auto_date_histogram": {
"field": "date",
"buckets": 5,
"format": "yyyy-MM-dd"
}
}
},
)
print(resp)
response = client.search(
index: 'sales',
size: 0,
body: {
aggregations: {
sales_over_time: {
auto_date_histogram: {
field: 'date',
buckets: 5,
format: 'yyyy-MM-dd'
}
}
}
}
)
puts response
const response = await client.search({
index: "sales",
size: 0,
aggs: {
sales_over_time: {
auto_date_histogram: {
field: "date",
buckets: 5,
format: "yyyy-MM-dd",
},
},
},
});
console.log(response);
POST /sales/_search?size=0
{
"aggs": {
"sales_over_time": {
"auto_date_histogram": {
"field": "date",
"buckets": 5,
"format": "yyyy-MM-dd"
}
}
}
}
The `format` parameter supports expressive date format patterns.
Response:
{
...
"aggregations": {
"sales_over_time": {
"buckets": [
{
"key_as_string": "2015-01-01",
"key": 1420070400000,
"doc_count": 3
},
{
"key_as_string": "2015-02-01",
"key": 1422748800000,
"doc_count": 2
},
{
"key_as_string": "2015-03-01",
"key": 1425168000000,
"doc_count": 2
}
],
"interval": "1M"
}
}
}
Intervals
The interval of the returned buckets is selected based on the data collected by the aggregation so that the number of buckets returned is less than or equal to the number requested. The possible intervals returned are:
seconds | In multiples of 1, 5, 10 and 30 |
minutes | In multiples of 1, 5, 10 and 30 |
hours | In multiples of 1, 3 and 12 |
days | In multiples of 1 and 7 |
months | In multiples of 1 and 3 |
years | In multiples of 1, 5, 10, 20, 50 and 100 |
In the worst case, where there are too many daily buckets for the requested number of buckets, the number of buckets returned will be 1/7th of the number of buckets requested.
Time Zone
Date-times are stored in Elasticsearch in UTC. By default, all bucketing and rounding is also done in UTC. The `time_zone` parameter can be used to indicate that bucketing should use a different time zone.
Time zones may be specified either as an ISO 8601 UTC offset (e.g. `+01:00` or `-08:00`) or as a time zone ID, an identifier used in the TZ database such as `America/Los_Angeles`.
Consider the following example:
resp = client.index(
index="my-index-000001",
id="1",
refresh=True,
document={
"date": "2015-10-01T00:30:00Z"
},
)
print(resp)
resp1 = client.index(
index="my-index-000001",
id="2",
refresh=True,
document={
"date": "2015-10-01T01:30:00Z"
},
)
print(resp1)
resp2 = client.index(
index="my-index-000001",
id="3",
refresh=True,
document={
"date": "2015-10-01T02:30:00Z"
},
)
print(resp2)
resp3 = client.search(
index="my-index-000001",
size="0",
aggs={
"by_day": {
"auto_date_histogram": {
"field": "date",
"buckets": 3
}
}
},
)
print(resp3)
response = client.index(
index: 'my-index-000001',
id: 1,
refresh: true,
body: {
date: '2015-10-01T00:30:00Z'
}
)
puts response
response = client.index(
index: 'my-index-000001',
id: 2,
refresh: true,
body: {
date: '2015-10-01T01:30:00Z'
}
)
puts response
response = client.index(
index: 'my-index-000001',
id: 3,
refresh: true,
body: {
date: '2015-10-01T02:30:00Z'
}
)
puts response
response = client.search(
index: 'my-index-000001',
size: 0,
body: {
aggregations: {
by_day: {
auto_date_histogram: {
field: 'date',
buckets: 3
}
}
}
}
)
puts response
const response = await client.index({
index: "my-index-000001",
id: 1,
refresh: "true",
document: {
date: "2015-10-01T00:30:00Z",
},
});
console.log(response);
const response1 = await client.index({
index: "my-index-000001",
id: 2,
refresh: "true",
document: {
date: "2015-10-01T01:30:00Z",
},
});
console.log(response1);
const response2 = await client.index({
index: "my-index-000001",
id: 3,
refresh: "true",
document: {
date: "2015-10-01T02:30:00Z",
},
});
console.log(response2);
const response3 = await client.search({
index: "my-index-000001",
size: 0,
aggs: {
by_day: {
auto_date_histogram: {
field: "date",
buckets: 3,
},
},
},
});
console.log(response3);
PUT my-index-000001/_doc/1?refresh
{
"date": "2015-10-01T00:30:00Z"
}
PUT my-index-000001/_doc/2?refresh
{
"date": "2015-10-01T01:30:00Z"
}
PUT my-index-000001/_doc/3?refresh
{
"date": "2015-10-01T02:30:00Z"
}
GET my-index-000001/_search?size=0
{
"aggs": {
"by_day": {
"auto_date_histogram": {
"field": "date",
"buckets" : 3
}
}
}
}
If no time zone is specified, UTC is used, and three 1-hour buckets are returned starting at midnight UTC on 1 October 2015:
{
...
"aggregations": {
"by_day": {
"buckets": [
{
"key_as_string": "2015-10-01T00:00:00.000Z",
"key": 1443657600000,
"doc_count": 1
},
{
"key_as_string": "2015-10-01T01:00:00.000Z",
"key": 1443661200000,
"doc_count": 1
},
{
"key_as_string": "2015-10-01T02:00:00.000Z",
"key": 1443664800000,
"doc_count": 1
}
],
"interval": "1h"
}
}
}
If a `time_zone` of `-01:00` is specified, local time is one hour behind UTC, so midnight UTC corresponds to 11:00pm on the previous day in that time zone:
resp = client.search(
index="my-index-000001",
size="0",
aggs={
"by_day": {
"auto_date_histogram": {
"field": "date",
"buckets": 3,
"time_zone": "-01:00"
}
}
},
)
print(resp)
response = client.search(
index: 'my-index-000001',
size: 0,
body: {
aggregations: {
by_day: {
auto_date_histogram: {
field: 'date',
buckets: 3,
time_zone: '-01:00'
}
}
}
}
)
puts response
const response = await client.search({
index: "my-index-000001",
size: 0,
aggs: {
by_day: {
auto_date_histogram: {
field: "date",
buckets: 3,
time_zone: "-01:00",
},
},
},
});
console.log(response);
GET my-index-000001/_search?size=0
{
"aggs": {
"by_day": {
"auto_date_histogram": {
"field": "date",
"buckets" : 3,
"time_zone": "-01:00"
}
}
}
}
Now three 1-hour buckets are still returned but the first bucket starts at 11:00pm on 30 September 2015 since that is the local time for the bucket in the specified time zone.
{
...
"aggregations": {
"by_day": {
"buckets": [
{
"key_as_string": "2015-09-30T23:00:00.000-01:00",
"key": 1443657600000,
"doc_count": 1
},
{
"key_as_string": "2015-10-01T00:00:00.000-01:00",
"key": 1443661200000,
"doc_count": 1
},
{
"key_as_string": "2015-10-01T01:00:00.000-01:00",
"key": 1443664800000,
"doc_count": 1
}
],
"interval": "1h"
}
}
}
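The `key_as_string` value is rendered in the specified time zone (note the `-01:00` offset), while `key` remains the same UTC milliseconds-since-the-epoch timestamp as before.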
When using time zones that follow DST (daylight saving time) changes, buckets close to the moment when those changes happen can have slightly different sizes than neighbouring buckets. For example, consider a DST start in the `CET` time zone: on 27 March 2016 at 2am, clocks were turned forward 1 hour to 3am local time. If the result of the aggregation was daily buckets, the bucket covering that day will only hold data for 23 hours instead of the usual 24 hours for other buckets. The same is true for shorter intervals such as 12h. Here, there will be only an 11h bucket on the morning of 27 March when the DST shift happens.
Minimum Interval parameter
The `minimum_interval` parameter allows the caller to specify the minimum rounding interval that should be used. This can make the collection process more efficient, as the aggregation will not attempt to round at any interval lower than `minimum_interval`.
The accepted units for `minimum_interval` are:
- year
- month
- day
- hour
- minute
- second
resp = client.search(
index="sales",
size="0",
aggs={
"sale_date": {
"auto_date_histogram": {
"field": "date",
"buckets": 10,
"minimum_interval": "minute"
}
}
},
)
print(resp)
response = client.search(
index: 'sales',
size: 0,
body: {
aggregations: {
sale_date: {
auto_date_histogram: {
field: 'date',
buckets: 10,
minimum_interval: 'minute'
}
}
}
}
)
puts response
const response = await client.search({
index: "sales",
size: 0,
aggs: {
sale_date: {
auto_date_histogram: {
field: "date",
buckets: 10,
minimum_interval: "minute",
},
},
},
});
console.log(response);
POST /sales/_search?size=0
{
"aggs": {
"sale_date": {
"auto_date_histogram": {
"field": "date",
"buckets": 10,
"minimum_interval": "minute"
}
}
}
}
Missing value
The `missing` parameter defines how documents that are missing a value should be treated. By default they are ignored, but it is also possible to treat them as if they had a value.
resp = client.search(
index="sales",
size="0",
aggs={
"sale_date": {
"auto_date_histogram": {
"field": "date",
"buckets": 10,
"missing": "2000/01/01"
}
}
},
)
print(resp)
response = client.search(
index: 'sales',
size: 0,
body: {
aggregations: {
sale_date: {
auto_date_histogram: {
field: 'date',
buckets: 10,
missing: '2000/01/01'
}
}
}
}
)
puts response
const response = await client.search({
index: "sales",
size: 0,
aggs: {
sale_date: {
auto_date_histogram: {
field: "date",
buckets: 10,
missing: "2000/01/01",
},
},
},
});
console.log(response);
POST /sales/_search?size=0
{
"aggs": {
"sale_date": {
"auto_date_histogram": {
"field": "date",
"buckets": 10,
"missing": "2000/01/01"
}
}
}
}
Documents without a value in the `date` field will fall into the same bucket as documents that have the value `2000-01-01`.