Range aggregation

Range aggregation

A multi-bucket, value-source-based aggregation that enables the user to define a set of ranges, each representing a bucket. During the aggregation process, the values extracted from each document are checked against each bucket range, and the matching documents are placed into the corresponding bucket. Note that this aggregation includes the from value and excludes the to value for each range.

Example:

  1. resp = client.search(
  2. index="sales",
  3. aggs={
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "ranges": [
  8. {
  9. "to": 100
  10. },
  11. {
  12. "from": 100,
  13. "to": 200
  14. },
  15. {
  16. "from": 200
  17. }
  18. ]
  19. }
  20. }
  21. },
  22. )
  23. print(resp)
  1. response = client.search(
  2. index: 'sales',
  3. body: {
  4. aggregations: {
  5. price_ranges: {
  6. range: {
  7. field: 'price',
  8. ranges: [
  9. {
  10. to: 100
  11. },
  12. {
  13. from: 100,
  14. to: 200
  15. },
  16. {
  17. from: 200
  18. }
  19. ]
  20. }
  21. }
  22. }
  23. }
  24. )
  25. puts response
  1. const response = await client.search({
  2. index: "sales",
  3. aggs: {
  4. price_ranges: {
  5. range: {
  6. field: "price",
  7. ranges: [
  8. {
  9. to: 100,
  10. },
  11. {
  12. from: 100,
  13. to: 200,
  14. },
  15. {
  16. from: 200,
  17. },
  18. ],
  19. },
  20. },
  21. },
  22. });
  23. console.log(response);
  1. GET sales/_search
  2. {
  3. "aggs": {
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "ranges": [
  8. { "to": 100.0 },
  9. { "from": 100.0, "to": 200.0 },
  10. { "from": 200.0 }
  11. ]
  12. }
  13. }
  14. }
  15. }

Response:

  1. {
  2. ...
  3. "aggregations": {
  4. "price_ranges": {
  5. "buckets": [
  6. {
  7. "key": "*-100.0",
  8. "to": 100.0,
  9. "doc_count": 2
  10. },
  11. {
  12. "key": "100.0-200.0",
  13. "from": 100.0,
  14. "to": 200.0,
  15. "doc_count": 2
  16. },
  17. {
  18. "key": "200.0-*",
  19. "from": 200.0,
  20. "doc_count": 3
  21. }
  22. ]
  23. }
  24. }
  25. }

Keyed Response

Setting the keyed flag to true will associate a unique string key with each bucket and return the ranges as a hash rather than an array:

  1. resp = client.search(
  2. index="sales",
  3. aggs={
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "keyed": True,
  8. "ranges": [
  9. {
  10. "to": 100
  11. },
  12. {
  13. "from": 100,
  14. "to": 200
  15. },
  16. {
  17. "from": 200
  18. }
  19. ]
  20. }
  21. }
  22. },
  23. )
  24. print(resp)
  1. response = client.search(
  2. index: 'sales',
  3. body: {
  4. aggregations: {
  5. price_ranges: {
  6. range: {
  7. field: 'price',
  8. keyed: true,
  9. ranges: [
  10. {
  11. to: 100
  12. },
  13. {
  14. from: 100,
  15. to: 200
  16. },
  17. {
  18. from: 200
  19. }
  20. ]
  21. }
  22. }
  23. }
  24. }
  25. )
  26. puts response
  1. const response = await client.search({
  2. index: "sales",
  3. aggs: {
  4. price_ranges: {
  5. range: {
  6. field: "price",
  7. keyed: true,
  8. ranges: [
  9. {
  10. to: 100,
  11. },
  12. {
  13. from: 100,
  14. to: 200,
  15. },
  16. {
  17. from: 200,
  18. },
  19. ],
  20. },
  21. },
  22. },
  23. });
  24. console.log(response);
  1. GET sales/_search
  2. {
  3. "aggs": {
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "keyed": true,
  8. "ranges": [
  9. { "to": 100 },
  10. { "from": 100, "to": 200 },
  11. { "from": 200 }
  12. ]
  13. }
  14. }
  15. }
  16. }

Response:

  1. {
  2. ...
  3. "aggregations": {
  4. "price_ranges": {
  5. "buckets": {
  6. "*-100.0": {
  7. "to": 100.0,
  8. "doc_count": 2
  9. },
  10. "100.0-200.0": {
  11. "from": 100.0,
  12. "to": 200.0,
  13. "doc_count": 2
  14. },
  15. "200.0-*": {
  16. "from": 200.0,
  17. "doc_count": 3
  18. }
  19. }
  20. }
  21. }
  22. }

It is also possible to customize the key for each range:

  1. resp = client.search(
  2. index="sales",
  3. aggs={
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "keyed": True,
  8. "ranges": [
  9. {
  10. "key": "cheap",
  11. "to": 100
  12. },
  13. {
  14. "key": "average",
  15. "from": 100,
  16. "to": 200
  17. },
  18. {
  19. "key": "expensive",
  20. "from": 200
  21. }
  22. ]
  23. }
  24. }
  25. },
  26. )
  27. print(resp)
  1. response = client.search(
  2. index: 'sales',
  3. body: {
  4. aggregations: {
  5. price_ranges: {
  6. range: {
  7. field: 'price',
  8. keyed: true,
  9. ranges: [
  10. {
  11. key: 'cheap',
  12. to: 100
  13. },
  14. {
  15. key: 'average',
  16. from: 100,
  17. to: 200
  18. },
  19. {
  20. key: 'expensive',
  21. from: 200
  22. }
  23. ]
  24. }
  25. }
  26. }
  27. }
  28. )
  29. puts response
  1. const response = await client.search({
  2. index: "sales",
  3. aggs: {
  4. price_ranges: {
  5. range: {
  6. field: "price",
  7. keyed: true,
  8. ranges: [
  9. {
  10. key: "cheap",
  11. to: 100,
  12. },
  13. {
  14. key: "average",
  15. from: 100,
  16. to: 200,
  17. },
  18. {
  19. key: "expensive",
  20. from: 200,
  21. },
  22. ],
  23. },
  24. },
  25. },
  26. });
  27. console.log(response);
  1. GET sales/_search
  2. {
  3. "aggs": {
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "keyed": true,
  8. "ranges": [
  9. { "key": "cheap", "to": 100 },
  10. { "key": "average", "from": 100, "to": 200 },
  11. { "key": "expensive", "from": 200 }
  12. ]
  13. }
  14. }
  15. }
  16. }

Response:

  1. {
  2. ...
  3. "aggregations": {
  4. "price_ranges": {
  5. "buckets": {
  6. "cheap": {
  7. "to": 100.0,
  8. "doc_count": 2
  9. },
  10. "average": {
  11. "from": 100.0,
  12. "to": 200.0,
  13. "doc_count": 2
  14. },
  15. "expensive": {
  16. "from": 200.0,
  17. "doc_count": 3
  18. }
  19. }
  20. }
  21. }
  22. }

Script

If the data in your documents doesn’t exactly match what you’d like to aggregate, use a runtime field. For example, if you need to apply a particular currency conversion rate:

  1. resp = client.search(
  2. index="sales",
  3. runtime_mappings={
  4. "price.euros": {
  5. "type": "double",
  6. "script": {
  7. "source": "\n emit(doc['price'].value * params.conversion_rate)\n ",
  8. "params": {
  9. "conversion_rate": 0.835526591
  10. }
  11. }
  12. }
  13. },
  14. aggs={
  15. "price_ranges": {
  16. "range": {
  17. "field": "price.euros",
  18. "ranges": [
  19. {
  20. "to": 100
  21. },
  22. {
  23. "from": 100,
  24. "to": 200
  25. },
  26. {
  27. "from": 200
  28. }
  29. ]
  30. }
  31. }
  32. },
  33. )
  34. print(resp)
  1. response = client.search(
  2. index: 'sales',
  3. body: {
  4. runtime_mappings: {
  5. 'price.euros' => {
  6. type: 'double',
  7. script: {
  8. source: "\n emit(doc['price'].value * params.conversion_rate)\n ",
  9. params: {
  10. conversion_rate: 0.835526591
  11. }
  12. }
  13. }
  14. },
  15. aggregations: {
  16. price_ranges: {
  17. range: {
  18. field: 'price.euros',
  19. ranges: [
  20. {
  21. to: 100
  22. },
  23. {
  24. from: 100,
  25. to: 200
  26. },
  27. {
  28. from: 200
  29. }
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. )
  36. puts response
  1. const response = await client.search({
  2. index: "sales",
  3. runtime_mappings: {
  4. "price.euros": {
  5. type: "double",
  6. script: {
  7. source:
  8. "\n emit(doc['price'].value * params.conversion_rate)\n ",
  9. params: {
  10. conversion_rate: 0.835526591,
  11. },
  12. },
  13. },
  14. },
  15. aggs: {
  16. price_ranges: {
  17. range: {
  18. field: "price.euros",
  19. ranges: [
  20. {
  21. to: 100,
  22. },
  23. {
  24. from: 100,
  25. to: 200,
  26. },
  27. {
  28. from: 200,
  29. },
  30. ],
  31. },
  32. },
  33. },
  34. });
  35. console.log(response);
  1. GET sales/_search
  2. {
  3. "runtime_mappings": {
  4. "price.euros": {
  5. "type": "double",
  6. "script": {
  7. "source": """
  8. emit(doc['price'].value * params.conversion_rate)
  9. """,
  10. "params": {
  11. "conversion_rate": 0.835526591
  12. }
  13. }
  14. }
  15. },
  16. "aggs": {
  17. "price_ranges": {
  18. "range": {
  19. "field": "price.euros",
  20. "ranges": [
  21. { "to": 100 },
  22. { "from": 100, "to": 200 },
  23. { "from": 200 }
  24. ]
  25. }
  26. }
  27. }
  28. }

Sub Aggregations

The following example not only “buckets” the documents into the different ranges but also computes statistics over the prices in each price range:

  1. resp = client.search(
  2. index="sales",
  3. aggs={
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "ranges": [
  8. {
  9. "to": 100
  10. },
  11. {
  12. "from": 100,
  13. "to": 200
  14. },
  15. {
  16. "from": 200
  17. }
  18. ]
  19. },
  20. "aggs": {
  21. "price_stats": {
  22. "stats": {
  23. "field": "price"
  24. }
  25. }
  26. }
  27. }
  28. },
  29. )
  30. print(resp)
  1. response = client.search(
  2. index: 'sales',
  3. body: {
  4. aggregations: {
  5. price_ranges: {
  6. range: {
  7. field: 'price',
  8. ranges: [
  9. {
  10. to: 100
  11. },
  12. {
  13. from: 100,
  14. to: 200
  15. },
  16. {
  17. from: 200
  18. }
  19. ]
  20. },
  21. aggregations: {
  22. price_stats: {
  23. stats: {
  24. field: 'price'
  25. }
  26. }
  27. }
  28. }
  29. }
  30. }
  31. )
  32. puts response
  1. const response = await client.search({
  2. index: "sales",
  3. aggs: {
  4. price_ranges: {
  5. range: {
  6. field: "price",
  7. ranges: [
  8. {
  9. to: 100,
  10. },
  11. {
  12. from: 100,
  13. to: 200,
  14. },
  15. {
  16. from: 200,
  17. },
  18. ],
  19. },
  20. aggs: {
  21. price_stats: {
  22. stats: {
  23. field: "price",
  24. },
  25. },
  26. },
  27. },
  28. },
  29. });
  30. console.log(response);
  1. GET sales/_search
  2. {
  3. "aggs": {
  4. "price_ranges": {
  5. "range": {
  6. "field": "price",
  7. "ranges": [
  8. { "to": 100 },
  9. { "from": 100, "to": 200 },
  10. { "from": 200 }
  11. ]
  12. },
  13. "aggs": {
  14. "price_stats": {
  15. "stats": { "field": "price" }
  16. }
  17. }
  18. }
  19. }
  20. }

Response:

  1. {
  2. ...
  3. "aggregations": {
  4. "price_ranges": {
  5. "buckets": [
  6. {
  7. "key": "*-100.0",
  8. "to": 100.0,
  9. "doc_count": 2,
  10. "price_stats": {
  11. "count": 2,
  12. "min": 10.0,
  13. "max": 50.0,
  14. "avg": 30.0,
  15. "sum": 60.0
  16. }
  17. },
  18. {
  19. "key": "100.0-200.0",
  20. "from": 100.0,
  21. "to": 200.0,
  22. "doc_count": 2,
  23. "price_stats": {
  24. "count": 2,
  25. "min": 150.0,
  26. "max": 175.0,
  27. "avg": 162.5,
  28. "sum": 325.0
  29. }
  30. },
  31. {
  32. "key": "200.0-*",
  33. "from": 200.0,
  34. "doc_count": 3,
  35. "price_stats": {
  36. "count": 3,
  37. "min": 200.0,
  38. "max": 200.0,
  39. "avg": 200.0,
  40. "sum": 600.0
  41. }
  42. }
  43. ]
  44. }
  45. }
  46. }

Histogram fields

Running a range aggregation over histogram fields computes the total number of counts for each configured range.

This is done without interpolating between the histogram field values. Consequently, it is possible to have a range that is “in-between” two histogram values. The resulting range bucket would have a zero doc count.

Here is an example, executing a range aggregation against the following index that stores pre-aggregated histograms with latency metrics (in milliseconds) for different networks:

  1. resp = client.indices.create(
  2. index="metrics_index",
  3. mappings={
  4. "properties": {
  5. "network": {
  6. "properties": {
  7. "name": {
  8. "type": "keyword"
  9. }
  10. }
  11. },
  12. "latency_histo": {
  13. "type": "histogram"
  14. }
  15. }
  16. },
  17. )
  18. print(resp)
  19. resp1 = client.index(
  20. index="metrics_index",
  21. id="1",
  22. refresh=True,
  23. document={
  24. "network.name": "net-1",
  25. "latency_histo": {
  26. "values": [
  27. 1,
  28. 3,
  29. 8,
  30. 12,
  31. 15
  32. ],
  33. "counts": [
  34. 3,
  35. 7,
  36. 23,
  37. 12,
  38. 6
  39. ]
  40. }
  41. },
  42. )
  43. print(resp1)
  44. resp2 = client.index(
  45. index="metrics_index",
  46. id="2",
  47. refresh=True,
  48. document={
  49. "network.name": "net-2",
  50. "latency_histo": {
  51. "values": [
  52. 1,
  53. 6,
  54. 8,
  55. 12,
  56. 14
  57. ],
  58. "counts": [
  59. 8,
  60. 17,
  61. 8,
  62. 7,
  63. 6
  64. ]
  65. }
  66. },
  67. )
  68. print(resp2)
  69. resp3 = client.search(
  70. index="metrics_index",
  71. size="0",
  72. filter_path="aggregations",
  73. aggs={
  74. "latency_ranges": {
  75. "range": {
  76. "field": "latency_histo",
  77. "ranges": [
  78. {
  79. "to": 2
  80. },
  81. {
  82. "from": 2,
  83. "to": 3
  84. },
  85. {
  86. "from": 3,
  87. "to": 10
  88. },
  89. {
  90. "from": 10
  91. }
  92. ]
  93. }
  94. }
  95. },
  96. )
  97. print(resp3)
  1. response = client.indices.create(
  2. index: 'metrics_index',
  3. body: {
  4. mappings: {
  5. properties: {
  6. network: {
  7. properties: {
  8. name: {
  9. type: 'keyword'
  10. }
  11. }
  12. },
  13. latency_histo: {
  14. type: 'histogram'
  15. }
  16. }
  17. }
  18. }
  19. )
  20. puts response
  21. response = client.index(
  22. index: 'metrics_index',
  23. id: 1,
  24. refresh: true,
  25. body: {
  26. 'network.name' => 'net-1',
  27. latency_histo: {
  28. values: [
  29. 1,
  30. 3,
  31. 8,
  32. 12,
  33. 15
  34. ],
  35. counts: [
  36. 3,
  37. 7,
  38. 23,
  39. 12,
  40. 6
  41. ]
  42. }
  43. }
  44. )
  45. puts response
  46. response = client.index(
  47. index: 'metrics_index',
  48. id: 2,
  49. refresh: true,
  50. body: {
  51. 'network.name' => 'net-2',
  52. latency_histo: {
  53. values: [
  54. 1,
  55. 6,
  56. 8,
  57. 12,
  58. 14
  59. ],
  60. counts: [
  61. 8,
  62. 17,
  63. 8,
  64. 7,
  65. 6
  66. ]
  67. }
  68. }
  69. )
  70. puts response
  71. response = client.search(
  72. index: 'metrics_index',
  73. size: 0,
  74. filter_path: 'aggregations',
  75. body: {
  76. aggregations: {
  77. latency_ranges: {
  78. range: {
  79. field: 'latency_histo',
  80. ranges: [
  81. {
  82. to: 2
  83. },
  84. {
  85. from: 2,
  86. to: 3
  87. },
  88. {
  89. from: 3,
  90. to: 10
  91. },
  92. {
  93. from: 10
  94. }
  95. ]
  96. }
  97. }
  98. }
  99. }
  100. )
  101. puts response
  1. const response = await client.indices.create({
  2. index: "metrics_index",
  3. mappings: {
  4. properties: {
  5. network: {
  6. properties: {
  7. name: {
  8. type: "keyword",
  9. },
  10. },
  11. },
  12. latency_histo: {
  13. type: "histogram",
  14. },
  15. },
  16. },
  17. });
  18. console.log(response);
  19. const response1 = await client.index({
  20. index: "metrics_index",
  21. id: 1,
  22. refresh: "true",
  23. document: {
  24. "network.name": "net-1",
  25. latency_histo: {
  26. values: [1, 3, 8, 12, 15],
  27. counts: [3, 7, 23, 12, 6],
  28. },
  29. },
  30. });
  31. console.log(response1);
  32. const response2 = await client.index({
  33. index: "metrics_index",
  34. id: 2,
  35. refresh: "true",
  36. document: {
  37. "network.name": "net-2",
  38. latency_histo: {
  39. values: [1, 6, 8, 12, 14],
  40. counts: [8, 17, 8, 7, 6],
  41. },
  42. },
  43. });
  44. console.log(response2);
  45. const response3 = await client.search({
  46. index: "metrics_index",
  47. size: 0,
  48. filter_path: "aggregations",
  49. aggs: {
  50. latency_ranges: {
  51. range: {
  52. field: "latency_histo",
  53. ranges: [
  54. {
  55. to: 2,
  56. },
  57. {
  58. from: 2,
  59. to: 3,
  60. },
  61. {
  62. from: 3,
  63. to: 10,
  64. },
  65. {
  66. from: 10,
  67. },
  68. ],
  69. },
  70. },
  71. },
  72. });
  73. console.log(response3);
  1. PUT metrics_index
  2. {
  3. "mappings": {
  4. "properties": {
  5. "network": {
  6. "properties": {
  7. "name": {
  8. "type": "keyword"
  9. }
  10. }
  11. },
  12. "latency_histo": {
  13. "type": "histogram"
  14. }
  15. }
  16. }
  17. }
  18. PUT metrics_index/_doc/1?refresh
  19. {
  20. "network.name" : "net-1",
  21. "latency_histo" : {
  22. "values" : [1, 3, 8, 12, 15],
  23. "counts" : [3, 7, 23, 12, 6]
  24. }
  25. }
  26. PUT metrics_index/_doc/2?refresh
  27. {
  28. "network.name" : "net-2",
  29. "latency_histo" : {
  30. "values" : [1, 6, 8, 12, 14],
  31. "counts" : [8, 17, 8, 7, 6]
  32. }
  33. }
  34. GET metrics_index/_search?size=0&filter_path=aggregations
  35. {
  36. "aggs": {
  37. "latency_ranges": {
  38. "range": {
  39. "field": "latency_histo",
  40. "ranges": [
  41. {"to": 2},
  42. {"from": 2, "to": 3},
  43. {"from": 3, "to": 10},
  44. {"from": 10}
  45. ]
  46. }
  47. }
  48. }
  49. }

The range aggregation will sum the counts of each range computed based on the values and return the following output:

  1. {
  2. "aggregations": {
  3. "latency_ranges": {
  4. "buckets": [
  5. {
  6. "key": "*-2.0",
  7. "to": 2.0,
  8. "doc_count": 11
  9. },
  10. {
  11. "key": "2.0-3.0",
  12. "from": 2.0,
  13. "to": 3.0,
  14. "doc_count": 0
  15. },
  16. {
  17. "key": "3.0-10.0",
  18. "from": 3.0,
  19. "to": 10.0,
  20. "doc_count": 55
  21. },
  22. {
  23. "key": "10.0-*",
  24. "from": 10.0,
  25. "doc_count": 31
  26. }
  27. ]
  28. }
  29. }
  30. }

Range aggregation is a bucket aggregation, which partitions documents into buckets rather than calculating metrics over fields like metrics aggregations do. Each bucket represents a collection of documents which sub-aggregations can run on. On the other hand, a histogram field is a pre-aggregated field representing multiple values inside a single field: buckets of numerical data and a count of items/documents for each bucket. This mismatch between the range aggregation's expected input (raw documents) and the histogram field (which provides summary information) limits the outcome of the aggregation to only the doc counts for each bucket.

Consequently, when executing a range aggregation over a histogram field, no sub-aggregations are allowed.