Filters aggregation

Filters aggregation

A multi-bucket aggregation where each bucket contains the documents that match a query.

Example:

  1. resp = client.bulk(
  2. index="logs",
  3. refresh=True,
  4. operations=[
  5. {
  6. "index": {
  7. "_id": 1
  8. }
  9. },
  10. {
  11. "body": "warning: page could not be rendered"
  12. },
  13. {
  14. "index": {
  15. "_id": 2
  16. }
  17. },
  18. {
  19. "body": "authentication error"
  20. },
  21. {
  22. "index": {
  23. "_id": 3
  24. }
  25. },
  26. {
  27. "body": "warning: connection timed out"
  28. }
  29. ],
  30. )
  31. print(resp)
  32. resp1 = client.search(
  33. index="logs",
  34. size=0,
  35. aggs={
  36. "messages": {
  37. "filters": {
  38. "filters": {
  39. "errors": {
  40. "match": {
  41. "body": "error"
  42. }
  43. },
  44. "warnings": {
  45. "match": {
  46. "body": "warning"
  47. }
  48. }
  49. }
  50. }
  51. }
  52. },
  53. )
  54. print(resp1)
  1. response = client.bulk(
  2. index: 'logs',
  3. refresh: true,
  4. body: [
  5. {
  6. index: {
  7. _id: 1
  8. }
  9. },
  10. {
  11. body: 'warning: page could not be rendered'
  12. },
  13. {
  14. index: {
  15. _id: 2
  16. }
  17. },
  18. {
  19. body: 'authentication error'
  20. },
  21. {
  22. index: {
  23. _id: 3
  24. }
  25. },
  26. {
  27. body: 'warning: connection timed out'
  28. }
  29. ]
  30. )
  31. puts response
  32. response = client.search(
  33. index: 'logs',
  34. body: {
  35. size: 0,
  36. aggregations: {
  37. messages: {
  38. filters: {
  39. filters: {
  40. errors: {
  41. match: {
  42. body: 'error'
  43. }
  44. },
  45. warnings: {
  46. match: {
  47. body: 'warning'
  48. }
  49. }
  50. }
  51. }
  52. }
  53. }
  54. }
  55. )
  56. puts response
  1. const response = await client.bulk({
  2. index: "logs",
  3. refresh: "true",
  4. operations: [
  5. {
  6. index: {
  7. _id: 1,
  8. },
  9. },
  10. {
  11. body: "warning: page could not be rendered",
  12. },
  13. {
  14. index: {
  15. _id: 2,
  16. },
  17. },
  18. {
  19. body: "authentication error",
  20. },
  21. {
  22. index: {
  23. _id: 3,
  24. },
  25. },
  26. {
  27. body: "warning: connection timed out",
  28. },
  29. ],
  30. });
  31. console.log(response);
  32. const response1 = await client.search({
  33. index: "logs",
  34. size: 0,
  35. aggs: {
  36. messages: {
  37. filters: {
  38. filters: {
  39. errors: {
  40. match: {
  41. body: "error",
  42. },
  43. },
  44. warnings: {
  45. match: {
  46. body: "warning",
  47. },
  48. },
  49. },
  50. },
  51. },
  52. },
  53. });
  54. console.log(response1);
  1. PUT /logs/_bulk?refresh
  2. { "index" : { "_id" : 1 } }
  3. { "body" : "warning: page could not be rendered" }
  4. { "index" : { "_id" : 2 } }
  5. { "body" : "authentication error" }
  6. { "index" : { "_id" : 3 } }
  7. { "body" : "warning: connection timed out" }
  8. GET logs/_search
  9. {
  10. "size": 0,
  11. "aggs" : {
  12. "messages" : {
  13. "filters" : {
  14. "filters" : {
  15. "errors" : { "match" : { "body" : "error" }},
  16. "warnings" : { "match" : { "body" : "warning" }}
  17. }
  18. }
  19. }
  20. }
  21. }

In the above example, we analyze log messages. The aggregation will build two collections (buckets) of log messages - one for all those containing an error, and another for all those containing a warning.

Response:

  1. {
  2. "took": 9,
  3. "timed_out": false,
  4. "_shards": ...,
  5. "hits": ...,
  6. "aggregations": {
  7. "messages": {
  8. "buckets": {
  9. "errors": {
  10. "doc_count": 1
  11. },
  12. "warnings": {
  13. "doc_count": 2
  14. }
  15. }
  16. }
  17. }
  18. }

Anonymous filters

The filters field can also be provided as an array of filters, as in the following request:

  1. resp = client.search(
  2. index="logs",
  3. size=0,
  4. aggs={
  5. "messages": {
  6. "filters": {
  7. "filters": [
  8. {
  9. "match": {
  10. "body": "error"
  11. }
  12. },
  13. {
  14. "match": {
  15. "body": "warning"
  16. }
  17. }
  18. ]
  19. }
  20. }
  21. },
  22. )
  23. print(resp)
  1. response = client.search(
  2. index: 'logs',
  3. body: {
  4. size: 0,
  5. aggregations: {
  6. messages: {
  7. filters: {
  8. filters: [
  9. {
  10. match: {
  11. body: 'error'
  12. }
  13. },
  14. {
  15. match: {
  16. body: 'warning'
  17. }
  18. }
  19. ]
  20. }
  21. }
  22. }
  23. }
  24. )
  25. puts response
  1. const response = await client.search({
  2. index: "logs",
  3. size: 0,
  4. aggs: {
  5. messages: {
  6. filters: {
  7. filters: [
  8. {
  9. match: {
  10. body: "error",
  11. },
  12. },
  13. {
  14. match: {
  15. body: "warning",
  16. },
  17. },
  18. ],
  19. },
  20. },
  21. },
  22. });
  23. console.log(response);
  1. GET logs/_search
  2. {
  3. "size": 0,
  4. "aggs" : {
  5. "messages" : {
  6. "filters" : {
  7. "filters" : [
  8. { "match" : { "body" : "error" }},
  9. { "match" : { "body" : "warning" }}
  10. ]
  11. }
  12. }
  13. }
  14. }

The filtered buckets are returned in the same order as provided in the request. The response for this example would be:

  1. {
  2. "took": 4,
  3. "timed_out": false,
  4. "_shards": ...,
  5. "hits": ...,
  6. "aggregations": {
  7. "messages": {
  8. "buckets": [
  9. {
  10. "doc_count": 1
  11. },
  12. {
  13. "doc_count": 2
  14. }
  15. ]
  16. }
  17. }
  18. }

Other Bucket

The other_bucket parameter can be set to add a bucket to the response which will contain all documents that do not match any of the given filters. The value of this parameter can be as follows:

false

Does not compute the other bucket

true

Returns the other bucket either in a bucket (named _other_ by default) if named filters are being used, or as the last bucket if anonymous filters are being used

The other_bucket_key parameter can be used to set the key for the other bucket to a value other than the default _other_. Setting this parameter will implicitly set the other_bucket parameter to true.

The following snippet shows a request in which the other bucket is requested to be named other_messages.

  1. resp = client.index(
  2. index="logs",
  3. id="4",
  4. refresh=True,
  5. document={
  6. "body": "info: user Bob logged out"
  7. },
  8. )
  9. print(resp)
  10. resp1 = client.search(
  11. index="logs",
  12. size=0,
  13. aggs={
  14. "messages": {
  15. "filters": {
  16. "other_bucket_key": "other_messages",
  17. "filters": {
  18. "errors": {
  19. "match": {
  20. "body": "error"
  21. }
  22. },
  23. "warnings": {
  24. "match": {
  25. "body": "warning"
  26. }
  27. }
  28. }
  29. }
  30. }
  31. },
  32. )
  33. print(resp1)
  1. response = client.index(
  2. index: 'logs',
  3. id: 4,
  4. refresh: true,
  5. body: {
  6. body: 'info: user Bob logged out'
  7. }
  8. )
  9. puts response
  10. response = client.search(
  11. index: 'logs',
  12. body: {
  13. size: 0,
  14. aggregations: {
  15. messages: {
  16. filters: {
  17. other_bucket_key: 'other_messages',
  18. filters: {
  19. errors: {
  20. match: {
  21. body: 'error'
  22. }
  23. },
  24. warnings: {
  25. match: {
  26. body: 'warning'
  27. }
  28. }
  29. }
  30. }
  31. }
  32. }
  33. }
  34. )
  35. puts response
  1. const response = await client.index({
  2. index: "logs",
  3. id: 4,
  4. refresh: "true",
  5. document: {
  6. body: "info: user Bob logged out",
  7. },
  8. });
  9. console.log(response);
  10. const response1 = await client.search({
  11. index: "logs",
  12. size: 0,
  13. aggs: {
  14. messages: {
  15. filters: {
  16. other_bucket_key: "other_messages",
  17. filters: {
  18. errors: {
  19. match: {
  20. body: "error",
  21. },
  22. },
  23. warnings: {
  24. match: {
  25. body: "warning",
  26. },
  27. },
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response1);
  1. PUT logs/_doc/4?refresh
  2. {
  3. "body": "info: user Bob logged out"
  4. }
  5. GET logs/_search
  6. {
  7. "size": 0,
  8. "aggs" : {
  9. "messages" : {
  10. "filters" : {
  11. "other_bucket_key": "other_messages",
  12. "filters" : {
  13. "errors" : { "match" : { "body" : "error" }},
  14. "warnings" : { "match" : { "body" : "warning" }}
  15. }
  16. }
  17. }
  18. }
  19. }

The response would be something like the following:

  1. {
  2. "took": 3,
  3. "timed_out": false,
  4. "_shards": ...,
  5. "hits": ...,
  6. "aggregations": {
  7. "messages": {
  8. "buckets": {
  9. "errors": {
  10. "doc_count": 1
  11. },
  12. "warnings": {
  13. "doc_count": 2
  14. },
  15. "other_messages": {
  16. "doc_count": 1
  17. }
  18. }
  19. }
  20. }
  21. }

Non-keyed Response

By default, the named filters aggregation returns the buckets as an object. However, JSON does not guarantee the order of elements in an object, which is a problem when the order of the buckets matters, such as when they are sorted with bucket sort. You can use the keyed parameter to return the buckets as an array of objects instead. The value of this parameter can be as follows:

true

(Default) Returns the buckets as an object

false

Returns the buckets as an array of objects

This parameter is ignored by Anonymous filters.

Example:

  1. resp = client.search(
  2. index="sales",
  3. size="0",
  4. filter_path="aggregations",
  5. aggs={
  6. "the_filter": {
  7. "filters": {
  8. "keyed": False,
  9. "filters": {
  10. "t-shirt": {
  11. "term": {
  12. "type": "t-shirt"
  13. }
  14. },
  15. "hat": {
  16. "term": {
  17. "type": "hat"
  18. }
  19. }
  20. }
  21. },
  22. "aggs": {
  23. "avg_price": {
  24. "avg": {
  25. "field": "price"
  26. }
  27. },
  28. "sort_by_avg_price": {
  29. "bucket_sort": {
  30. "sort": {
  31. "avg_price": "asc"
  32. }
  33. }
  34. }
  35. }
  36. }
  37. },
  38. )
  39. print(resp)
  1. response = client.search(
  2. index: 'sales',
  3. size: 0,
  4. filter_path: 'aggregations',
  5. body: {
  6. aggregations: {
  7. the_filter: {
  8. filters: {
  9. keyed: false,
  10. filters: {
  11. "t-shirt": {
  12. term: {
  13. type: 't-shirt'
  14. }
  15. },
  16. hat: {
  17. term: {
  18. type: 'hat'
  19. }
  20. }
  21. }
  22. },
  23. aggregations: {
  24. avg_price: {
  25. avg: {
  26. field: 'price'
  27. }
  28. },
  29. sort_by_avg_price: {
  30. bucket_sort: {
  31. sort: {
  32. avg_price: 'asc'
  33. }
  34. }
  35. }
  36. }
  37. }
  38. }
  39. }
  40. )
  41. puts response
  1. const response = await client.search({
  2. index: "sales",
  3. size: 0,
  4. filter_path: "aggregations",
  5. aggs: {
  6. the_filter: {
  7. filters: {
  8. keyed: false,
  9. filters: {
  10. "t-shirt": {
  11. term: {
  12. type: "t-shirt",
  13. },
  14. },
  15. hat: {
  16. term: {
  17. type: "hat",
  18. },
  19. },
  20. },
  21. },
  22. aggs: {
  23. avg_price: {
  24. avg: {
  25. field: "price",
  26. },
  27. },
  28. sort_by_avg_price: {
  29. bucket_sort: {
  30. sort: {
  31. avg_price: "asc",
  32. },
  33. },
  34. },
  35. },
  36. },
  37. },
  38. });
  39. console.log(response);
  1. POST /sales/_search?size=0&filter_path=aggregations
  2. {
  3. "aggs": {
  4. "the_filter": {
  5. "filters": {
  6. "keyed": false,
  7. "filters": {
  8. "t-shirt": { "term": { "type": "t-shirt" } },
  9. "hat": { "term": { "type": "hat" } }
  10. }
  11. },
  12. "aggs": {
  13. "avg_price": { "avg": { "field": "price" } },
  14. "sort_by_avg_price": {
  15. "bucket_sort": { "sort": { "avg_price": "asc" } }
  16. }
  17. }
  18. }
  19. }
  20. }

Response:

  1. {
  2. "aggregations": {
  3. "the_filter": {
  4. "buckets": [
  5. {
  6. "key": "t-shirt",
  7. "doc_count": 3,
  8. "avg_price": { "value": 128.33333333333334 }
  9. },
  10. {
  11. "key": "hat",
  12. "doc_count": 3,
  13. "avg_price": { "value": 150.0 }
  14. }
  15. ]
  16. }
  17. }
  18. }