Avg aggregation

Avg aggregation

A single-value metrics aggregation that computes the average of numeric values that are extracted from the aggregated documents. These values can be extracted either from specific numeric or histogram fields in the documents.

Assuming the data consists of documents representing students' exam grades (between 0 and 100), we can average their scores with:

  1. resp = client.search(
  2. index="exams",
  3. size="0",
  4. aggs={
  5. "avg_grade": {
  6. "avg": {
  7. "field": "grade"
  8. }
  9. }
  10. },
  11. )
  12. print(resp)
  1. response = client.search(
  2. index: 'exams',
  3. size: 0,
  4. body: {
  5. aggregations: {
  6. avg_grade: {
  7. avg: {
  8. field: 'grade'
  9. }
  10. }
  11. }
  12. }
  13. )
  14. puts response
  1. const response = await client.search({
  2. index: "exams",
  3. size: 0,
  4. aggs: {
  5. avg_grade: {
  6. avg: {
  7. field: "grade",
  8. },
  9. },
  10. },
  11. });
  12. console.log(response);
  1. POST /exams/_search?size=0
  2. {
  3. "aggs": {
  4. "avg_grade": { "avg": { "field": "grade" } }
  5. }
  6. }

The above aggregation computes the average grade over all documents. The aggregation type is avg, and the field setting defines the numeric field of the documents over which the average is computed. The above will return the following:

  1. {
  2. ...
  3. "aggregations": {
  4. "avg_grade": {
  5. "value": 75.0
  6. }
  7. }
  8. }

The name of the aggregation (avg_grade above) also serves as the key by which the aggregation result can be retrieved from the returned response.

Script

Let’s say the exam was exceedingly difficult, and you need to apply a grade correction. Average a runtime field to get a corrected average:

  1. resp = client.search(
  2. index="exams",
  3. size="0",
  4. runtime_mappings={
  5. "grade.corrected": {
  6. "type": "double",
  7. "script": {
  8. "source": "emit(Math.min(100, doc['grade'].value * params.correction))",
  9. "params": {
  10. "correction": 1.2
  11. }
  12. }
  13. }
  14. },
  15. aggs={
  16. "avg_corrected_grade": {
  17. "avg": {
  18. "field": "grade.corrected"
  19. }
  20. }
  21. },
  22. )
  23. print(resp)
  1. response = client.search(
  2. index: 'exams',
  3. size: 0,
  4. body: {
  5. runtime_mappings: {
  6. 'grade.corrected' => {
  7. type: 'double',
  8. script: {
  9. source: "emit(Math.min(100, doc['grade'].value * params.correction))",
  10. params: {
  11. correction: 1.2
  12. }
  13. }
  14. }
  15. },
  16. aggregations: {
  17. avg_corrected_grade: {
  18. avg: {
  19. field: 'grade.corrected'
  20. }
  21. }
  22. }
  23. }
  24. )
  25. puts response
  1. const response = await client.search({
  2. index: "exams",
  3. size: 0,
  4. runtime_mappings: {
  5. "grade.corrected": {
  6. type: "double",
  7. script: {
  8. source: "emit(Math.min(100, doc['grade'].value * params.correction))",
  9. params: {
  10. correction: 1.2,
  11. },
  12. },
  13. },
  14. },
  15. aggs: {
  16. avg_corrected_grade: {
  17. avg: {
  18. field: "grade.corrected",
  19. },
  20. },
  21. },
  22. });
  23. console.log(response);
  1. POST /exams/_search?size=0
  2. {
  3. "runtime_mappings": {
  4. "grade.corrected": {
  5. "type": "double",
  6. "script": {
  7. "source": "emit(Math.min(100, doc['grade'].value * params.correction))",
  8. "params": {
  9. "correction": 1.2
  10. }
  11. }
  12. }
  13. },
  14. "aggs": {
  15. "avg_corrected_grade": {
  16. "avg": {
  17. "field": "grade.corrected"
  18. }
  19. }
  20. }
  21. }

Missing value

The missing parameter defines how documents that are missing a value should be treated. By default, they are ignored, but it is also possible to treat them as if they had a value.

  1. resp = client.search(
  2. index="exams",
  3. size="0",
  4. aggs={
  5. "grade_avg": {
  6. "avg": {
  7. "field": "grade",
  8. "missing": 10
  9. }
  10. }
  11. },
  12. )
  13. print(resp)
  1. response = client.search(
  2. index: 'exams',
  3. size: 0,
  4. body: {
  5. aggregations: {
  6. grade_avg: {
  7. avg: {
  8. field: 'grade',
  9. missing: 10
  10. }
  11. }
  12. }
  13. }
  14. )
  15. puts response
  1. const response = await client.search({
  2. index: "exams",
  3. size: 0,
  4. aggs: {
  5. grade_avg: {
  6. avg: {
  7. field: "grade",
  8. missing: 10,
  9. },
  10. },
  11. },
  12. });
  13. console.log(response);
  1. POST /exams/_search?size=0
  2. {
  3. "aggs": {
  4. "grade_avg": {
  5. "avg": {
  6. "field": "grade",
  7. "missing": 10
  8. }
  9. }
  10. }
  11. }

Documents without a value in the grade field will be treated as if they had the value 10, so each of them contributes a grade of 10 to the average.

Histogram fields

When the average is computed on histogram fields, the result of the aggregation is the weighted average of all elements in the values array, where each element is weighted by the number in the same position in the counts array.

For example, for the following index that stores pre-aggregated histograms with latency metrics for different networks:

  1. resp = client.index(
  2. index="metrics_index",
  3. id="1",
  4. document={
  5. "network.name": "net-1",
  6. "latency_histo": {
  7. "values": [
  8. 0.1,
  9. 0.2,
  10. 0.3,
  11. 0.4,
  12. 0.5
  13. ],
  14. "counts": [
  15. 3,
  16. 7,
  17. 23,
  18. 12,
  19. 6
  20. ]
  21. }
  22. },
  23. )
  24. print(resp)
  25. resp1 = client.index(
  26. index="metrics_index",
  27. id="2",
  28. document={
  29. "network.name": "net-2",
  30. "latency_histo": {
  31. "values": [
  32. 0.1,
  33. 0.2,
  34. 0.3,
  35. 0.4,
  36. 0.5
  37. ],
  38. "counts": [
  39. 8,
  40. 17,
  41. 8,
  42. 7,
  43. 6
  44. ]
  45. }
  46. },
  47. )
  48. print(resp1)
  49. resp2 = client.search(
  50. index="metrics_index",
  51. size="0",
  52. aggs={
  53. "avg_latency": {
  54. "avg": {
  55. "field": "latency_histo"
  56. }
  57. }
  58. },
  59. )
  60. print(resp2)
  1. response = client.index(
  2. index: 'metrics_index',
  3. id: 1,
  4. body: {
  5. 'network.name' => 'net-1',
  6. latency_histo: {
  7. values: [
  8. 0.1,
  9. 0.2,
  10. 0.3,
  11. 0.4,
  12. 0.5
  13. ],
  14. counts: [
  15. 3,
  16. 7,
  17. 23,
  18. 12,
  19. 6
  20. ]
  21. }
  22. }
  23. )
  24. puts response
  25. response = client.index(
  26. index: 'metrics_index',
  27. id: 2,
  28. body: {
  29. 'network.name' => 'net-2',
  30. latency_histo: {
  31. values: [
  32. 0.1,
  33. 0.2,
  34. 0.3,
  35. 0.4,
  36. 0.5
  37. ],
  38. counts: [
  39. 8,
  40. 17,
  41. 8,
  42. 7,
  43. 6
  44. ]
  45. }
  46. }
  47. )
  48. puts response
  49. response = client.search(
  50. index: 'metrics_index',
  51. size: 0,
  52. body: {
  53. aggregations: {
  54. avg_latency: {
  55. avg: {
  56. field: 'latency_histo'
  57. }
  58. }
  59. }
  60. }
  61. )
  62. puts response
  1. const response = await client.index({
  2. index: "metrics_index",
  3. id: 1,
  4. document: {
  5. "network.name": "net-1",
  6. latency_histo: {
  7. values: [0.1, 0.2, 0.3, 0.4, 0.5],
  8. counts: [3, 7, 23, 12, 6],
  9. },
  10. },
  11. });
  12. console.log(response);
  13. const response1 = await client.index({
  14. index: "metrics_index",
  15. id: 2,
  16. document: {
  17. "network.name": "net-2",
  18. latency_histo: {
  19. values: [0.1, 0.2, 0.3, 0.4, 0.5],
  20. counts: [8, 17, 8, 7, 6],
  21. },
  22. },
  23. });
  24. console.log(response1);
  25. const response2 = await client.search({
  26. index: "metrics_index",
  27. size: 0,
  28. aggs: {
  29. avg_latency: {
  30. avg: {
  31. field: "latency_histo",
  32. },
  33. },
  34. },
  35. });
  36. console.log(response2);
  1. PUT metrics_index/_doc/1
  2. {
  3. "network.name" : "net-1",
  4. "latency_histo" : {
  5. "values" : [0.1, 0.2, 0.3, 0.4, 0.5],
  6. "counts" : [3, 7, 23, 12, 6]
  7. }
  8. }
  9. PUT metrics_index/_doc/2
  10. {
  11. "network.name" : "net-2",
  12. "latency_histo" : {
  13. "values" : [0.1, 0.2, 0.3, 0.4, 0.5],
  14. "counts" : [8, 17, 8, 7, 6]
  15. }
  16. }
  17. POST /metrics_index/_search?size=0
  18. {
  19. "aggs": {
  20. "avg_latency":
  21. { "avg": { "field": "latency_histo" }
  22. }
  23. }
  24. }

For each histogram field, the avg aggregation adds up each number in the values array multiplied by its associated count in the counts array. It then divides the sum of these products across all histograms by the sum of all counts (here, (16.4 + 12.4) / (51 + 46) = 28.8 / 97) and returns the following result:

  1. {
  2. ...
  3. "aggregations": {
  4. "avg_latency": {
  5. "value": 0.29690721649
  6. }
  7. }
  8. }