Geo-centroid aggregation

Geo-centroid aggregation

A metric aggregation that computes the weighted centroid from all coordinate values for geo fields.

Example:

  1. resp = client.indices.create(
  2. index="museums",
  3. mappings={
  4. "properties": {
  5. "location": {
  6. "type": "geo_point"
  7. }
  8. }
  9. },
  10. )
  11. print(resp)
  12. resp1 = client.bulk(
  13. index="museums",
  14. refresh=True,
  15. operations=[
  16. {
  17. "index": {
  18. "_id": 1
  19. }
  20. },
  21. {
  22. "location": "POINT (4.912350 52.374081)",
  23. "city": "Amsterdam",
  24. "name": "NEMO Science Museum"
  25. },
  26. {
  27. "index": {
  28. "_id": 2
  29. }
  30. },
  31. {
  32. "location": "POINT (4.901618 52.369219)",
  33. "city": "Amsterdam",
  34. "name": "Museum Het Rembrandthuis"
  35. },
  36. {
  37. "index": {
  38. "_id": 3
  39. }
  40. },
  41. {
  42. "location": "POINT (4.914722 52.371667)",
  43. "city": "Amsterdam",
  44. "name": "Nederlands Scheepvaartmuseum"
  45. },
  46. {
  47. "index": {
  48. "_id": 4
  49. }
  50. },
  51. {
  52. "location": "POINT (4.405200 51.222900)",
  53. "city": "Antwerp",
  54. "name": "Letterenhuis"
  55. },
  56. {
  57. "index": {
  58. "_id": 5
  59. }
  60. },
  61. {
  62. "location": "POINT (2.336389 48.861111)",
  63. "city": "Paris",
  64. "name": "Musée du Louvre"
  65. },
  66. {
  67. "index": {
  68. "_id": 6
  69. }
  70. },
  71. {
  72. "location": "POINT (2.327000 48.860000)",
  73. "city": "Paris",
  74. "name": "Musée d'Orsay"
  75. }
  76. ],
  77. )
  78. print(resp1)
  79. resp2 = client.search(
  80. index="museums",
  81. size="0",
  82. aggs={
  83. "centroid": {
  84. "geo_centroid": {
  85. "field": "location"
  86. }
  87. }
  88. },
  89. )
  90. print(resp2)
  1. response = client.indices.create(
  2. index: 'museums',
  3. body: {
  4. mappings: {
  5. properties: {
  6. location: {
  7. type: 'geo_point'
  8. }
  9. }
  10. }
  11. }
  12. )
  13. puts response
  14. response = client.bulk(
  15. index: 'museums',
  16. refresh: true,
  17. body: [
  18. {
  19. index: {
  20. _id: 1
  21. }
  22. },
  23. {
  24. location: 'POINT (4.912350 52.374081)',
  25. city: 'Amsterdam',
  26. name: 'NEMO Science Museum'
  27. },
  28. {
  29. index: {
  30. _id: 2
  31. }
  32. },
  33. {
  34. location: 'POINT (4.901618 52.369219)',
  35. city: 'Amsterdam',
  36. name: 'Museum Het Rembrandthuis'
  37. },
  38. {
  39. index: {
  40. _id: 3
  41. }
  42. },
  43. {
  44. location: 'POINT (4.914722 52.371667)',
  45. city: 'Amsterdam',
  46. name: 'Nederlands Scheepvaartmuseum'
  47. },
  48. {
  49. index: {
  50. _id: 4
  51. }
  52. },
  53. {
  54. location: 'POINT (4.405200 51.222900)',
  55. city: 'Antwerp',
  56. name: 'Letterenhuis'
  57. },
  58. {
  59. index: {
  60. _id: 5
  61. }
  62. },
  63. {
  64. location: 'POINT (2.336389 48.861111)',
  65. city: 'Paris',
  66. name: 'Musée du Louvre'
  67. },
  68. {
  69. index: {
  70. _id: 6
  71. }
  72. },
  73. {
  74. location: 'POINT (2.327000 48.860000)',
  75. city: 'Paris',
  76. name: "Musée d'Orsay"
  77. }
  78. ]
  79. )
  80. puts response
  81. response = client.search(
  82. index: 'museums',
  83. size: 0,
  84. body: {
  85. aggregations: {
  86. centroid: {
  87. geo_centroid: {
  88. field: 'location'
  89. }
  90. }
  91. }
  92. }
  93. )
  94. puts response
  1. const response = await client.indices.create({
  2. index: "museums",
  3. mappings: {
  4. properties: {
  5. location: {
  6. type: "geo_point",
  7. },
  8. },
  9. },
  10. });
  11. console.log(response);
  12. const response1 = await client.bulk({
  13. index: "museums",
  14. refresh: "true",
  15. operations: [
  16. {
  17. index: {
  18. _id: 1,
  19. },
  20. },
  21. {
  22. location: "POINT (4.912350 52.374081)",
  23. city: "Amsterdam",
  24. name: "NEMO Science Museum",
  25. },
  26. {
  27. index: {
  28. _id: 2,
  29. },
  30. },
  31. {
  32. location: "POINT (4.901618 52.369219)",
  33. city: "Amsterdam",
  34. name: "Museum Het Rembrandthuis",
  35. },
  36. {
  37. index: {
  38. _id: 3,
  39. },
  40. },
  41. {
  42. location: "POINT (4.914722 52.371667)",
  43. city: "Amsterdam",
  44. name: "Nederlands Scheepvaartmuseum",
  45. },
  46. {
  47. index: {
  48. _id: 4,
  49. },
  50. },
  51. {
  52. location: "POINT (4.405200 51.222900)",
  53. city: "Antwerp",
  54. name: "Letterenhuis",
  55. },
  56. {
  57. index: {
  58. _id: 5,
  59. },
  60. },
  61. {
  62. location: "POINT (2.336389 48.861111)",
  63. city: "Paris",
  64. name: "Musée du Louvre",
  65. },
  66. {
  67. index: {
  68. _id: 6,
  69. },
  70. },
  71. {
  72. location: "POINT (2.327000 48.860000)",
  73. city: "Paris",
  74. name: "Musée d'Orsay",
  75. },
  76. ],
  77. });
  78. console.log(response1);
  79. const response2 = await client.search({
  80. index: "museums",
  81. size: 0,
  82. aggs: {
  83. centroid: {
  84. geo_centroid: {
  85. field: "location",
  86. },
  87. },
  88. },
  89. });
  90. console.log(response2);
  1. PUT /museums
  2. {
  3. "mappings": {
  4. "properties": {
  5. "location": {
  6. "type": "geo_point"
  7. }
  8. }
  9. }
  10. }
  11. POST /museums/_bulk?refresh
  12. {"index":{"_id":1}}
  13. {"location": "POINT (4.912350 52.374081)", "city": "Amsterdam", "name": "NEMO Science Museum"}
  14. {"index":{"_id":2}}
  15. {"location": "POINT (4.901618 52.369219)", "city": "Amsterdam", "name": "Museum Het Rembrandthuis"}
  16. {"index":{"_id":3}}
  17. {"location": "POINT (4.914722 52.371667)", "city": "Amsterdam", "name": "Nederlands Scheepvaartmuseum"}
  18. {"index":{"_id":4}}
  19. {"location": "POINT (4.405200 51.222900)", "city": "Antwerp", "name": "Letterenhuis"}
  20. {"index":{"_id":5}}
  21. {"location": "POINT (2.336389 48.861111)", "city": "Paris", "name": "Musée du Louvre"}
  22. {"index":{"_id":6}}
  23. {"location": "POINT (2.327000 48.860000)", "city": "Paris", "name": "Musée d'Orsay"}
  24. POST /museums/_search?size=0
  25. {
  26. "aggs": {
  27. "centroid": {
  28. "geo_centroid": {
  29. "field": "location"
  30. }
  31. }
  32. }
  33. }

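The geo_centroid aggregation specifies the field to use for computing the centroid. (NOTE: the field must be of the geo_point type; geo_shape fields are also supported, as described in the geo_shape section further down.)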

The above aggregation demonstrates how one would compute the centroid of the location field for all museums’ documents.

The response for the above aggregation:

  1. {
  2. ...
  3. "aggregations": {
  4. "centroid": {
  5. "location": {
  6. "lat": 51.00982965203002,
  7. "lon": 3.9662131341174245
  8. },
  9. "count": 6
  10. }
  11. }
  12. }

The geo_centroid aggregation is more interesting when combined as a sub-aggregation to other bucket aggregations.

Example:

  1. resp = client.search(
  2. index="museums",
  3. size="0",
  4. aggs={
  5. "cities": {
  6. "terms": {
  7. "field": "city.keyword"
  8. },
  9. "aggs": {
  10. "centroid": {
  11. "geo_centroid": {
  12. "field": "location"
  13. }
  14. }
  15. }
  16. }
  17. },
  18. )
  19. print(resp)
  1. response = client.search(
  2. index: 'museums',
  3. size: 0,
  4. body: {
  5. aggregations: {
  6. cities: {
  7. terms: {
  8. field: 'city.keyword'
  9. },
  10. aggregations: {
  11. centroid: {
  12. geo_centroid: {
  13. field: 'location'
  14. }
  15. }
  16. }
  17. }
  18. }
  19. }
  20. )
  21. puts response
  1. const response = await client.search({
  2. index: "museums",
  3. size: 0,
  4. aggs: {
  5. cities: {
  6. terms: {
  7. field: "city.keyword",
  8. },
  9. aggs: {
  10. centroid: {
  11. geo_centroid: {
  12. field: "location",
  13. },
  14. },
  15. },
  16. },
  17. },
  18. });
  19. console.log(response);
  1. POST /museums/_search?size=0
  2. {
  3. "aggs": {
  4. "cities": {
  5. "terms": { "field": "city.keyword" },
  6. "aggs": {
  7. "centroid": {
  8. "geo_centroid": { "field": "location" }
  9. }
  10. }
  11. }
  12. }
  13. }

The above example uses geo_centroid as a sub-aggregation to a terms bucket aggregation for finding the central location for museums in each city.

The response for the above aggregation:

  1. {
  2. ...
  3. "aggregations": {
  4. "cities": {
  5. "sum_other_doc_count": 0,
  6. "doc_count_error_upper_bound": 0,
  7. "buckets": [
  8. {
  9. "key": "Amsterdam",
  10. "doc_count": 3,
  11. "centroid": {
  12. "location": {
  13. "lat": 52.371655656024814,
  14. "lon": 4.909563297405839
  15. },
  16. "count": 3
  17. }
  18. },
  19. {
  20. "key": "Paris",
  21. "doc_count": 2,
  22. "centroid": {
  23. "location": {
  24. "lat": 48.86055548675358,
  25. "lon": 2.3316944623366
  26. },
  27. "count": 2
  28. }
  29. },
  30. {
  31. "key": "Antwerp",
  32. "doc_count": 1,
  33. "centroid": {
  34. "location": {
  35. "lat": 51.22289997059852,
  36. "lon": 4.40519998781383
  37. },
  38. "count": 1
  39. }
  40. }
  41. ]
  42. }
  43. }
  44. }

Geo Centroid Aggregation on geo_shape fields

The centroid metric for geoshapes is more nuanced than for points. The centroid of a specific aggregation bucket containing shapes is the centroid of the highest-dimensionality shape type in the bucket. For example, if a bucket contains shapes comprising polygons and lines, then the lines do not contribute to the centroid metric. Each type of shape’s centroid is calculated differently. Envelopes and circles ingested via the Circle processor are treated as polygons.

Geometry Type | Centroid Calculation

[Multi]Point

equally weighted average of all the coordinates

[Multi]LineString

a weighted average of all the centroids of each segment, where the weight of each segment is its length in degrees

[Multi]Polygon

A weighted average of the centroids of all the triangles of a polygon, where the triangles are formed by every two consecutive vertices and the starting point. Holes have negative weights. Weights represent the area of the triangle, calculated in deg^2.

GeometryCollection

The centroid of all the underlying geometries with the highest dimension. If the collection contains polygons together with lines and/or points, then the lines and/or points are ignored. If it contains lines and points, then the points are ignored.

Example:

  1. resp = client.indices.create(
  2. index="places",
  3. mappings={
  4. "properties": {
  5. "geometry": {
  6. "type": "geo_shape"
  7. }
  8. }
  9. },
  10. )
  11. print(resp)
  12. resp1 = client.bulk(
  13. index="places",
  14. refresh=True,
  15. operations=[
  16. {
  17. "index": {
  18. "_id": 1
  19. }
  20. },
  21. {
  22. "name": "NEMO Science Museum",
  23. "geometry": "POINT(4.912350 52.374081)"
  24. },
  25. {
  26. "index": {
  27. "_id": 2
  28. }
  29. },
  30. {
  31. "name": "Sportpark De Weeren",
  32. "geometry": {
  33. "type": "Polygon",
  34. "coordinates": [
  35. [
  36. [
  37. 4.965305328369141,
  38. 52.39347642069457
  39. ],
  40. [
  41. 4.966979026794433,
  42. 52.391721758934835
  43. ],
  44. [
  45. 4.969425201416015,
  46. 52.39238958618537
  47. ],
  48. [
  49. 4.967944622039794,
  50. 52.39420969150824
  51. ],
  52. [
  53. 4.965305328369141,
  54. 52.39347642069457
  55. ]
  56. ]
  57. ]
  58. }
  59. }
  60. ],
  61. )
  62. print(resp1)
  63. resp2 = client.search(
  64. index="places",
  65. size="0",
  66. aggs={
  67. "centroid": {
  68. "geo_centroid": {
  69. "field": "geometry"
  70. }
  71. }
  72. },
  73. )
  74. print(resp2)
  1. response = client.indices.create(
  2. index: 'places',
  3. body: {
  4. mappings: {
  5. properties: {
  6. geometry: {
  7. type: 'geo_shape'
  8. }
  9. }
  10. }
  11. }
  12. )
  13. puts response
  14. response = client.bulk(
  15. index: 'places',
  16. refresh: true,
  17. body: [
  18. {
  19. index: {
  20. _id: 1
  21. }
  22. },
  23. {
  24. name: 'NEMO Science Museum',
  25. geometry: 'POINT(4.912350 52.374081)'
  26. },
  27. {
  28. index: {
  29. _id: 2
  30. }
  31. },
  32. {
  33. name: 'Sportpark De Weeren',
  34. geometry: {
  35. type: 'Polygon',
  36. coordinates: [
  37. [
  38. [
  39. 4.965305328369141,
  40. 52.39347642069457
  41. ],
  42. [
  43. 4.966979026794433,
  44. 52.391721758934835
  45. ],
  46. [
  47. 4.969425201416015,
  48. 52.39238958618537
  49. ],
  50. [
  51. 4.967944622039794,
  52. 52.39420969150824
  53. ],
  54. [
  55. 4.965305328369141,
  56. 52.39347642069457
  57. ]
  58. ]
  59. ]
  60. }
  61. }
  62. ]
  63. )
  64. puts response
  65. response = client.search(
  66. index: 'places',
  67. size: 0,
  68. body: {
  69. aggregations: {
  70. centroid: {
  71. geo_centroid: {
  72. field: 'geometry'
  73. }
  74. }
  75. }
  76. }
  77. )
  78. puts response
  1. const response = await client.indices.create({
  2. index: "places",
  3. mappings: {
  4. properties: {
  5. geometry: {
  6. type: "geo_shape",
  7. },
  8. },
  9. },
  10. });
  11. console.log(response);
  12. const response1 = await client.bulk({
  13. index: "places",
  14. refresh: "true",
  15. operations: [
  16. {
  17. index: {
  18. _id: 1,
  19. },
  20. },
  21. {
  22. name: "NEMO Science Museum",
  23. geometry: "POINT(4.912350 52.374081)",
  24. },
  25. {
  26. index: {
  27. _id: 2,
  28. },
  29. },
  30. {
  31. name: "Sportpark De Weeren",
  32. geometry: {
  33. type: "Polygon",
  34. coordinates: [
  35. [
  36. [4.965305328369141, 52.39347642069457],
  37. [4.966979026794433, 52.391721758934835],
  38. [4.969425201416015, 52.39238958618537],
  39. [4.967944622039794, 52.39420969150824],
  40. [4.965305328369141, 52.39347642069457],
  41. ],
  42. ],
  43. },
  44. },
  45. ],
  46. });
  47. console.log(response1);
  48. const response2 = await client.search({
  49. index: "places",
  50. size: 0,
  51. aggs: {
  52. centroid: {
  53. geo_centroid: {
  54. field: "geometry",
  55. },
  56. },
  57. },
  58. });
  59. console.log(response2);
  1. PUT /places
  2. {
  3. "mappings": {
  4. "properties": {
  5. "geometry": {
  6. "type": "geo_shape"
  7. }
  8. }
  9. }
  10. }
  11. POST /places/_bulk?refresh
  12. {"index":{"_id":1}}
  13. {"name": "NEMO Science Museum", "geometry": "POINT(4.912350 52.374081)" }
  14. {"index":{"_id":2}}
  15. {"name": "Sportpark De Weeren", "geometry": { "type": "Polygon", "coordinates": [ [ [ 4.965305328369141, 52.39347642069457 ], [ 4.966979026794433, 52.391721758934835 ], [ 4.969425201416015, 52.39238958618537 ], [ 4.967944622039794, 52.39420969150824 ], [ 4.965305328369141, 52.39347642069457 ] ] ] } }
  16. POST /places/_search?size=0
  17. {
  18. "aggs": {
  19. "centroid": {
  20. "geo_centroid": {
  21. "field": "geometry"
  22. }
  23. }
  24. }
  25. }
  1. {
  2. ...
  3. "aggregations": {
  4. "centroid": {
  5. "location": {
  6. "lat": 52.39296147599816,
  7. "lon": 4.967404240742326
  8. },
  9. "count": 2
  10. }
  11. }
  12. }

Using geo_centroid as a sub-aggregation of geohash_grid

The geohash_grid aggregation places documents, not individual geopoints, into buckets. If a document’s geo_point field contains multiple values, the document could be assigned to multiple buckets, even if one or more of its geopoints are outside the bucket boundaries.

If a geo_centroid sub-aggregation is also used, each centroid is calculated using all geopoints in a bucket, including those outside the bucket boundaries. This can result in centroids outside of bucket boundaries.