Stop analyzer

Stop analyzer

The stop analyzer is the same as the simple analyzer but adds support for removing stop words. It defaults to using the _english_ stop words.

Example output

  1. resp = client.indices.analyze(
  2. analyzer="stop",
  3. text="The 2 QUICK Brown-Foxes jumped over the lazy dog's bone.",
  4. )
  5. print(resp)
  1. response = client.indices.analyze(
  2. body: {
  3. analyzer: 'stop',
  4. text: "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
  5. }
  6. )
  7. puts response
  1. const response = await client.indices.analyze({
  2. analyzer: "stop",
  3. text: "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone.",
  4. });
  5. console.log(response);
  1. POST _analyze
  2. {
  3. "analyzer": "stop",
  4. "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
  5. }

The above sentence would produce the following terms:

  1. [ quick, brown, foxes, jumped, over, lazy, dog, s, bone ]

Configuration

The stop analyzer accepts the following parameters:

stopwords

A pre-defined stop words list like `_english_`, or an array containing a list of stop words. Defaults to `_english_`.

stopwords_path

The path to a file containing stop words. This path is relative to the Elasticsearch config directory.

See the Stop Token Filter for more information about stop word configuration.

Example configuration

In this example, we configure the stop analyzer to use a specified list of words as stop words:

  1. resp = client.indices.create(
  2. index="my-index-000001",
  3. settings={
  4. "analysis": {
  5. "analyzer": {
  6. "my_stop_analyzer": {
  7. "type": "stop",
  8. "stopwords": [
  9. "the",
  10. "over"
  11. ]
  12. }
  13. }
  14. }
  15. },
  16. )
  17. print(resp)
  18. resp1 = client.indices.analyze(
  19. index="my-index-000001",
  20. analyzer="my_stop_analyzer",
  21. text="The 2 QUICK Brown-Foxes jumped over the lazy dog's bone.",
  22. )
  23. print(resp1)
  1. response = client.indices.create(
  2. index: 'my-index-000001',
  3. body: {
  4. settings: {
  5. analysis: {
  6. analyzer: {
  7. my_stop_analyzer: {
  8. type: 'stop',
  9. stopwords: [
  10. 'the',
  11. 'over'
  12. ]
  13. }
  14. }
  15. }
  16. }
  17. }
  18. )
  19. puts response
  20. response = client.indices.analyze(
  21. index: 'my-index-000001',
  22. body: {
  23. analyzer: 'my_stop_analyzer',
  24. text: "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
  25. }
  26. )
  27. puts response
  1. const response = await client.indices.create({
  2. index: "my-index-000001",
  3. settings: {
  4. analysis: {
  5. analyzer: {
  6. my_stop_analyzer: {
  7. type: "stop",
  8. stopwords: ["the", "over"],
  9. },
  10. },
  11. },
  12. },
  13. });
  14. console.log(response);
  15. const response1 = await client.indices.analyze({
  16. index: "my-index-000001",
  17. analyzer: "my_stop_analyzer",
  18. text: "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone.",
  19. });
  20. console.log(response1);
  1. PUT my-index-000001
  2. {
  3. "settings": {
  4. "analysis": {
  5. "analyzer": {
  6. "my_stop_analyzer": {
  7. "type": "stop",
  8. "stopwords": ["the", "over"]
  9. }
  10. }
  11. }
  12. }
  13. }
  14. POST my-index-000001/_analyze
  15. {
  16. "analyzer": "my_stop_analyzer",
  17. "text": "The 2 QUICK Brown-Foxes jumped over the lazy dog's bone."
  18. }

The above example produces the following terms:

  1. [ quick, brown, foxes, jumped, lazy, dog, s, bone ]

Definition

It consists of:

Tokenizer: Lower Case Tokenizer (`lowercase`)

Token filters: Stop Token Filter (`stop`)

If you need to customize the stop analyzer beyond its configuration parameters, you need to recreate it as a custom analyzer and modify it, usually by adding token filters. The following example recreates the built-in stop analyzer, and you can use it as a starting point for further customization:

  1. resp = client.indices.create(
  2. index="stop_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": "_english_"
  9. }
  10. },
  11. "analyzer": {
  12. "rebuilt_stop": {
  13. "tokenizer": "lowercase",
  14. "filter": [
  15. "english_stop"
  16. ]
  17. }
  18. }
  19. }
  20. },
  21. )
  22. print(resp)
  1. response = client.indices.create(
  2. index: 'stop_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. english_stop: {
  8. type: 'stop',
  9. stopwords: '_english_'
  10. }
  11. },
  12. analyzer: {
  13. rebuilt_stop: {
  14. tokenizer: 'lowercase',
  15. filter: [
  16. 'english_stop'
  17. ]
  18. }
  19. }
  20. }
  21. }
  22. }
  23. )
  24. puts response
  1. const response = await client.indices.create({
  2. index: "stop_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. english_stop: {
  7. type: "stop",
  8. stopwords: "_english_",
  9. },
  10. },
  11. analyzer: {
  12. rebuilt_stop: {
  13. tokenizer: "lowercase",
  14. filter: ["english_stop"],
  15. },
  16. },
  17. },
  18. },
  19. });
  20. console.log(response);
  1. PUT /stop_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": "_english_"
  9. }
  10. },
  11. "analyzer": {
  12. "rebuilt_stop": {
  13. "tokenizer": "lowercase",
  14. "filter": [
  15. "english_stop"
  16. ]
  17. }
  18. }
  19. }
  20. }
  21. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

You’d add any token filters after english_stop.