Language analyzers

Language analyzers

A set of analyzers aimed at analyzing specific language text. The following types are supported: arabic, armenian, basque, bengali, brazilian, bulgarian, catalan, cjk, czech, danish, dutch, english, estonian, finnish, french, galician, german, greek, hindi, hungarian, indonesian, irish, italian, latvian, lithuanian, norwegian, persian, portuguese, romanian, russian, serbian, sorani, spanish, swedish, turkish, thai.

Configuring language analyzers

Stopwords

All analyzers support setting custom stopwords either internally in the config, or by using an external stopwords file by setting stopwords_path. Check Stop Analyzer for more details.

Excluding words from stemming

The stem_exclusion parameter allows you to specify an array of lowercase words that should not be stemmed. Internally, this functionality is implemented by adding the keyword_marker token filter with the keywords set to the value of the stem_exclusion parameter.

The following analyzers support setting a custom stem_exclusion list: arabic, armenian, basque, bengali, bulgarian, catalan, czech, dutch, english, finnish, french, galician, german, hindi, hungarian, indonesian, irish, italian, latvian, lithuanian, norwegian, portuguese, romanian, russian, serbian, sorani, spanish, swedish, turkish.

Reimplementing language analyzers

The built-in language analyzers can be reimplemented as custom analyzers (as described below) in order to customize their behaviour.

If you do not intend to exclude words from being stemmed (the equivalent of the stem_exclusion parameter above), then you should remove the keyword_marker token filter from the custom analyzer configuration.

arabic analyzer

The arabic analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="arabic_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "arabic_stop": {
  7. "type": "stop",
  8. "stopwords": "_arabic_"
  9. },
  10. "arabic_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "مثال"
  14. ]
  15. },
  16. "arabic_stemmer": {
  17. "type": "stemmer",
  18. "language": "arabic"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_arabic": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "decimal_digit",
  27. "arabic_stop",
  28. "arabic_normalization",
  29. "arabic_keywords",
  30. "arabic_stemmer"
  31. ]
  32. }
  33. }
  34. }
  35. },
  36. )
  37. print(resp)
  1. response = client.indices.create(
  2. index: 'arabic_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. arabic_stop: {
  8. type: 'stop',
  9. stopwords: '_arabic_'
  10. },
  11. arabic_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'مثال'
  15. ]
  16. },
  17. arabic_stemmer: {
  18. type: 'stemmer',
  19. language: 'arabic'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_arabic: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'decimal_digit',
  28. 'arabic_stop',
  29. 'arabic_normalization',
  30. 'arabic_keywords',
  31. 'arabic_stemmer'
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
  38. )
  39. puts response
  1. const response = await client.indices.create({
  2. index: "arabic_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. arabic_stop: {
  7. type: "stop",
  8. stopwords: "_arabic_",
  9. },
  10. arabic_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["مثال"],
  13. },
  14. arabic_stemmer: {
  15. type: "stemmer",
  16. language: "arabic",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_arabic: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "decimal_digit",
  25. "arabic_stop",
  26. "arabic_normalization",
  27. "arabic_keywords",
  28. "arabic_stemmer",
  29. ],
  30. },
  31. },
  32. },
  33. },
  34. });
  35. console.log(response);
  1. PUT /arabic_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "arabic_stop": {
  7. "type": "stop",
  8. "stopwords": "_arabic_"
  9. },
  10. "arabic_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["مثال"]
  13. },
  14. "arabic_stemmer": {
  15. "type": "stemmer",
  16. "language": "arabic"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_arabic": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "decimal_digit",
  25. "arabic_stop",
  26. "arabic_normalization",
  27. "arabic_keywords",
  28. "arabic_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. }
  34. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (arabic_keywords) should be removed unless there are words which should be excluded from stemming.

armenian analyzer

The armenian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="armenian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "armenian_stop": {
  7. "type": "stop",
  8. "stopwords": "_armenian_"
  9. },
  10. "armenian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "օրինակ"
  14. ]
  15. },
  16. "armenian_stemmer": {
  17. "type": "stemmer",
  18. "language": "armenian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_armenian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "armenian_stop",
  27. "armenian_keywords",
  28. "armenian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'armenian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. armenian_stop: {
  8. type: 'stop',
  9. stopwords: '_armenian_'
  10. },
  11. armenian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'օրինակ'
  15. ]
  16. },
  17. armenian_stemmer: {
  18. type: 'stemmer',
  19. language: 'armenian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_armenian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'armenian_stop',
  28. 'armenian_keywords',
  29. 'armenian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "armenian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. armenian_stop: {
  7. type: "stop",
  8. stopwords: "_armenian_",
  9. },
  10. armenian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["օրինակ"],
  13. },
  14. armenian_stemmer: {
  15. type: "stemmer",
  16. language: "armenian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_armenian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "armenian_stop",
  25. "armenian_keywords",
  26. "armenian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /armenian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "armenian_stop": {
  7. "type": "stop",
  8. "stopwords": "_armenian_"
  9. },
  10. "armenian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["օրինակ"]
  13. },
  14. "armenian_stemmer": {
  15. "type": "stemmer",
  16. "language": "armenian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_armenian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "armenian_stop",
  25. "armenian_keywords",
  26. "armenian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (armenian_keywords) should be removed unless there are words which should be excluded from stemming.

basque analyzer

The basque analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="basque_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "basque_stop": {
  7. "type": "stop",
  8. "stopwords": "_basque_"
  9. },
  10. "basque_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "Adibidez"
  14. ]
  15. },
  16. "basque_stemmer": {
  17. "type": "stemmer",
  18. "language": "basque"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_basque": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "basque_stop",
  27. "basque_keywords",
  28. "basque_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'basque_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. basque_stop: {
  8. type: 'stop',
  9. stopwords: '_basque_'
  10. },
  11. basque_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'Adibidez'
  15. ]
  16. },
  17. basque_stemmer: {
  18. type: 'stemmer',
  19. language: 'basque'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_basque: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'basque_stop',
  28. 'basque_keywords',
  29. 'basque_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "basque_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. basque_stop: {
  7. type: "stop",
  8. stopwords: "_basque_",
  9. },
  10. basque_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["Adibidez"],
  13. },
  14. basque_stemmer: {
  15. type: "stemmer",
  16. language: "basque",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_basque: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "basque_stop",
  25. "basque_keywords",
  26. "basque_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /basque_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "basque_stop": {
  7. "type": "stop",
  8. "stopwords": "_basque_"
  9. },
  10. "basque_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["Adibidez"]
  13. },
  14. "basque_stemmer": {
  15. "type": "stemmer",
  16. "language": "basque"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_basque": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "basque_stop",
  25. "basque_keywords",
  26. "basque_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (basque_keywords) should be removed unless there are words which should be excluded from stemming.

bengali analyzer

The bengali analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="bengali_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "bengali_stop": {
  7. "type": "stop",
  8. "stopwords": "_bengali_"
  9. },
  10. "bengali_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "উদাহরণ"
  14. ]
  15. },
  16. "bengali_stemmer": {
  17. "type": "stemmer",
  18. "language": "bengali"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_bengali": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "decimal_digit",
  27. "bengali_keywords",
  28. "indic_normalization",
  29. "bengali_normalization",
  30. "bengali_stop",
  31. "bengali_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. },
  37. )
  38. print(resp)
  1. response = client.indices.create(
  2. index: 'bengali_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. bengali_stop: {
  8. type: 'stop',
  9. stopwords: '_bengali_'
  10. },
  11. bengali_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'উদাহরণ'
  15. ]
  16. },
  17. bengali_stemmer: {
  18. type: 'stemmer',
  19. language: 'bengali'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_bengali: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'decimal_digit',
  28. 'bengali_keywords',
  29. 'indic_normalization',
  30. 'bengali_normalization',
  31. 'bengali_stop',
  32. 'bengali_stemmer'
  33. ]
  34. }
  35. }
  36. }
  37. }
  38. }
  39. )
  40. puts response
  1. const response = await client.indices.create({
  2. index: "bengali_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. bengali_stop: {
  7. type: "stop",
  8. stopwords: "_bengali_",
  9. },
  10. bengali_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["উদাহরণ"],
  13. },
  14. bengali_stemmer: {
  15. type: "stemmer",
  16. language: "bengali",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_bengali: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "decimal_digit",
  25. "bengali_keywords",
  26. "indic_normalization",
  27. "bengali_normalization",
  28. "bengali_stop",
  29. "bengali_stemmer",
  30. ],
  31. },
  32. },
  33. },
  34. },
  35. });
  36. console.log(response);
  1. PUT /bengali_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "bengali_stop": {
  7. "type": "stop",
  8. "stopwords": "_bengali_"
  9. },
  10. "bengali_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["উদাহরণ"]
  13. },
  14. "bengali_stemmer": {
  15. "type": "stemmer",
  16. "language": "bengali"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_bengali": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "decimal_digit",
  25. "bengali_keywords",
  26. "indic_normalization",
  27. "bengali_normalization",
  28. "bengali_stop",
  29. "bengali_stemmer"
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (bengali_keywords) should be removed unless there are words which should be excluded from stemming.

brazilian analyzer

The brazilian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="brazilian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "brazilian_stop": {
  7. "type": "stop",
  8. "stopwords": "_brazilian_"
  9. },
  10. "brazilian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplo"
  14. ]
  15. },
  16. "brazilian_stemmer": {
  17. "type": "stemmer",
  18. "language": "brazilian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_brazilian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "brazilian_stop",
  27. "brazilian_keywords",
  28. "brazilian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'brazilian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. brazilian_stop: {
  8. type: 'stop',
  9. stopwords: '_brazilian_'
  10. },
  11. brazilian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplo'
  15. ]
  16. },
  17. brazilian_stemmer: {
  18. type: 'stemmer',
  19. language: 'brazilian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_brazilian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'brazilian_stop',
  28. 'brazilian_keywords',
  29. 'brazilian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "brazilian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. brazilian_stop: {
  7. type: "stop",
  8. stopwords: "_brazilian_",
  9. },
  10. brazilian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplo"],
  13. },
  14. brazilian_stemmer: {
  15. type: "stemmer",
  16. language: "brazilian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_brazilian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "brazilian_stop",
  25. "brazilian_keywords",
  26. "brazilian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /brazilian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "brazilian_stop": {
  7. "type": "stop",
  8. "stopwords": "_brazilian_"
  9. },
  10. "brazilian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplo"]
  13. },
  14. "brazilian_stemmer": {
  15. "type": "stemmer",
  16. "language": "brazilian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_brazilian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "brazilian_stop",
  25. "brazilian_keywords",
  26. "brazilian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (brazilian_keywords) should be removed unless there are words which should be excluded from stemming.

bulgarian analyzer

The bulgarian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="bulgarian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "bulgarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_bulgarian_"
  9. },
  10. "bulgarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "пример"
  14. ]
  15. },
  16. "bulgarian_stemmer": {
  17. "type": "stemmer",
  18. "language": "bulgarian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_bulgarian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "bulgarian_stop",
  27. "bulgarian_keywords",
  28. "bulgarian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'bulgarian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. bulgarian_stop: {
  8. type: 'stop',
  9. stopwords: '_bulgarian_'
  10. },
  11. bulgarian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'пример'
  15. ]
  16. },
  17. bulgarian_stemmer: {
  18. type: 'stemmer',
  19. language: 'bulgarian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_bulgarian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'bulgarian_stop',
  28. 'bulgarian_keywords',
  29. 'bulgarian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "bulgarian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. bulgarian_stop: {
  7. type: "stop",
  8. stopwords: "_bulgarian_",
  9. },
  10. bulgarian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["пример"],
  13. },
  14. bulgarian_stemmer: {
  15. type: "stemmer",
  16. language: "bulgarian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_bulgarian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "bulgarian_stop",
  25. "bulgarian_keywords",
  26. "bulgarian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /bulgarian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "bulgarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_bulgarian_"
  9. },
  10. "bulgarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["пример"]
  13. },
  14. "bulgarian_stemmer": {
  15. "type": "stemmer",
  16. "language": "bulgarian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_bulgarian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "bulgarian_stop",
  25. "bulgarian_keywords",
  26. "bulgarian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (bulgarian_keywords) should be removed unless there are words which should be excluded from stemming.

catalan analyzer

The catalan analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="catalan_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "catalan_elision": {
  7. "type": "elision",
  8. "articles": [
  9. "d",
  10. "l",
  11. "m",
  12. "n",
  13. "s",
  14. "t"
  15. ],
  16. "articles_case": True
  17. },
  18. "catalan_stop": {
  19. "type": "stop",
  20. "stopwords": "_catalan_"
  21. },
  22. "catalan_keywords": {
  23. "type": "keyword_marker",
  24. "keywords": [
  25. "example"
  26. ]
  27. },
  28. "catalan_stemmer": {
  29. "type": "stemmer",
  30. "language": "catalan"
  31. }
  32. },
  33. "analyzer": {
  34. "rebuilt_catalan": {
  35. "tokenizer": "standard",
  36. "filter": [
  37. "catalan_elision",
  38. "lowercase",
  39. "catalan_stop",
  40. "catalan_keywords",
  41. "catalan_stemmer"
  42. ]
  43. }
  44. }
  45. }
  46. },
  47. )
  48. print(resp)
  1. response = client.indices.create(
  2. index: 'catalan_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. catalan_elision: {
  8. type: 'elision',
  9. articles: [
  10. 'd',
  11. 'l',
  12. 'm',
  13. 'n',
  14. 's',
  15. 't'
  16. ],
  17. articles_case: true
  18. },
  19. catalan_stop: {
  20. type: 'stop',
  21. stopwords: '_catalan_'
  22. },
  23. catalan_keywords: {
  24. type: 'keyword_marker',
  25. keywords: [
  26. 'example'
  27. ]
  28. },
  29. catalan_stemmer: {
  30. type: 'stemmer',
  31. language: 'catalan'
  32. }
  33. },
  34. analyzer: {
  35. rebuilt_catalan: {
  36. tokenizer: 'standard',
  37. filter: [
  38. 'catalan_elision',
  39. 'lowercase',
  40. 'catalan_stop',
  41. 'catalan_keywords',
  42. 'catalan_stemmer'
  43. ]
  44. }
  45. }
  46. }
  47. }
  48. }
  49. )
  50. puts response
  1. const response = await client.indices.create({
  2. index: "catalan_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. catalan_elision: {
  7. type: "elision",
  8. articles: ["d", "l", "m", "n", "s", "t"],
  9. articles_case: true,
  10. },
  11. catalan_stop: {
  12. type: "stop",
  13. stopwords: "_catalan_",
  14. },
  15. catalan_keywords: {
  16. type: "keyword_marker",
  17. keywords: ["example"],
  18. },
  19. catalan_stemmer: {
  20. type: "stemmer",
  21. language: "catalan",
  22. },
  23. },
  24. analyzer: {
  25. rebuilt_catalan: {
  26. tokenizer: "standard",
  27. filter: [
  28. "catalan_elision",
  29. "lowercase",
  30. "catalan_stop",
  31. "catalan_keywords",
  32. "catalan_stemmer",
  33. ],
  34. },
  35. },
  36. },
  37. },
  38. });
  39. console.log(response);
  1. PUT /catalan_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "catalan_elision": {
  7. "type": "elision",
  8. "articles": [ "d", "l", "m", "n", "s", "t"],
  9. "articles_case": true
  10. },
  11. "catalan_stop": {
  12. "type": "stop",
  13. "stopwords": "_catalan_"
  14. },
  15. "catalan_keywords": {
  16. "type": "keyword_marker",
  17. "keywords": ["example"]
  18. },
  19. "catalan_stemmer": {
  20. "type": "stemmer",
  21. "language": "catalan"
  22. }
  23. },
  24. "analyzer": {
  25. "rebuilt_catalan": {
  26. "tokenizer": "standard",
  27. "filter": [
  28. "catalan_elision",
  29. "lowercase",
  30. "catalan_stop",
  31. "catalan_keywords",
  32. "catalan_stemmer"
  33. ]
  34. }
  35. }
  36. }
  37. }
  38. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (catalan_keywords) should be removed unless there are words which should be excluded from stemming.

cjk analyzer

You may find that icu_analyzer in the ICU analysis plugin works better for CJK text than the cjk analyzer. Experiment with your text and queries.

The cjk analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="cjk_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": [
  9. "a",
  10. "and",
  11. "are",
  12. "as",
  13. "at",
  14. "be",
  15. "but",
  16. "by",
  17. "for",
  18. "if",
  19. "in",
  20. "into",
  21. "is",
  22. "it",
  23. "no",
  24. "not",
  25. "of",
  26. "on",
  27. "or",
  28. "s",
  29. "such",
  30. "t",
  31. "that",
  32. "the",
  33. "their",
  34. "then",
  35. "there",
  36. "these",
  37. "they",
  38. "this",
  39. "to",
  40. "was",
  41. "will",
  42. "with",
  43. "www"
  44. ]
  45. }
  46. },
  47. "analyzer": {
  48. "rebuilt_cjk": {
  49. "tokenizer": "standard",
  50. "filter": [
  51. "cjk_width",
  52. "lowercase",
  53. "cjk_bigram",
  54. "english_stop"
  55. ]
  56. }
  57. }
  58. }
  59. },
  60. )
  61. print(resp)
  1. response = client.indices.create(
  2. index: 'cjk_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. english_stop: {
  8. type: 'stop',
  9. stopwords: [
  10. 'a',
  11. 'and',
  12. 'are',
  13. 'as',
  14. 'at',
  15. 'be',
  16. 'but',
  17. 'by',
  18. 'for',
  19. 'if',
  20. 'in',
  21. 'into',
  22. 'is',
  23. 'it',
  24. 'no',
  25. 'not',
  26. 'of',
  27. 'on',
  28. 'or',
  29. 's',
  30. 'such',
  31. 't',
  32. 'that',
  33. 'the',
  34. 'their',
  35. 'then',
  36. 'there',
  37. 'these',
  38. 'they',
  39. 'this',
  40. 'to',
  41. 'was',
  42. 'will',
  43. 'with',
  44. 'www'
  45. ]
  46. }
  47. },
  48. analyzer: {
  49. rebuilt_cjk: {
  50. tokenizer: 'standard',
  51. filter: [
  52. 'cjk_width',
  53. 'lowercase',
  54. 'cjk_bigram',
  55. 'english_stop'
  56. ]
  57. }
  58. }
  59. }
  60. }
  61. }
  62. )
  63. puts response
  1. const response = await client.indices.create({
  2. index: "cjk_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. english_stop: {
  7. type: "stop",
  8. stopwords: [
  9. "a",
  10. "and",
  11. "are",
  12. "as",
  13. "at",
  14. "be",
  15. "but",
  16. "by",
  17. "for",
  18. "if",
  19. "in",
  20. "into",
  21. "is",
  22. "it",
  23. "no",
  24. "not",
  25. "of",
  26. "on",
  27. "or",
  28. "s",
  29. "such",
  30. "t",
  31. "that",
  32. "the",
  33. "their",
  34. "then",
  35. "there",
  36. "these",
  37. "they",
  38. "this",
  39. "to",
  40. "was",
  41. "will",
  42. "with",
  43. "www",
  44. ],
  45. },
  46. },
  47. analyzer: {
  48. rebuilt_cjk: {
  49. tokenizer: "standard",
  50. filter: ["cjk_width", "lowercase", "cjk_bigram", "english_stop"],
  51. },
  52. },
  53. },
  54. },
  55. });
  56. console.log(response);
  1. PUT /cjk_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": [
  9. "a", "and", "are", "as", "at", "be", "but", "by", "for",
  10. "if", "in", "into", "is", "it", "no", "not", "of", "on",
  11. "or", "s", "such", "t", "that", "the", "their", "then",
  12. "there", "these", "they", "this", "to", "was", "will",
  13. "with", "www"
  14. ]
  15. }
  16. },
  17. "analyzer": {
  18. "rebuilt_cjk": {
  19. "tokenizer": "standard",
  20. "filter": [
  21. "cjk_width",
  22. "lowercase",
  23. "cjk_bigram",
  24. "english_stop"
  25. ]
  26. }
  27. }
  28. }
  29. }
  30. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters. The default stop words are almost the same as the _english_ set, but not exactly the same.

czech analyzer

The czech analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="czech_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "czech_stop": {
  7. "type": "stop",
  8. "stopwords": "_czech_"
  9. },
  10. "czech_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "příklad"
  14. ]
  15. },
  16. "czech_stemmer": {
  17. "type": "stemmer",
  18. "language": "czech"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_czech": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "czech_stop",
  27. "czech_keywords",
  28. "czech_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'czech_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. czech_stop: {
  8. type: 'stop',
  9. stopwords: '_czech_'
  10. },
  11. czech_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'příklad'
  15. ]
  16. },
  17. czech_stemmer: {
  18. type: 'stemmer',
  19. language: 'czech'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_czech: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'czech_stop',
  28. 'czech_keywords',
  29. 'czech_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "czech_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. czech_stop: {
  7. type: "stop",
  8. stopwords: "_czech_",
  9. },
  10. czech_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["příklad"],
  13. },
  14. czech_stemmer: {
  15. type: "stemmer",
  16. language: "czech",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_czech: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "czech_stop",
  25. "czech_keywords",
  26. "czech_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /czech_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "czech_stop": {
  7. "type": "stop",
  8. "stopwords": "_czech_"
  9. },
  10. "czech_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["příklad"]
  13. },
  14. "czech_stemmer": {
  15. "type": "stemmer",
  16. "language": "czech"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_czech": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "czech_stop",
  25. "czech_keywords",
  26. "czech_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The keyword_marker filter (czech_keywords) should be removed unless there are words which should be excluded from stemming.

danish analyzer

The danish analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="danish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "danish_stop": {
  7. "type": "stop",
  8. "stopwords": "_danish_"
  9. },
  10. "danish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "eksempel"
  14. ]
  15. },
  16. "danish_stemmer": {
  17. "type": "stemmer",
  18. "language": "danish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_danish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "danish_stop",
  27. "danish_keywords",
  28. "danish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'danish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. danish_stop: {
  8. type: 'stop',
  9. stopwords: '_danish_'
  10. },
  11. danish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'eksempel'
  15. ]
  16. },
  17. danish_stemmer: {
  18. type: 'stemmer',
  19. language: 'danish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_danish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'danish_stop',
  28. 'danish_keywords',
  29. 'danish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "danish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. danish_stop: {
  7. type: "stop",
  8. stopwords: "_danish_",
  9. },
  10. danish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["eksempel"],
  13. },
  14. danish_stemmer: {
  15. type: "stemmer",
  16. language: "danish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_danish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "danish_stop",
  25. "danish_keywords",
  26. "danish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /danish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "danish_stop": {
  7. "type": "stop",
  8. "stopwords": "_danish_"
  9. },
  10. "danish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["eksempel"]
  13. },
  14. "danish_stemmer": {
  15. "type": "stemmer",
  16. "language": "danish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_danish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "danish_stop",
  25. "danish_keywords",
  26. "danish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The danish_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

dutch analyzer

The dutch analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="dutch_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "dutch_stop": {
  7. "type": "stop",
  8. "stopwords": "_dutch_"
  9. },
  10. "dutch_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "voorbeeld"
  14. ]
  15. },
  16. "dutch_stemmer": {
  17. "type": "stemmer",
  18. "language": "dutch"
  19. },
  20. "dutch_override": {
  21. "type": "stemmer_override",
  22. "rules": [
  23. "fiets=>fiets",
  24. "bromfiets=>bromfiets",
  25. "ei=>eier",
  26. "kind=>kinder"
  27. ]
  28. }
  29. },
  30. "analyzer": {
  31. "rebuilt_dutch": {
  32. "tokenizer": "standard",
  33. "filter": [
  34. "lowercase",
  35. "dutch_stop",
  36. "dutch_keywords",
  37. "dutch_override",
  38. "dutch_stemmer"
  39. ]
  40. }
  41. }
  42. }
  43. },
  44. )
  45. print(resp)
  1. response = client.indices.create(
  2. index: 'dutch_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. dutch_stop: {
  8. type: 'stop',
  9. stopwords: '_dutch_'
  10. },
  11. dutch_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'voorbeeld'
  15. ]
  16. },
  17. dutch_stemmer: {
  18. type: 'stemmer',
  19. language: 'dutch'
  20. },
  21. dutch_override: {
  22. type: 'stemmer_override',
  23. rules: [
  24. 'fiets=>fiets',
  25. 'bromfiets=>bromfiets',
  26. 'ei=>eier',
  27. 'kind=>kinder'
  28. ]
  29. }
  30. },
  31. analyzer: {
  32. rebuilt_dutch: {
  33. tokenizer: 'standard',
  34. filter: [
  35. 'lowercase',
  36. 'dutch_stop',
  37. 'dutch_keywords',
  38. 'dutch_override',
  39. 'dutch_stemmer'
  40. ]
  41. }
  42. }
  43. }
  44. }
  45. }
  46. )
  47. puts response
  1. const response = await client.indices.create({
  2. index: "dutch_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. dutch_stop: {
  7. type: "stop",
  8. stopwords: "_dutch_",
  9. },
  10. dutch_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["voorbeeld"],
  13. },
  14. dutch_stemmer: {
  15. type: "stemmer",
  16. language: "dutch",
  17. },
  18. dutch_override: {
  19. type: "stemmer_override",
  20. rules: [
  21. "fiets=>fiets",
  22. "bromfiets=>bromfiets",
  23. "ei=>eier",
  24. "kind=>kinder",
  25. ],
  26. },
  27. },
  28. analyzer: {
  29. rebuilt_dutch: {
  30. tokenizer: "standard",
  31. filter: [
  32. "lowercase",
  33. "dutch_stop",
  34. "dutch_keywords",
  35. "dutch_override",
  36. "dutch_stemmer",
  37. ],
  38. },
  39. },
  40. },
  41. },
  42. });
  43. console.log(response);
  1. PUT /dutch_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "dutch_stop": {
  7. "type": "stop",
  8. "stopwords": "_dutch_"
  9. },
  10. "dutch_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["voorbeeld"]
  13. },
  14. "dutch_stemmer": {
  15. "type": "stemmer",
  16. "language": "dutch"
  17. },
  18. "dutch_override": {
  19. "type": "stemmer_override",
  20. "rules": [
  21. "fiets=>fiets",
  22. "bromfiets=>bromfiets",
  23. "ei=>eier",
  24. "kind=>kinder"
  25. ]
  26. }
  27. },
  28. "analyzer": {
  29. "rebuilt_dutch": {
  30. "tokenizer": "standard",
  31. "filter": [
  32. "lowercase",
  33. "dutch_stop",
  34. "dutch_keywords",
  35. "dutch_override",
  36. "dutch_stemmer"
  37. ]
  38. }
  39. }
  40. }
  41. }
  42. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The dutch_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

english analyzer

The english analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="english_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": "_english_"
  9. },
  10. "english_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "example"
  14. ]
  15. },
  16. "english_stemmer": {
  17. "type": "stemmer",
  18. "language": "english"
  19. },
  20. "english_possessive_stemmer": {
  21. "type": "stemmer",
  22. "language": "possessive_english"
  23. }
  24. },
  25. "analyzer": {
  26. "rebuilt_english": {
  27. "tokenizer": "standard",
  28. "filter": [
  29. "english_possessive_stemmer",
  30. "lowercase",
  31. "english_stop",
  32. "english_keywords",
  33. "english_stemmer"
  34. ]
  35. }
  36. }
  37. }
  38. },
  39. )
  40. print(resp)
  1. response = client.indices.create(
  2. index: 'english_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. english_stop: {
  8. type: 'stop',
  9. stopwords: '_english_'
  10. },
  11. english_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'example'
  15. ]
  16. },
  17. english_stemmer: {
  18. type: 'stemmer',
  19. language: 'english'
  20. },
  21. english_possessive_stemmer: {
  22. type: 'stemmer',
  23. language: 'possessive_english'
  24. }
  25. },
  26. analyzer: {
  27. rebuilt_english: {
  28. tokenizer: 'standard',
  29. filter: [
  30. 'english_possessive_stemmer',
  31. 'lowercase',
  32. 'english_stop',
  33. 'english_keywords',
  34. 'english_stemmer'
  35. ]
  36. }
  37. }
  38. }
  39. }
  40. }
  41. )
  42. puts response
  1. const response = await client.indices.create({
  2. index: "english_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. english_stop: {
  7. type: "stop",
  8. stopwords: "_english_",
  9. },
  10. english_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["example"],
  13. },
  14. english_stemmer: {
  15. type: "stemmer",
  16. language: "english",
  17. },
  18. english_possessive_stemmer: {
  19. type: "stemmer",
  20. language: "possessive_english",
  21. },
  22. },
  23. analyzer: {
  24. rebuilt_english: {
  25. tokenizer: "standard",
  26. filter: [
  27. "english_possessive_stemmer",
  28. "lowercase",
  29. "english_stop",
  30. "english_keywords",
  31. "english_stemmer",
  32. ],
  33. },
  34. },
  35. },
  36. },
  37. });
  38. console.log(response);
  1. PUT /english_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": "_english_"
  9. },
  10. "english_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["example"]
  13. },
  14. "english_stemmer": {
  15. "type": "stemmer",
  16. "language": "english"
  17. },
  18. "english_possessive_stemmer": {
  19. "type": "stemmer",
  20. "language": "possessive_english"
  21. }
  22. },
  23. "analyzer": {
  24. "rebuilt_english": {
  25. "tokenizer": "standard",
  26. "filter": [
  27. "english_possessive_stemmer",
  28. "lowercase",
  29. "english_stop",
  30. "english_keywords",
  31. "english_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The english_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

estonian analyzer

The estonian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="estonian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "estonian_stop": {
  7. "type": "stop",
  8. "stopwords": "_estonian_"
  9. },
  10. "estonian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "näide"
  14. ]
  15. },
  16. "estonian_stemmer": {
  17. "type": "stemmer",
  18. "language": "estonian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_estonian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "estonian_stop",
  27. "estonian_keywords",
  28. "estonian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'estonian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. estonian_stop: {
  8. type: 'stop',
  9. stopwords: '_estonian_'
  10. },
  11. estonian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'näide'
  15. ]
  16. },
  17. estonian_stemmer: {
  18. type: 'stemmer',
  19. language: 'estonian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_estonian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'estonian_stop',
  28. 'estonian_keywords',
  29. 'estonian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "estonian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. estonian_stop: {
  7. type: "stop",
  8. stopwords: "_estonian_",
  9. },
  10. estonian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["näide"],
  13. },
  14. estonian_stemmer: {
  15. type: "stemmer",
  16. language: "estonian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_estonian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "estonian_stop",
  25. "estonian_keywords",
  26. "estonian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /estonian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "estonian_stop": {
  7. "type": "stop",
  8. "stopwords": "_estonian_"
  9. },
  10. "estonian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["näide"]
  13. },
  14. "estonian_stemmer": {
  15. "type": "stemmer",
  16. "language": "estonian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_estonian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "estonian_stop",
  25. "estonian_keywords",
  26. "estonian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The estonian_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

finnish analyzer

The finnish analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="finnish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "finnish_stop": {
  7. "type": "stop",
  8. "stopwords": "_finnish_"
  9. },
  10. "finnish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "esimerkki"
  14. ]
  15. },
  16. "finnish_stemmer": {
  17. "type": "stemmer",
  18. "language": "finnish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_finnish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "finnish_stop",
  27. "finnish_keywords",
  28. "finnish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'finnish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. finnish_stop: {
  8. type: 'stop',
  9. stopwords: '_finnish_'
  10. },
  11. finnish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'esimerkki'
  15. ]
  16. },
  17. finnish_stemmer: {
  18. type: 'stemmer',
  19. language: 'finnish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_finnish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'finnish_stop',
  28. 'finnish_keywords',
  29. 'finnish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "finnish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. finnish_stop: {
  7. type: "stop",
  8. stopwords: "_finnish_",
  9. },
  10. finnish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["esimerkki"],
  13. },
  14. finnish_stemmer: {
  15. type: "stemmer",
  16. language: "finnish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_finnish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "finnish_stop",
  25. "finnish_keywords",
  26. "finnish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /finnish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "finnish_stop": {
  7. "type": "stop",
  8. "stopwords": "_finnish_"
  9. },
  10. "finnish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["esimerkki"]
  13. },
  14. "finnish_stemmer": {
  15. "type": "stemmer",
  16. "language": "finnish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_finnish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "finnish_stop",
  25. "finnish_keywords",
  26. "finnish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The finnish_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

french analyzer

The french analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="french_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "french_elision": {
  7. "type": "elision",
  8. "articles_case": True,
  9. "articles": [
  10. "l",
  11. "m",
  12. "t",
  13. "qu",
  14. "n",
  15. "s",
  16. "j",
  17. "d",
  18. "c",
  19. "jusqu",
  20. "quoiqu",
  21. "lorsqu",
  22. "puisqu"
  23. ]
  24. },
  25. "french_stop": {
  26. "type": "stop",
  27. "stopwords": "_french_"
  28. },
  29. "french_keywords": {
  30. "type": "keyword_marker",
  31. "keywords": [
  32. "Example"
  33. ]
  34. },
  35. "french_stemmer": {
  36. "type": "stemmer",
  37. "language": "light_french"
  38. }
  39. },
  40. "analyzer": {
  41. "rebuilt_french": {
  42. "tokenizer": "standard",
  43. "filter": [
  44. "french_elision",
  45. "lowercase",
  46. "french_stop",
  47. "french_keywords",
  48. "french_stemmer"
  49. ]
  50. }
  51. }
  52. }
  53. },
  54. )
  55. print(resp)
  1. response = client.indices.create(
  2. index: 'french_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. french_elision: {
  8. type: 'elision',
  9. articles_case: true,
  10. articles: [
  11. 'l',
  12. 'm',
  13. 't',
  14. 'qu',
  15. 'n',
  16. 's',
  17. 'j',
  18. 'd',
  19. 'c',
  20. 'jusqu',
  21. 'quoiqu',
  22. 'lorsqu',
  23. 'puisqu'
  24. ]
  25. },
  26. french_stop: {
  27. type: 'stop',
  28. stopwords: '_french_'
  29. },
  30. french_keywords: {
  31. type: 'keyword_marker',
  32. keywords: [
  33. 'Example'
  34. ]
  35. },
  36. french_stemmer: {
  37. type: 'stemmer',
  38. language: 'light_french'
  39. }
  40. },
  41. analyzer: {
  42. rebuilt_french: {
  43. tokenizer: 'standard',
  44. filter: [
  45. 'french_elision',
  46. 'lowercase',
  47. 'french_stop',
  48. 'french_keywords',
  49. 'french_stemmer'
  50. ]
  51. }
  52. }
  53. }
  54. }
  55. }
  56. )
  57. puts response
  1. const response = await client.indices.create({
  2. index: "french_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. french_elision: {
  7. type: "elision",
  8. articles_case: true,
  9. articles: [
  10. "l",
  11. "m",
  12. "t",
  13. "qu",
  14. "n",
  15. "s",
  16. "j",
  17. "d",
  18. "c",
  19. "jusqu",
  20. "quoiqu",
  21. "lorsqu",
  22. "puisqu",
  23. ],
  24. },
  25. french_stop: {
  26. type: "stop",
  27. stopwords: "_french_",
  28. },
  29. french_keywords: {
  30. type: "keyword_marker",
  31. keywords: ["Example"],
  32. },
  33. french_stemmer: {
  34. type: "stemmer",
  35. language: "light_french",
  36. },
  37. },
  38. analyzer: {
  39. rebuilt_french: {
  40. tokenizer: "standard",
  41. filter: [
  42. "french_elision",
  43. "lowercase",
  44. "french_stop",
  45. "french_keywords",
  46. "french_stemmer",
  47. ],
  48. },
  49. },
  50. },
  51. },
  52. });
  53. console.log(response);
  1. PUT /french_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "french_elision": {
  7. "type": "elision",
  8. "articles_case": true,
  9. "articles": [
  10. "l", "m", "t", "qu", "n", "s",
  11. "j", "d", "c", "jusqu", "quoiqu",
  12. "lorsqu", "puisqu"
  13. ]
  14. },
  15. "french_stop": {
  16. "type": "stop",
  17. "stopwords": "_french_"
  18. },
  19. "french_keywords": {
  20. "type": "keyword_marker",
  21. "keywords": ["Example"]
  22. },
  23. "french_stemmer": {
  24. "type": "stemmer",
  25. "language": "light_french"
  26. }
  27. },
  28. "analyzer": {
  29. "rebuilt_french": {
  30. "tokenizer": "standard",
  31. "filter": [
  32. "french_elision",
  33. "lowercase",
  34. "french_stop",
  35. "french_keywords",
  36. "french_stemmer"
  37. ]
  38. }
  39. }
  40. }
  41. }
  42. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

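The french_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming. Note that this filter runs after the lowercase filter, so its keywords should be given in lowercase (for example, "example" rather than "Example") in order to match.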

galician analyzer

The galician analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="galician_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "galician_stop": {
  7. "type": "stop",
  8. "stopwords": "_galician_"
  9. },
  10. "galician_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplo"
  14. ]
  15. },
  16. "galician_stemmer": {
  17. "type": "stemmer",
  18. "language": "galician"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_galician": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "galician_stop",
  27. "galician_keywords",
  28. "galician_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'galician_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. galician_stop: {
  8. type: 'stop',
  9. stopwords: '_galician_'
  10. },
  11. galician_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplo'
  15. ]
  16. },
  17. galician_stemmer: {
  18. type: 'stemmer',
  19. language: 'galician'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_galician: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'galician_stop',
  28. 'galician_keywords',
  29. 'galician_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "galician_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. galician_stop: {
  7. type: "stop",
  8. stopwords: "_galician_",
  9. },
  10. galician_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplo"],
  13. },
  14. galician_stemmer: {
  15. type: "stemmer",
  16. language: "galician",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_galician: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "galician_stop",
  25. "galician_keywords",
  26. "galician_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /galician_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "galician_stop": {
  7. "type": "stop",
  8. "stopwords": "_galician_"
  9. },
  10. "galician_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplo"]
  13. },
  14. "galician_stemmer": {
  15. "type": "stemmer",
  16. "language": "galician"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_galician": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "galician_stop",
  25. "galician_keywords",
  26. "galician_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The galician_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

german analyzer

The german analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="german_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "german_stop": {
  7. "type": "stop",
  8. "stopwords": "_german_"
  9. },
  10. "german_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "Beispiel"
  14. ]
  15. },
  16. "german_stemmer": {
  17. "type": "stemmer",
  18. "language": "light_german"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_german": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "german_stop",
  27. "german_keywords",
  28. "german_normalization",
  29. "german_stemmer"
  30. ]
  31. }
  32. }
  33. }
  34. },
  35. )
  36. print(resp)
  1. response = client.indices.create(
  2. index: 'german_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. german_stop: {
  8. type: 'stop',
  9. stopwords: '_german_'
  10. },
  11. german_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'Beispiel'
  15. ]
  16. },
  17. german_stemmer: {
  18. type: 'stemmer',
  19. language: 'light_german'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_german: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'german_stop',
  28. 'german_keywords',
  29. 'german_normalization',
  30. 'german_stemmer'
  31. ]
  32. }
  33. }
  34. }
  35. }
  36. }
  37. )
  38. puts response
  1. const response = await client.indices.create({
  2. index: "german_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. german_stop: {
  7. type: "stop",
  8. stopwords: "_german_",
  9. },
  10. german_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["Beispiel"],
  13. },
  14. german_stemmer: {
  15. type: "stemmer",
  16. language: "light_german",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_german: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "german_stop",
  25. "german_keywords",
  26. "german_normalization",
  27. "german_stemmer",
  28. ],
  29. },
  30. },
  31. },
  32. },
  33. });
  34. console.log(response);
  1. PUT /german_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "german_stop": {
  7. "type": "stop",
  8. "stopwords": "_german_"
  9. },
  10. "german_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["Beispiel"]
  13. },
  14. "german_stemmer": {
  15. "type": "stemmer",
  16. "language": "light_german"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_german": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "german_stop",
  25. "german_keywords",
  26. "german_normalization",
  27. "german_stemmer"
  28. ]
  29. }
  30. }
  31. }
  32. }
  33. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

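The german_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming. Note that this filter runs after the lowercase filter, so its keywords should be given in lowercase (for example, "beispiel" rather than "Beispiel") in order to match.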

greek analyzer

The greek analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="greek_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "greek_stop": {
  7. "type": "stop",
  8. "stopwords": "_greek_"
  9. },
  10. "greek_lowercase": {
  11. "type": "lowercase",
  12. "language": "greek"
  13. },
  14. "greek_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": [
  17. "παράδειγμα"
  18. ]
  19. },
  20. "greek_stemmer": {
  21. "type": "stemmer",
  22. "language": "greek"
  23. }
  24. },
  25. "analyzer": {
  26. "rebuilt_greek": {
  27. "tokenizer": "standard",
  28. "filter": [
  29. "greek_lowercase",
  30. "greek_stop",
  31. "greek_keywords",
  32. "greek_stemmer"
  33. ]
  34. }
  35. }
  36. }
  37. },
  38. )
  39. print(resp)
  1. response = client.indices.create(
  2. index: 'greek_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. greek_stop: {
  8. type: 'stop',
  9. stopwords: '_greek_'
  10. },
  11. greek_lowercase: {
  12. type: 'lowercase',
  13. language: 'greek'
  14. },
  15. greek_keywords: {
  16. type: 'keyword_marker',
  17. keywords: [
  18. 'παράδειγμα'
  19. ]
  20. },
  21. greek_stemmer: {
  22. type: 'stemmer',
  23. language: 'greek'
  24. }
  25. },
  26. analyzer: {
  27. rebuilt_greek: {
  28. tokenizer: 'standard',
  29. filter: [
  30. 'greek_lowercase',
  31. 'greek_stop',
  32. 'greek_keywords',
  33. 'greek_stemmer'
  34. ]
  35. }
  36. }
  37. }
  38. }
  39. }
  40. )
  41. puts response
  1. const response = await client.indices.create({
  2. index: "greek_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. greek_stop: {
  7. type: "stop",
  8. stopwords: "_greek_",
  9. },
  10. greek_lowercase: {
  11. type: "lowercase",
  12. language: "greek",
  13. },
  14. greek_keywords: {
  15. type: "keyword_marker",
  16. keywords: ["παράδειγμα"],
  17. },
  18. greek_stemmer: {
  19. type: "stemmer",
  20. language: "greek",
  21. },
  22. },
  23. analyzer: {
  24. rebuilt_greek: {
  25. tokenizer: "standard",
  26. filter: [
  27. "greek_lowercase",
  28. "greek_stop",
  29. "greek_keywords",
  30. "greek_stemmer",
  31. ],
  32. },
  33. },
  34. },
  35. },
  36. });
  37. console.log(response);
  1. PUT /greek_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "greek_stop": {
  7. "type": "stop",
  8. "stopwords": "_greek_"
  9. },
  10. "greek_lowercase": {
  11. "type": "lowercase",
  12. "language": "greek"
  13. },
  14. "greek_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": ["παράδειγμα"]
  17. },
  18. "greek_stemmer": {
  19. "type": "stemmer",
  20. "language": "greek"
  21. }
  22. },
  23. "analyzer": {
  24. "rebuilt_greek": {
  25. "tokenizer": "standard",
  26. "filter": [
  27. "greek_lowercase",
  28. "greek_stop",
  29. "greek_keywords",
  30. "greek_stemmer"
  31. ]
  32. }
  33. }
  34. }
  35. }
  36. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The greek_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

hindi analyzer

The hindi analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="hindi_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "hindi_stop": {
  7. "type": "stop",
  8. "stopwords": "_hindi_"
  9. },
  10. "hindi_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "उदाहरण"
  14. ]
  15. },
  16. "hindi_stemmer": {
  17. "type": "stemmer",
  18. "language": "hindi"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_hindi": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "decimal_digit",
  27. "hindi_keywords",
  28. "indic_normalization",
  29. "hindi_normalization",
  30. "hindi_stop",
  31. "hindi_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. },
  37. )
  38. print(resp)
  1. response = client.indices.create(
  2. index: 'hindi_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. hindi_stop: {
  8. type: 'stop',
  9. stopwords: '_hindi_'
  10. },
  11. hindi_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'उदाहरण'
  15. ]
  16. },
  17. hindi_stemmer: {
  18. type: 'stemmer',
  19. language: 'hindi'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_hindi: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'decimal_digit',
  28. 'hindi_keywords',
  29. 'indic_normalization',
  30. 'hindi_normalization',
  31. 'hindi_stop',
  32. 'hindi_stemmer'
  33. ]
  34. }
  35. }
  36. }
  37. }
  38. }
  39. )
  40. puts response
  1. const response = await client.indices.create({
  2. index: "hindi_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. hindi_stop: {
  7. type: "stop",
  8. stopwords: "_hindi_",
  9. },
  10. hindi_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["उदाहरण"],
  13. },
  14. hindi_stemmer: {
  15. type: "stemmer",
  16. language: "hindi",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_hindi: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "decimal_digit",
  25. "hindi_keywords",
  26. "indic_normalization",
  27. "hindi_normalization",
  28. "hindi_stop",
  29. "hindi_stemmer",
  30. ],
  31. },
  32. },
  33. },
  34. },
  35. });
  36. console.log(response);
  1. PUT /hindi_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "hindi_stop": {
  7. "type": "stop",
  8. "stopwords": "_hindi_"
  9. },
  10. "hindi_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["उदाहरण"]
  13. },
  14. "hindi_stemmer": {
  15. "type": "stemmer",
  16. "language": "hindi"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_hindi": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "decimal_digit",
  25. "hindi_keywords",
  26. "indic_normalization",
  27. "hindi_normalization",
  28. "hindi_stop",
  29. "hindi_stemmer"
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The hindi_keywords filter (keyword_marker) should be removed unless there are words which should be excluded from stemming.

hungarian analyzer

The hungarian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="hungarian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "hungarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_hungarian_"
  9. },
  10. "hungarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "példa"
  14. ]
  15. },
  16. "hungarian_stemmer": {
  17. "type": "stemmer",
  18. "language": "hungarian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_hungarian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "hungarian_stop",
  27. "hungarian_keywords",
  28. "hungarian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'hungarian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. hungarian_stop: {
  8. type: 'stop',
  9. stopwords: '_hungarian_'
  10. },
  11. hungarian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'példa'
  15. ]
  16. },
  17. hungarian_stemmer: {
  18. type: 'stemmer',
  19. language: 'hungarian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_hungarian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'hungarian_stop',
  28. 'hungarian_keywords',
  29. 'hungarian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "hungarian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. hungarian_stop: {
  7. type: "stop",
  8. stopwords: "_hungarian_",
  9. },
  10. hungarian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["példa"],
  13. },
  14. hungarian_stemmer: {
  15. type: "stemmer",
  16. language: "hungarian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_hungarian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "hungarian_stop",
  25. "hungarian_keywords",
  26. "hungarian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /hungarian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "hungarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_hungarian_"
  9. },
  10. "hungarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["példa"]
  13. },
  14. "hungarian_stemmer": {
  15. "type": "stemmer",
  16. "language": "hungarian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_hungarian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "hungarian_stop",
  25. "hungarian_keywords",
  26. "hungarian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The hungarian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

indonesian analyzer

The indonesian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="indonesian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "indonesian_stop": {
  7. "type": "stop",
  8. "stopwords": "_indonesian_"
  9. },
  10. "indonesian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "contoh"
  14. ]
  15. },
  16. "indonesian_stemmer": {
  17. "type": "stemmer",
  18. "language": "indonesian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_indonesian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "indonesian_stop",
  27. "indonesian_keywords",
  28. "indonesian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'indonesian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. indonesian_stop: {
  8. type: 'stop',
  9. stopwords: '_indonesian_'
  10. },
  11. indonesian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'contoh'
  15. ]
  16. },
  17. indonesian_stemmer: {
  18. type: 'stemmer',
  19. language: 'indonesian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_indonesian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'indonesian_stop',
  28. 'indonesian_keywords',
  29. 'indonesian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "indonesian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. indonesian_stop: {
  7. type: "stop",
  8. stopwords: "_indonesian_",
  9. },
  10. indonesian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["contoh"],
  13. },
  14. indonesian_stemmer: {
  15. type: "stemmer",
  16. language: "indonesian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_indonesian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "indonesian_stop",
  25. "indonesian_keywords",
  26. "indonesian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /indonesian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "indonesian_stop": {
  7. "type": "stop",
  8. "stopwords": "_indonesian_"
  9. },
  10. "indonesian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["contoh"]
  13. },
  14. "indonesian_stemmer": {
  15. "type": "stemmer",
  16. "language": "indonesian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_indonesian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "indonesian_stop",
  25. "indonesian_keywords",
  26. "indonesian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The indonesian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

irish analyzer

The irish analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="irish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "irish_hyphenation": {
  7. "type": "stop",
  8. "stopwords": [
  9. "h",
  10. "n",
  11. "t"
  12. ],
  13. "ignore_case": True
  14. },
  15. "irish_elision": {
  16. "type": "elision",
  17. "articles": [
  18. "d",
  19. "m",
  20. "b"
  21. ],
  22. "articles_case": True
  23. },
  24. "irish_stop": {
  25. "type": "stop",
  26. "stopwords": "_irish_"
  27. },
  28. "irish_lowercase": {
  29. "type": "lowercase",
  30. "language": "irish"
  31. },
  32. "irish_keywords": {
  33. "type": "keyword_marker",
  34. "keywords": [
  35. "sampla"
  36. ]
  37. },
  38. "irish_stemmer": {
  39. "type": "stemmer",
  40. "language": "irish"
  41. }
  42. },
  43. "analyzer": {
  44. "rebuilt_irish": {
  45. "tokenizer": "standard",
  46. "filter": [
  47. "irish_hyphenation",
  48. "irish_elision",
  49. "irish_lowercase",
  50. "irish_stop",
  51. "irish_keywords",
  52. "irish_stemmer"
  53. ]
  54. }
  55. }
  56. }
  57. },
  58. )
  59. print(resp)
  1. response = client.indices.create(
  2. index: 'irish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. irish_hyphenation: {
  8. type: 'stop',
  9. stopwords: [
  10. 'h',
  11. 'n',
  12. 't'
  13. ],
  14. ignore_case: true
  15. },
  16. irish_elision: {
  17. type: 'elision',
  18. articles: [
  19. 'd',
  20. 'm',
  21. 'b'
  22. ],
  23. articles_case: true
  24. },
  25. irish_stop: {
  26. type: 'stop',
  27. stopwords: '_irish_'
  28. },
  29. irish_lowercase: {
  30. type: 'lowercase',
  31. language: 'irish'
  32. },
  33. irish_keywords: {
  34. type: 'keyword_marker',
  35. keywords: [
  36. 'sampla'
  37. ]
  38. },
  39. irish_stemmer: {
  40. type: 'stemmer',
  41. language: 'irish'
  42. }
  43. },
  44. analyzer: {
  45. rebuilt_irish: {
  46. tokenizer: 'standard',
  47. filter: [
  48. 'irish_hyphenation',
  49. 'irish_elision',
  50. 'irish_lowercase',
  51. 'irish_stop',
  52. 'irish_keywords',
  53. 'irish_stemmer'
  54. ]
  55. }
  56. }
  57. }
  58. }
  59. }
  60. )
  61. puts response
  1. const response = await client.indices.create({
  2. index: "irish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. irish_hyphenation: {
  7. type: "stop",
  8. stopwords: ["h", "n", "t"],
  9. ignore_case: true,
  10. },
  11. irish_elision: {
  12. type: "elision",
  13. articles: ["d", "m", "b"],
  14. articles_case: true,
  15. },
  16. irish_stop: {
  17. type: "stop",
  18. stopwords: "_irish_",
  19. },
  20. irish_lowercase: {
  21. type: "lowercase",
  22. language: "irish",
  23. },
  24. irish_keywords: {
  25. type: "keyword_marker",
  26. keywords: ["sampla"],
  27. },
  28. irish_stemmer: {
  29. type: "stemmer",
  30. language: "irish",
  31. },
  32. },
  33. analyzer: {
  34. rebuilt_irish: {
  35. tokenizer: "standard",
  36. filter: [
  37. "irish_hyphenation",
  38. "irish_elision",
  39. "irish_lowercase",
  40. "irish_stop",
  41. "irish_keywords",
  42. "irish_stemmer",
  43. ],
  44. },
  45. },
  46. },
  47. },
  48. });
  49. console.log(response);
  1. PUT /irish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "irish_hyphenation": {
  7. "type": "stop",
  8. "stopwords": [ "h", "n", "t" ],
  9. "ignore_case": true
  10. },
  11. "irish_elision": {
  12. "type": "elision",
  13. "articles": [ "d", "m", "b" ],
  14. "articles_case": true
  15. },
  16. "irish_stop": {
  17. "type": "stop",
  18. "stopwords": "_irish_"
  19. },
  20. "irish_lowercase": {
  21. "type": "lowercase",
  22. "language": "irish"
  23. },
  24. "irish_keywords": {
  25. "type": "keyword_marker",
  26. "keywords": ["sampla"]
  27. },
  28. "irish_stemmer": {
  29. "type": "stemmer",
  30. "language": "irish"
  31. }
  32. },
  33. "analyzer": {
  34. "rebuilt_irish": {
  35. "tokenizer": "standard",
  36. "filter": [
  37. "irish_hyphenation",
  38. "irish_elision",
  39. "irish_lowercase",
  40. "irish_stop",
  41. "irish_keywords",
  42. "irish_stemmer"
  43. ]
  44. }
  45. }
  46. }
  47. }
  48. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The irish_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

italian analyzer

The italian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="italian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "italian_elision": {
  7. "type": "elision",
  8. "articles": [
  9. "c",
  10. "l",
  11. "all",
  12. "dall",
  13. "dell",
  14. "nell",
  15. "sull",
  16. "coll",
  17. "pell",
  18. "gl",
  19. "agl",
  20. "dagl",
  21. "degl",
  22. "negl",
  23. "sugl",
  24. "un",
  25. "m",
  26. "t",
  27. "s",
  28. "v",
  29. "d"
  30. ],
  31. "articles_case": True
  32. },
  33. "italian_stop": {
  34. "type": "stop",
  35. "stopwords": "_italian_"
  36. },
  37. "italian_keywords": {
  38. "type": "keyword_marker",
  39. "keywords": [
  40. "esempio"
  41. ]
  42. },
  43. "italian_stemmer": {
  44. "type": "stemmer",
  45. "language": "light_italian"
  46. }
  47. },
  48. "analyzer": {
  49. "rebuilt_italian": {
  50. "tokenizer": "standard",
  51. "filter": [
  52. "italian_elision",
  53. "lowercase",
  54. "italian_stop",
  55. "italian_keywords",
  56. "italian_stemmer"
  57. ]
  58. }
  59. }
  60. }
  61. },
  62. )
  63. print(resp)
  1. response = client.indices.create(
  2. index: 'italian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. italian_elision: {
  8. type: 'elision',
  9. articles: [
  10. 'c',
  11. 'l',
  12. 'all',
  13. 'dall',
  14. 'dell',
  15. 'nell',
  16. 'sull',
  17. 'coll',
  18. 'pell',
  19. 'gl',
  20. 'agl',
  21. 'dagl',
  22. 'degl',
  23. 'negl',
  24. 'sugl',
  25. 'un',
  26. 'm',
  27. 't',
  28. 's',
  29. 'v',
  30. 'd'
  31. ],
  32. articles_case: true
  33. },
  34. italian_stop: {
  35. type: 'stop',
  36. stopwords: '_italian_'
  37. },
  38. italian_keywords: {
  39. type: 'keyword_marker',
  40. keywords: [
  41. 'esempio'
  42. ]
  43. },
  44. italian_stemmer: {
  45. type: 'stemmer',
  46. language: 'light_italian'
  47. }
  48. },
  49. analyzer: {
  50. rebuilt_italian: {
  51. tokenizer: 'standard',
  52. filter: [
  53. 'italian_elision',
  54. 'lowercase',
  55. 'italian_stop',
  56. 'italian_keywords',
  57. 'italian_stemmer'
  58. ]
  59. }
  60. }
  61. }
  62. }
  63. }
  64. )
  65. puts response
  1. const response = await client.indices.create({
  2. index: "italian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. italian_elision: {
  7. type: "elision",
  8. articles: [
  9. "c",
  10. "l",
  11. "all",
  12. "dall",
  13. "dell",
  14. "nell",
  15. "sull",
  16. "coll",
  17. "pell",
  18. "gl",
  19. "agl",
  20. "dagl",
  21. "degl",
  22. "negl",
  23. "sugl",
  24. "un",
  25. "m",
  26. "t",
  27. "s",
  28. "v",
  29. "d",
  30. ],
  31. articles_case: true,
  32. },
  33. italian_stop: {
  34. type: "stop",
  35. stopwords: "_italian_",
  36. },
  37. italian_keywords: {
  38. type: "keyword_marker",
  39. keywords: ["esempio"],
  40. },
  41. italian_stemmer: {
  42. type: "stemmer",
  43. language: "light_italian",
  44. },
  45. },
  46. analyzer: {
  47. rebuilt_italian: {
  48. tokenizer: "standard",
  49. filter: [
  50. "italian_elision",
  51. "lowercase",
  52. "italian_stop",
  53. "italian_keywords",
  54. "italian_stemmer",
  55. ],
  56. },
  57. },
  58. },
  59. },
  60. });
  61. console.log(response);
  1. PUT /italian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "italian_elision": {
  7. "type": "elision",
  8. "articles": [
  9. "c", "l", "all", "dall", "dell",
  10. "nell", "sull", "coll", "pell",
  11. "gl", "agl", "dagl", "degl", "negl",
  12. "sugl", "un", "m", "t", "s", "v", "d"
  13. ],
  14. "articles_case": true
  15. },
  16. "italian_stop": {
  17. "type": "stop",
  18. "stopwords": "_italian_"
  19. },
  20. "italian_keywords": {
  21. "type": "keyword_marker",
  22. "keywords": ["esempio"]
  23. },
  24. "italian_stemmer": {
  25. "type": "stemmer",
  26. "language": "light_italian"
  27. }
  28. },
  29. "analyzer": {
  30. "rebuilt_italian": {
  31. "tokenizer": "standard",
  32. "filter": [
  33. "italian_elision",
  34. "lowercase",
  35. "italian_stop",
  36. "italian_keywords",
  37. "italian_stemmer"
  38. ]
  39. }
  40. }
  41. }
  42. }
  43. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The italian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

latvian analyzer

The latvian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="latvian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "latvian_stop": {
  7. "type": "stop",
  8. "stopwords": "_latvian_"
  9. },
  10. "latvian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "piemērs"
  14. ]
  15. },
  16. "latvian_stemmer": {
  17. "type": "stemmer",
  18. "language": "latvian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_latvian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "latvian_stop",
  27. "latvian_keywords",
  28. "latvian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'latvian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. latvian_stop: {
  8. type: 'stop',
  9. stopwords: '_latvian_'
  10. },
  11. latvian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'piemērs'
  15. ]
  16. },
  17. latvian_stemmer: {
  18. type: 'stemmer',
  19. language: 'latvian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_latvian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'latvian_stop',
  28. 'latvian_keywords',
  29. 'latvian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "latvian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. latvian_stop: {
  7. type: "stop",
  8. stopwords: "_latvian_",
  9. },
  10. latvian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["piemērs"],
  13. },
  14. latvian_stemmer: {
  15. type: "stemmer",
  16. language: "latvian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_latvian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "latvian_stop",
  25. "latvian_keywords",
  26. "latvian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /latvian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "latvian_stop": {
  7. "type": "stop",
  8. "stopwords": "_latvian_"
  9. },
  10. "latvian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["piemērs"]
  13. },
  14. "latvian_stemmer": {
  15. "type": "stemmer",
  16. "language": "latvian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_latvian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "latvian_stop",
  25. "latvian_keywords",
  26. "latvian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The latvian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

lithuanian analyzer

The lithuanian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="lithuanian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "lithuanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_lithuanian_"
  9. },
  10. "lithuanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "pavyzdys"
  14. ]
  15. },
  16. "lithuanian_stemmer": {
  17. "type": "stemmer",
  18. "language": "lithuanian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_lithuanian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "lithuanian_stop",
  27. "lithuanian_keywords",
  28. "lithuanian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'lithuanian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. lithuanian_stop: {
  8. type: 'stop',
  9. stopwords: '_lithuanian_'
  10. },
  11. lithuanian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'pavyzdys'
  15. ]
  16. },
  17. lithuanian_stemmer: {
  18. type: 'stemmer',
  19. language: 'lithuanian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_lithuanian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'lithuanian_stop',
  28. 'lithuanian_keywords',
  29. 'lithuanian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "lithuanian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. lithuanian_stop: {
  7. type: "stop",
  8. stopwords: "_lithuanian_",
  9. },
  10. lithuanian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["pavyzdys"],
  13. },
  14. lithuanian_stemmer: {
  15. type: "stemmer",
  16. language: "lithuanian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_lithuanian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "lithuanian_stop",
  25. "lithuanian_keywords",
  26. "lithuanian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /lithuanian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "lithuanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_lithuanian_"
  9. },
  10. "lithuanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["pavyzdys"]
  13. },
  14. "lithuanian_stemmer": {
  15. "type": "stemmer",
  16. "language": "lithuanian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_lithuanian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "lithuanian_stop",
  25. "lithuanian_keywords",
  26. "lithuanian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The lithuanian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

norwegian analyzer

The norwegian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="norwegian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "norwegian_stop": {
  7. "type": "stop",
  8. "stopwords": "_norwegian_"
  9. },
  10. "norwegian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "eksempel"
  14. ]
  15. },
  16. "norwegian_stemmer": {
  17. "type": "stemmer",
  18. "language": "norwegian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_norwegian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "norwegian_stop",
  27. "norwegian_keywords",
  28. "norwegian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'norwegian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. norwegian_stop: {
  8. type: 'stop',
  9. stopwords: '_norwegian_'
  10. },
  11. norwegian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'eksempel'
  15. ]
  16. },
  17. norwegian_stemmer: {
  18. type: 'stemmer',
  19. language: 'norwegian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_norwegian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'norwegian_stop',
  28. 'norwegian_keywords',
  29. 'norwegian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "norwegian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. norwegian_stop: {
  7. type: "stop",
  8. stopwords: "_norwegian_",
  9. },
  10. norwegian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["eksempel"],
  13. },
  14. norwegian_stemmer: {
  15. type: "stemmer",
  16. language: "norwegian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_norwegian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "norwegian_stop",
  25. "norwegian_keywords",
  26. "norwegian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /norwegian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "norwegian_stop": {
  7. "type": "stop",
  8. "stopwords": "_norwegian_"
  9. },
  10. "norwegian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["eksempel"]
  13. },
  14. "norwegian_stemmer": {
  15. "type": "stemmer",
  16. "language": "norwegian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_norwegian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "norwegian_stop",
  25. "norwegian_keywords",
  26. "norwegian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The norwegian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

persian analyzer

The persian analyzer could be reimplemented as a custom analyzer as follows:

  1. response = client.indices.create(
  2. index: 'persian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. char_filter: {
  7. zero_width_spaces: {
  8. type: 'mapping',
  9. mappings: [
  10. '\\u200C=>\\u0020'
  11. ]
  12. }
  13. },
  14. filter: {
  15. persian_stop: {
  16. type: 'stop',
  17. stopwords: '_persian_'
  18. }
  19. },
  20. analyzer: {
  21. rebuilt_persian: {
  22. tokenizer: 'standard',
  23. char_filter: [
  24. 'zero_width_spaces'
  25. ],
  26. filter: [
  27. 'lowercase',
  28. 'decimal_digit',
  29. 'arabic_normalization',
  30. 'persian_normalization',
  31. 'persian_stop'
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
  38. )
  39. puts response
  1. PUT /persian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "char_filter": {
  6. "zero_width_spaces": {
  7. "type": "mapping",
  8. "mappings": [ "\\u200C=>\\u0020"]
  9. }
  10. },
  11. "filter": {
  12. "persian_stop": {
  13. "type": "stop",
  14. "stopwords": "_persian_"
  15. }
  16. },
  17. "analyzer": {
  18. "rebuilt_persian": {
  19. "tokenizer": "standard",
  20. "char_filter": [ "zero_width_spaces" ],
  21. "filter": [
  22. "lowercase",
  23. "decimal_digit",
  24. "arabic_normalization",
  25. "persian_normalization",
  26. "persian_stop"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The zero_width_spaces character filter replaces zero-width non-joiners (U+200C) with an ASCII space (U+0020).

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

portuguese analyzer

The portuguese analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="portuguese_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "portuguese_stop": {
  7. "type": "stop",
  8. "stopwords": "_portuguese_"
  9. },
  10. "portuguese_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplo"
  14. ]
  15. },
  16. "portuguese_stemmer": {
  17. "type": "stemmer",
  18. "language": "light_portuguese"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_portuguese": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "portuguese_stop",
  27. "portuguese_keywords",
  28. "portuguese_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'portuguese_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. portuguese_stop: {
  8. type: 'stop',
  9. stopwords: '_portuguese_'
  10. },
  11. portuguese_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplo'
  15. ]
  16. },
  17. portuguese_stemmer: {
  18. type: 'stemmer',
  19. language: 'light_portuguese'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_portuguese: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'portuguese_stop',
  28. 'portuguese_keywords',
  29. 'portuguese_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "portuguese_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. portuguese_stop: {
  7. type: "stop",
  8. stopwords: "_portuguese_",
  9. },
  10. portuguese_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplo"],
  13. },
  14. portuguese_stemmer: {
  15. type: "stemmer",
  16. language: "light_portuguese",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_portuguese: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "portuguese_stop",
  25. "portuguese_keywords",
  26. "portuguese_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /portuguese_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "portuguese_stop": {
  7. "type": "stop",
  8. "stopwords": "_portuguese_"
  9. },
  10. "portuguese_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplo"]
  13. },
  14. "portuguese_stemmer": {
  15. "type": "stemmer",
  16. "language": "light_portuguese"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_portuguese": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "portuguese_stop",
  25. "portuguese_keywords",
  26. "portuguese_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The portuguese_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

romanian analyzer

The romanian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="romanian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "romanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_romanian_"
  9. },
  10. "romanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplu"
  14. ]
  15. },
  16. "romanian_stemmer": {
  17. "type": "stemmer",
  18. "language": "romanian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_romanian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "romanian_stop",
  27. "romanian_keywords",
  28. "romanian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'romanian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. romanian_stop: {
  8. type: 'stop',
  9. stopwords: '_romanian_'
  10. },
  11. romanian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplu'
  15. ]
  16. },
  17. romanian_stemmer: {
  18. type: 'stemmer',
  19. language: 'romanian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_romanian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'romanian_stop',
  28. 'romanian_keywords',
  29. 'romanian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "romanian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. romanian_stop: {
  7. type: "stop",
  8. stopwords: "_romanian_",
  9. },
  10. romanian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplu"],
  13. },
  14. romanian_stemmer: {
  15. type: "stemmer",
  16. language: "romanian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_romanian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "romanian_stop",
  25. "romanian_keywords",
  26. "romanian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /romanian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "romanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_romanian_"
  9. },
  10. "romanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplu"]
  13. },
  14. "romanian_stemmer": {
  15. "type": "stemmer",
  16. "language": "romanian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_romanian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "romanian_stop",
  25. "romanian_keywords",
  26. "romanian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The romanian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

russian analyzer

The russian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="russian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "russian_stop": {
  7. "type": "stop",
  8. "stopwords": "_russian_"
  9. },
  10. "russian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "пример"
  14. ]
  15. },
  16. "russian_stemmer": {
  17. "type": "stemmer",
  18. "language": "russian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_russian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "russian_stop",
  27. "russian_keywords",
  28. "russian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'russian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. russian_stop: {
  8. type: 'stop',
  9. stopwords: '_russian_'
  10. },
  11. russian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'пример'
  15. ]
  16. },
  17. russian_stemmer: {
  18. type: 'stemmer',
  19. language: 'russian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_russian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'russian_stop',
  28. 'russian_keywords',
  29. 'russian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "russian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. russian_stop: {
  7. type: "stop",
  8. stopwords: "_russian_",
  9. },
  10. russian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["пример"],
  13. },
  14. russian_stemmer: {
  15. type: "stemmer",
  16. language: "russian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_russian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "russian_stop",
  25. "russian_keywords",
  26. "russian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /russian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "russian_stop": {
  7. "type": "stop",
  8. "stopwords": "_russian_"
  9. },
  10. "russian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["пример"]
  13. },
  14. "russian_stemmer": {
  15. "type": "stemmer",
  16. "language": "russian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_russian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "russian_stop",
  25. "russian_keywords",
  26. "russian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The russian_keywords filter (a keyword_marker token filter) should be removed unless there are words which should be excluded from stemming.

serbian analyzer

The serbian analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="serbian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "serbian_stop": {
  7. "type": "stop",
  8. "stopwords": "_serbian_"
  9. },
  10. "serbian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "пример"
  14. ]
  15. },
  16. "serbian_stemmer": {
  17. "type": "stemmer",
  18. "language": "serbian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_serbian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "serbian_stop",
  27. "serbian_keywords",
  28. "serbian_stemmer",
  29. "serbian_normalization"
  30. ]
  31. }
  32. }
  33. }
  34. },
  35. )
  36. print(resp)
  1. response = client.indices.create(
  2. index: 'serbian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. serbian_stop: {
  8. type: 'stop',
  9. stopwords: '_serbian_'
  10. },
  11. serbian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'пример'
  15. ]
  16. },
  17. serbian_stemmer: {
  18. type: 'stemmer',
  19. language: 'serbian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_serbian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'serbian_stop',
  28. 'serbian_keywords',
  29. 'serbian_stemmer',
  30. 'serbian_normalization'
  31. ]
  32. }
  33. }
  34. }
  35. }
  36. }
  37. )
  38. puts response
  1. const response = await client.indices.create({
  2. index: "serbian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. serbian_stop: {
  7. type: "stop",
  8. stopwords: "_serbian_",
  9. },
  10. serbian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["пример"],
  13. },
  14. serbian_stemmer: {
  15. type: "stemmer",
  16. language: "serbian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_serbian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "serbian_stop",
  25. "serbian_keywords",
  26. "serbian_stemmer",
  27. "serbian_normalization",
  28. ],
  29. },
  30. },
  31. },
  32. },
  33. });
  34. console.log(response);
  1. PUT /serbian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "serbian_stop": {
  7. "type": "stop",
  8. "stopwords": "_serbian_"
  9. },
  10. "serbian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["пример"]
  13. },
  14. "serbian_stemmer": {
  15. "type": "stemmer",
  16. "language": "serbian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_serbian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "serbian_stop",
  25. "serbian_keywords",
  26. "serbian_stemmer",
  27. "serbian_normalization"
  28. ]
  29. }
  30. }
  31. }
  32. }
  33. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The serbian_keywords filter (type keyword_marker) should be removed unless there are words which should be excluded from stemming.

sorani analyzer

The sorani analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="sorani_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "sorani_stop": {
  7. "type": "stop",
  8. "stopwords": "_sorani_"
  9. },
  10. "sorani_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "mînak"
  14. ]
  15. },
  16. "sorani_stemmer": {
  17. "type": "stemmer",
  18. "language": "sorani"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_sorani": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "sorani_normalization",
  26. "lowercase",
  27. "decimal_digit",
  28. "sorani_stop",
  29. "sorani_keywords",
  30. "sorani_stemmer"
  31. ]
  32. }
  33. }
  34. }
  35. },
  36. )
  37. print(resp)
  1. response = client.indices.create(
  2. index: 'sorani_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. sorani_stop: {
  8. type: 'stop',
  9. stopwords: '_sorani_'
  10. },
  11. sorani_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'mînak'
  15. ]
  16. },
  17. sorani_stemmer: {
  18. type: 'stemmer',
  19. language: 'sorani'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_sorani: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'sorani_normalization',
  27. 'lowercase',
  28. 'decimal_digit',
  29. 'sorani_stop',
  30. 'sorani_keywords',
  31. 'sorani_stemmer'
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
  38. )
  39. puts response
  1. const response = await client.indices.create({
  2. index: "sorani_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. sorani_stop: {
  7. type: "stop",
  8. stopwords: "_sorani_",
  9. },
  10. sorani_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["mînak"],
  13. },
  14. sorani_stemmer: {
  15. type: "stemmer",
  16. language: "sorani",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_sorani: {
  21. tokenizer: "standard",
  22. filter: [
  23. "sorani_normalization",
  24. "lowercase",
  25. "decimal_digit",
  26. "sorani_stop",
  27. "sorani_keywords",
  28. "sorani_stemmer",
  29. ],
  30. },
  31. },
  32. },
  33. },
  34. });
  35. console.log(response);
  1. PUT /sorani_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "sorani_stop": {
  7. "type": "stop",
  8. "stopwords": "_sorani_"
  9. },
  10. "sorani_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["mînak"]
  13. },
  14. "sorani_stemmer": {
  15. "type": "stemmer",
  16. "language": "sorani"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_sorani": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "sorani_normalization",
  24. "lowercase",
  25. "decimal_digit",
  26. "sorani_stop",
  27. "sorani_keywords",
  28. "sorani_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. }
  34. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The sorani_keywords filter (type keyword_marker) should be removed unless there are words which should be excluded from stemming.

spanish analyzer

The spanish analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="spanish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "spanish_stop": {
  7. "type": "stop",
  8. "stopwords": "_spanish_"
  9. },
  10. "spanish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "ejemplo"
  14. ]
  15. },
  16. "spanish_stemmer": {
  17. "type": "stemmer",
  18. "language": "light_spanish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_spanish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "spanish_stop",
  27. "spanish_keywords",
  28. "spanish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'spanish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. spanish_stop: {
  8. type: 'stop',
  9. stopwords: '_spanish_'
  10. },
  11. spanish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'ejemplo'
  15. ]
  16. },
  17. spanish_stemmer: {
  18. type: 'stemmer',
  19. language: 'light_spanish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_spanish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'spanish_stop',
  28. 'spanish_keywords',
  29. 'spanish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "spanish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. spanish_stop: {
  7. type: "stop",
  8. stopwords: "_spanish_",
  9. },
  10. spanish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["ejemplo"],
  13. },
  14. spanish_stemmer: {
  15. type: "stemmer",
  16. language: "light_spanish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_spanish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "spanish_stop",
  25. "spanish_keywords",
  26. "spanish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /spanish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "spanish_stop": {
  7. "type": "stop",
  8. "stopwords": "_spanish_"
  9. },
  10. "spanish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["ejemplo"]
  13. },
  14. "spanish_stemmer": {
  15. "type": "stemmer",
  16. "language": "light_spanish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_spanish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "spanish_stop",
  25. "spanish_keywords",
  26. "spanish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The spanish_keywords filter (type keyword_marker) should be removed unless there are words which should be excluded from stemming.

swedish analyzer

The swedish analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="swedish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "swedish_stop": {
  7. "type": "stop",
  8. "stopwords": "_swedish_"
  9. },
  10. "swedish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exempel"
  14. ]
  15. },
  16. "swedish_stemmer": {
  17. "type": "stemmer",
  18. "language": "swedish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_swedish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "swedish_stop",
  27. "swedish_keywords",
  28. "swedish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)
  1. response = client.indices.create(
  2. index: 'swedish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. swedish_stop: {
  8. type: 'stop',
  9. stopwords: '_swedish_'
  10. },
  11. swedish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exempel'
  15. ]
  16. },
  17. swedish_stemmer: {
  18. type: 'stemmer',
  19. language: 'swedish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_swedish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'swedish_stop',
  28. 'swedish_keywords',
  29. 'swedish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response
  1. const response = await client.indices.create({
  2. index: "swedish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. swedish_stop: {
  7. type: "stop",
  8. stopwords: "_swedish_",
  9. },
  10. swedish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exempel"],
  13. },
  14. swedish_stemmer: {
  15. type: "stemmer",
  16. language: "swedish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_swedish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "swedish_stop",
  25. "swedish_keywords",
  26. "swedish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);
  1. PUT /swedish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "swedish_stop": {
  7. "type": "stop",
  8. "stopwords": "_swedish_"
  9. },
  10. "swedish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exempel"]
  13. },
  14. "swedish_stemmer": {
  15. "type": "stemmer",
  16. "language": "swedish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_swedish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "swedish_stop",
  25. "swedish_keywords",
  26. "swedish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The swedish_keywords filter (type keyword_marker) should be removed unless there are words which should be excluded from stemming.

turkish analyzer

The turkish analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="turkish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "turkish_stop": {
  7. "type": "stop",
  8. "stopwords": "_turkish_"
  9. },
  10. "turkish_lowercase": {
  11. "type": "lowercase",
  12. "language": "turkish"
  13. },
  14. "turkish_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": [
  17. "örnek"
  18. ]
  19. },
  20. "turkish_stemmer": {
  21. "type": "stemmer",
  22. "language": "turkish"
  23. }
  24. },
  25. "analyzer": {
  26. "rebuilt_turkish": {
  27. "tokenizer": "standard",
  28. "filter": [
  29. "apostrophe",
  30. "turkish_lowercase",
  31. "turkish_stop",
  32. "turkish_keywords",
  33. "turkish_stemmer"
  34. ]
  35. }
  36. }
  37. }
  38. },
  39. )
  40. print(resp)
  1. response = client.indices.create(
  2. index: 'turkish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. turkish_stop: {
  8. type: 'stop',
  9. stopwords: '_turkish_'
  10. },
  11. turkish_lowercase: {
  12. type: 'lowercase',
  13. language: 'turkish'
  14. },
  15. turkish_keywords: {
  16. type: 'keyword_marker',
  17. keywords: [
  18. 'örnek'
  19. ]
  20. },
  21. turkish_stemmer: {
  22. type: 'stemmer',
  23. language: 'turkish'
  24. }
  25. },
  26. analyzer: {
  27. rebuilt_turkish: {
  28. tokenizer: 'standard',
  29. filter: [
  30. 'apostrophe',
  31. 'turkish_lowercase',
  32. 'turkish_stop',
  33. 'turkish_keywords',
  34. 'turkish_stemmer'
  35. ]
  36. }
  37. }
  38. }
  39. }
  40. }
  41. )
  42. puts response
  1. const response = await client.indices.create({
  2. index: "turkish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. turkish_stop: {
  7. type: "stop",
  8. stopwords: "_turkish_",
  9. },
  10. turkish_lowercase: {
  11. type: "lowercase",
  12. language: "turkish",
  13. },
  14. turkish_keywords: {
  15. type: "keyword_marker",
  16. keywords: ["örnek"],
  17. },
  18. turkish_stemmer: {
  19. type: "stemmer",
  20. language: "turkish",
  21. },
  22. },
  23. analyzer: {
  24. rebuilt_turkish: {
  25. tokenizer: "standard",
  26. filter: [
  27. "apostrophe",
  28. "turkish_lowercase",
  29. "turkish_stop",
  30. "turkish_keywords",
  31. "turkish_stemmer",
  32. ],
  33. },
  34. },
  35. },
  36. },
  37. });
  38. console.log(response);
  1. PUT /turkish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "turkish_stop": {
  7. "type": "stop",
  8. "stopwords": "_turkish_"
  9. },
  10. "turkish_lowercase": {
  11. "type": "lowercase",
  12. "language": "turkish"
  13. },
  14. "turkish_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": ["örnek"]
  17. },
  18. "turkish_stemmer": {
  19. "type": "stemmer",
  20. "language": "turkish"
  21. }
  22. },
  23. "analyzer": {
  24. "rebuilt_turkish": {
  25. "tokenizer": "standard",
  26. "filter": [
  27. "apostrophe",
  28. "turkish_lowercase",
  29. "turkish_stop",
  30. "turkish_keywords",
  31. "turkish_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.

The turkish_keywords filter (type keyword_marker) should be removed unless there are words which should be excluded from stemming.

thai analyzer

The thai analyzer could be reimplemented as a custom analyzer as follows:

  1. resp = client.indices.create(
  2. index="thai_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "thai_stop": {
  7. "type": "stop",
  8. "stopwords": "_thai_"
  9. }
  10. },
  11. "analyzer": {
  12. "rebuilt_thai": {
  13. "tokenizer": "thai",
  14. "filter": [
  15. "lowercase",
  16. "decimal_digit",
  17. "thai_stop"
  18. ]
  19. }
  20. }
  21. }
  22. },
  23. )
  24. print(resp)
  1. response = client.indices.create(
  2. index: 'thai_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. thai_stop: {
  8. type: 'stop',
  9. stopwords: '_thai_'
  10. }
  11. },
  12. analyzer: {
  13. rebuilt_thai: {
  14. tokenizer: 'thai',
  15. filter: [
  16. 'lowercase',
  17. 'decimal_digit',
  18. 'thai_stop'
  19. ]
  20. }
  21. }
  22. }
  23. }
  24. }
  25. )
  26. puts response
  1. const response = await client.indices.create({
  2. index: "thai_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. thai_stop: {
  7. type: "stop",
  8. stopwords: "_thai_",
  9. },
  10. },
  11. analyzer: {
  12. rebuilt_thai: {
  13. tokenizer: "thai",
  14. filter: ["lowercase", "decimal_digit", "thai_stop"],
  15. },
  16. },
  17. },
  18. },
  19. });
  20. console.log(response);
  1. PUT /thai_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "thai_stop": {
  7. "type": "stop",
  8. "stopwords": "_thai_"
  9. }
  10. },
  11. "analyzer": {
  12. "rebuilt_thai": {
  13. "tokenizer": "thai",
  14. "filter": [
  15. "lowercase",
  16. "decimal_digit",
  17. "thai_stop"
  18. ]
  19. }
  20. }
  21. }
  22. }
  23. }

The default stopwords can be overridden with the stopwords or stopwords_path parameters.