Stemmer override token filter
Stemmer override token filter
Overrides stemming algorithms, by applying a custom mapping, then protecting these terms from being modified by stemmers. Must be placed before any stemming filters.
Rules are mappings in the form of `token1[, ..., tokenN] => override`.
| Setting | Description |
|---|---|
| `rules` | A list of mapping rules to use. |
| `rules_path` | A path (either relative to `config` location, or absolute) to a list of mappings. |
Here is an example:
resp = client.indices.create(
index="my-index-000001",
settings={
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"custom_stems",
"porter_stem"
]
}
},
"filter": {
"custom_stems": {
"type": "stemmer_override",
"rules_path": "analysis/stemmer_override.txt"
}
}
}
},
)
print(resp)
response = client.indices.create(
index: 'my-index-000001',
body: {
settings: {
analysis: {
analyzer: {
my_analyzer: {
tokenizer: 'standard',
filter: [
'lowercase',
'custom_stems',
'porter_stem'
]
}
},
filter: {
custom_stems: {
type: 'stemmer_override',
rules_path: 'analysis/stemmer_override.txt'
}
}
}
}
}
)
puts response
const response = await client.indices.create({
index: "my-index-000001",
settings: {
analysis: {
analyzer: {
my_analyzer: {
tokenizer: "standard",
filter: ["lowercase", "custom_stems", "porter_stem"],
},
},
filter: {
custom_stems: {
type: "stemmer_override",
rules_path: "analysis/stemmer_override.txt",
},
},
},
},
});
console.log(response);
PUT /my-index-000001
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"filter": [ "lowercase", "custom_stems", "porter_stem" ]
}
},
"filter": {
"custom_stems": {
"type": "stemmer_override",
"rules_path": "analysis/stemmer_override.txt"
}
}
}
}
}
Where the file looks like:
running, runs => run
stemmer => stemmer
You can also define the override rules inline:
resp = client.indices.create(
index="my-index-000001",
settings={
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"filter": [
"lowercase",
"custom_stems",
"porter_stem"
]
}
},
"filter": {
"custom_stems": {
"type": "stemmer_override",
"rules": [
"running, runs => run",
"stemmer => stemmer"
]
}
}
}
},
)
print(resp)
response = client.indices.create(
index: 'my-index-000001',
body: {
settings: {
analysis: {
analyzer: {
my_analyzer: {
tokenizer: 'standard',
filter: [
'lowercase',
'custom_stems',
'porter_stem'
]
}
},
filter: {
custom_stems: {
type: 'stemmer_override',
rules: [
'running, runs => run',
'stemmer => stemmer'
]
}
}
}
}
}
)
puts response
const response = await client.indices.create({
index: "my-index-000001",
settings: {
analysis: {
analyzer: {
my_analyzer: {
tokenizer: "standard",
filter: ["lowercase", "custom_stems", "porter_stem"],
},
},
filter: {
custom_stems: {
type: "stemmer_override",
rules: ["running, runs => run", "stemmer => stemmer"],
},
},
},
},
});
console.log(response);
PUT /my-index-000001
{
"settings": {
"analysis": {
"analyzer": {
"my_analyzer": {
"tokenizer": "standard",
"filter": [ "lowercase", "custom_stems", "porter_stem" ]
}
},
"filter": {
"custom_stems": {
"type": "stemmer_override",
"rules": [
"running, runs => run",
"stemmer => stemmer"
]
}
}
}
}
}
当前内容版权归 elasticsearch 或其关联方所有,如需对内容或内容相关联开源项目进行关注与资助,请访问 elasticsearch .