Token count field type
Token count field type
A field of type `token_count` is really an integer field which accepts string values, analyzes them, then indexes the number of tokens in the string.
For instance:
resp = client.indices.create(
index="my-index-000001",
mappings={
"properties": {
"name": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "standard"
}
}
}
}
},
)
print(resp)
resp1 = client.index(
index="my-index-000001",
id="1",
document={
"name": "John Smith"
},
)
print(resp1)
resp2 = client.index(
index="my-index-000001",
id="2",
document={
"name": "Rachel Alice Williams"
},
)
print(resp2)
resp3 = client.search(
index="my-index-000001",
query={
"term": {
"name.length": 3
}
},
)
print(resp3)
response = client.indices.create(
index: 'my-index-000001',
body: {
mappings: {
properties: {
name: {
type: 'text',
fields: {
length: {
type: 'token_count',
analyzer: 'standard'
}
}
}
}
}
}
)
puts response
response = client.index(
index: 'my-index-000001',
id: 1,
body: {
name: 'John Smith'
}
)
puts response
response = client.index(
index: 'my-index-000001',
id: 2,
body: {
name: 'Rachel Alice Williams'
}
)
puts response
response = client.search(
index: 'my-index-000001',
body: {
query: {
term: {
'name.length' => 3
}
}
}
)
puts response
const response = await client.indices.create({
index: "my-index-000001",
mappings: {
properties: {
name: {
type: "text",
fields: {
length: {
type: "token_count",
analyzer: "standard",
},
},
},
},
},
});
console.log(response);
const response1 = await client.index({
index: "my-index-000001",
id: 1,
document: {
name: "John Smith",
},
});
console.log(response1);
const response2 = await client.index({
index: "my-index-000001",
id: 2,
document: {
name: "Rachel Alice Williams",
},
});
console.log(response2);
const response3 = await client.search({
index: "my-index-000001",
query: {
term: {
"name.length": 3,
},
},
});
console.log(response3);
PUT my-index-000001
{
"mappings": {
"properties": {
"name": {
"type": "text",
"fields": {
"length": {
"type": "token_count",
"analyzer": "standard"
}
}
}
}
}
}
PUT my-index-000001/_doc/1
{ "name": "John Smith" }
PUT my-index-000001/_doc/2
{ "name": "Rachel Alice Williams" }
GET my-index-000001/_search
{
"query": {
"term": {
"name.length": 3
}
}
}
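1. The `name` field is a `text` field which uses the default `standard` analyzer.
2. The `name.length` field is a `token_count` multi-field which indexes the number of tokens in the `name` field.
3. This query matches only the document containing `Rachel Alice Williams`, as it contains three tokens.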
Parameters for `token_count` fields
The following parameters are accepted by `token_count` fields:
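- `analyzer`: The analyzer which should be used to analyze the string value. Required. For best performance, use an analyzer without token filters.
- `enable_position_increments`: Indicates if position increments should be counted. Set to `false` if you don't want to count tokens removed by analyzer filters (like `stop`). Defaults to `true`.
- `doc_values`: Should the field be stored on disk in a column-stride fashion, so that it can later be used for sorting, aggregations, or scripting? Accepts `true` (default) or `false`.
- `index`: Should the field be searchable? Accepts `true` (default) and `false`.
- `null_value`: Accepts a numeric value of the same `type` as the field which is substituted for any explicit `null` values. Defaults to `null`, which means the field is treated as missing.
- `store`: Whether the field value should be stored and retrievable separately from the `_source` field. Accepts `true` or `false` (default).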
Synthetic _source
Synthetic `_source` is Generally Available only for TSDB indices (indices that have `index.mode` set to `time_series`). For other indices, synthetic `_source` is in technical preview. Features in technical preview may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features.
`token_count` fields support synthetic `_source` in their default configuration.