下面的索引设置来自国外一家优秀的图片搜索公司(500px),从它的索引设计中可以学到一些提升索引设计水平的技巧:
{
"500px.photos-2016-05-06-20-09": {
"aliases": {
"500px.photos": {}
},
"mappings": {
"photo": {
"_all": {
"enabled": false
},
"_routing": {
"required": true,
"path": "user_id"
},
"properties": {
"camera": {
"type": "string",
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
}
}
},
"category": {
"type": "integer"
},
"collections_count": {
"type": "long"
},
"comments_count": {
"type": "integer"
},
"context_tags": {
"type": "nested",
"properties": {
"context": {
"type": "string",
"index": "not_analyzed"
},
"id": {
"type": "integer"
},
"name": {
"type": "string",
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"exact_stemmed_synonyms": {
"type": "string",
"index_analyzer": "exact_stemmed_synonyms",
"search_analyzer": "exact_stemmed_synonyms_search"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
},
"stemmed": {
"type": "string",
"analyzer": "stemmed"
},
"stemmed_synonyms": {
"type": "string",
"index_analyzer": "stemmed_synonyms",
"search_analyzer": "stemmed_synonyms_search"
}
}
},
"name_de": {
"type": "string",
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
},
"stemmed": {
"type": "string",
"analyzer": "stemmed_de"
}
}
},
"weight": {
"type": "float"
},
"weight_new": {
"type": "float"
}
}
},
"context_tags_tags_count": {
"type": "integer"
},
"converted": {
"type": "integer"
},
"created_at": {
"type": "date",
"format": "dateOptionalTime"
},
"delivery_type_current": {
"type": "nested",
"properties": {
"delivery_type": {
"type": "string",
"index": "not_analyzed"
},
"enabled": {
"type": "boolean"
}
}
},
"description": {
"type": "string",
"boost": 5,
"fields": {
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
},
"stemmed": {
"type": "string",
"analyzer": "stemmed"
},
"stemmed_synonyms": {
"type": "string",
"index_analyzer": "stemmed_synonyms",
"search_analyzer": "stemmed_synonyms_search"
}
}
},
"favorites_count": {
"type": "integer"
},
"feature": {
"type": "nested",
"properties": {
"end_at": {
"type": "date",
"format": "dateOptionalTime"
},
"name": {
"type": "string",
"index": "not_analyzed"
},
"start_at": {
"type": "date",
"format": "dateOptionalTime"
}
}
},
"for_sale": {
"type": "boolean"
},
"galleries": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"position": {
"type": "long"
}
}
},
"geo_coordinates": {
"type": "geo_point",
"lat_lon": true
},
"height": {
"type": "integer"
},
"hi_res_uploaded": {
"type": "integer"
},
"highest_rating": {
"type": "float"
},
"id": {
"type": "integer"
},
"image_format": {
"type": "integer"
},
"lens": {
"type": "string",
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
}
}
},
"license_requests_enabled": {
"type": "boolean"
},
"license_type": {
"type": "integer"
},
"licensed_at": {
"type": "date",
"format": "dateOptionalTime"
},
"licensing_status": {
"type": "integer"
},
"licensing_status_organizer_index": {
"type": "integer"
},
"name": {
"type": "string",
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"exact_stemmed_synonyms": {
"type": "string",
"index_analyzer": "exact_stemmed_synonyms",
"search_analyzer": "exact_stemmed_synonyms_search"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
},
"stemmed": {
"type": "string",
"analyzer": "stemmed"
},
"stemmed_synonyms": {
"type": "string",
"index_analyzer": "stemmed_synonyms",
"search_analyzer": "stemmed_synonyms_search"
}
}
},
"nsfw": {
"type": "boolean"
},
"photo_sets": {
"type": "nested",
"properties": {
"id": {
"type": "long"
},
"position": {
"type": "long"
}
}
},
"privacy": {
"type": "integer"
},
"rating": {
"type": "float"
},
"sales_count": {
"type": "integer"
},
"status": {
"type": "integer"
},
"taken_at": {
"type": "date",
"format": "dateOptionalTime"
},
"times_viewed": {
"type": "integer"
},
"updated_at": {
"type": "date",
"format": "dateOptionalTime"
},
"user_firstname": {
"type": "string",
"boost": 4,
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
}
}
},
"user_id": {
"type": "long"
},
"user_lastname": {
"type": "string",
"boost": 4,
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
}
}
},
"user_name": {
"type": "string",
"boost": 4,
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
}
}
},
"user_partner_optout": {
"type": "boolean"
},
"user_status": {
"type": "integer"
},
"user_store_on": {
"type": "boolean"
},
"user_username": {
"type": "string",
"boost": 4,
"fields": {
"exact": {
"type": "string",
"analyzer": "exact"
},
"not_analyzed": {
"type": "string",
"index": "not_analyzed"
},
"prefix": {
"type": "string",
"index_analyzer": "prefix",
"search_analyzer": "prefix_search"
}
}
},
"votes_count": {
"type": "integer"
},
"width": {
"type": "integer"
}
}
}
},
"settings": {
"index": {
"creation_date": "1462579747496",
"uuid": "V9kxOgQPR82FXpj-UN_Rdw",
"analysis": {
"char_filter": {
"amp_and": {
"type": "mapping",
"mappings": [
"&=> and "
]
},
"punctuation": {
"type": "mapping",
"mappings": [
".=> "
]
}
},
"filter": {
"preserved_asciifolding": {
"type": "asciifolding",
"preserve_original": "true"
},
"large_prefixer": {
"max_gram": "100",
"min_gram": "1",
"type": "edgeNGram",
"side": "front"
},
"prefixer": {
"max_gram": "8",
"type": "edgeNGram",
"min_gram": "2",
"side": "front"
},
"german_stemmer": {
"type": "stemmer",
"language": "light_german"
},
"german_stop": {
"type": "stop",
"stopwords": "_german_"
},
"fivegrammer": {
"min_gram": "5",
"type": "nGram",
"max_gram": "5"
},
"synonyms": {
"type": "synonym",
"synonyms_path": "analysis/wn_s.pl",
"format": "wordnet"
},
"trigrammer": {
"type": "nGram",
"min_gram": "3",
"max_gram": "3"
},
"custom_stems": {
"type": "stemmer_override",
"rules_path": "analysis/custom_stems.txt"
}
},
"analyzer": {
"exact_stemmed_synonyms": {
"type": "custom",
"char_filter": [
"amp_and"
],
"filter": [
"asciifolding",
"lowercase",
"trim",
"custom_stems",
"kstem",
"synonyms",
"custom_stems",
"stop"
],
"tokenizer": "keyword"
},
"stemmed": {
"filter": [
"standard",
"lowercase",
"custom_stems",
"stop",
"kstem"
],
"tokenizer": "standard"
},
"exact_stemmed_synonyms_search": {
"type": "custom",
"char_filter": [
"amp_and"
],
"filter": [
"standard",
"asciifolding",
"lowercase",
"trim",
"custom_stems",
"stop",
"kstem"
],
"tokenizer": "standard"
},
"synonyms": {
"type": "custom",
"char_filter": [
"amp_and"
],
"filter": [
"standard",
"lowercase",
"synonyms"
],
"tokenizer": "standard"
},
"partial": {
"filter": [
"preserved_asciifolding",
"large_prefixer"
],
"tokenizer": "lowercase"
},
"prefix_search": {
"tokenizer": "lowercase"
},
"stemmed_synonyms": {
"type": "custom",
"char_filter": [
"amp_and"
],
"filter": [
"standard",
"asciifolding",
"lowercase",
"trim",
"custom_stems",
"kstem",
"synonyms",
"custom_stems",
"stop"
],
"tokenizer": "standard"
},
"fivegram_ascii": {
"filter": [
"standard",
"asciifolding",
"lowercase",
"trim",
"fivegrammer"
],
"tokenizer": "standard"
},
"prefix": {
"filter": [
"preserved_asciifolding",
"prefixer"
],
"tokenizer": "lowercase"
},
"exact": {
"type": "custom",
"char_filter": [
"amp_and"
],
"filter": [
"asciifolding",
"lowercase",
"trim"
],
"tokenizer": "keyword"
},
"stemmed_synonyms_search": {
"type": "custom",
"char_filter": [
"amp_and"
],
"filter": [
"standard",
"asciifolding",
"lowercase",
"trim",
"custom_stems",
"stop",
"kstem"
],
"tokenizer": "standard"
},
"trigram": {
"filter": [
"lowercase",
"trim",
"trigrammer"
],
"tokenizer": "keyword"
},
"stemmed_de": {
"filter": [
"standard",
"asciifolding",
"lowercase",
"german_stop",
"german_normalization",
"german_stemmer"
],
"tokenizer": "standard"
},
"partial_search": {
"tokenizer": "lowercase"
}
}
},
"number_of_replicas": "2",
"number_of_shards": "20",
"refresh_interval": "1",
"version": {
"created": "1040499"
}
}
},
"warmers": {}
}
}
- 索引名 500px.photos-2016-05-06-20-09 中带有具体的日期和时间,用来记录这一版索引的创建(修改)时间,是一个值得借鉴的小细节
- 每次修改后重建的索引都会设置同一个别名:500px.photos,这样应用可以始终通过别名访问索引,切换索引版本时不必修改应用
- 参数 "_all": { "enabled": false } 禁用了 _all 字段,避免在索引阶段为它额外消耗 CPU 和存储空间
- 设置 _routing,并以 user_id 作为路由路径,使同一个用户的图片都存储在同一个 shard 中;搜索某个用户的图片时,带上相同的路由值就只需查询这一个 shard,从而提高搜索效率
- 针对 camera 字段的查询,通过 fields 设置了三个不同的子字段:1. exact 2. not_analyzed 3. prefix,分别对应忽略大小写和重音符号的整体精确匹配、完全不分词的原值匹配,以及前缀查找
- 设置了多个可用于判断图片质量好坏的计数字段,如 collections_count、comments_count、favorites_count、context_tags_tags_count、sales_count、votes_count
- context_tags 是最精华的部分,详细分析请参考另一篇文档,参考地址:(待补充)
- context_tags_tags_count 值得单独说明:可以通过这个计数来衡量某张图片中各关键词的权重——关键词越多,说明内容越丰富,相应地每个关键词的权重就应该越低。
- converted:推测是图片被编辑的次数,其具体用途暂时还不清楚;猜测某张图片被编辑的次数越多,图片质量相对越高,因为只有好的图片,摄影师才会用心对待(反复修改内容)