mapping参数解析

官方文档地址：https://www.elastic.co/guide/en/elasticsearch/reference/6.x/mapping-params.html
1. analyzer
指定分词器(分析器更合理)，对索引和查询都有效。如下，指定ik分词的配置
（1）定义索引并定义mapping

PUT test
{
  "mappings": {
    "it":{
      "properties":{
        "name" : {
          "type" : "text",
          "analyzer" : "ik_smart",
          "search_analyzer":"ik_max_word"
        }
      }
    }
  }
}

（2）插入数据

PUT test/it/1
{
  "name" : "美国留给伊拉克的是个烂摊子"
}
PUT test/it/2
{
  "name" : "中国驻洛杉矶领事馆遭亚裔男子枪击，嫌犯已自首"
}
PUT test/it/3
{
  "name" : "中韩渔船冲突调查：韩警平均扣留一艘国渔船"
}
PUT test/it/4
{
  "name" : "公安部：各地校车将享受最高路权"
}

（3）查询

POST test/it/_search
{
  "query": {
    "match": {
      "name": "中国"
    }
  }
}

查询结果：

{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.65109104,
    "hits": [
      {
        "_index": "test",
        "_type": "it",
        "_id": "2",
        "_score": 0.65109104,
        "_source": {
          "name": "中国驻洛杉矶领事馆遭亚裔男子枪击，嫌犯已自首"
        }
      }
    ]
  }
}

2. normalizer
normalizer用于解析前的标准化配置，比如把所有的字符转化为小写等。
(1) 创建索引

PUT my_index/
{
  "settings": {
    "analysis": {
      "normalizer":{
        "my_normalizer":{
          "type":"custom",
          "char_filter" : [],
          "filter" : ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "_doc" : {
      "properties" : {
        "foo" : {
          "type": "keyword",
          "normalizer": "my_normalizer"
        }
      }
    }
  }
}

(2) 插入数据

PUT my_index/_doc/1
{
  "foo": "BÀR"
}

PUT my_index/_doc/2
{
  "foo": "bar"
}

PUT my_index/_doc/3
{
  "foo": "baz"
}

(3) 查询数据

GET my_index/_search
{
  "query": {
    "term": {
      "foo": "BAR"
    }
  }
}
GET my_index/_search
{
  "query": {
    "match": {
      "foo": "BAR"
    }
  }
}

返回结果：

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "foo": "bar"
        }
      },
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "foo": "BÀR"
        }
      }
    ]
  }
}

3.boost
通过指定一个boost值来控制每个查询子句的相对权重，该值默认为1。一个大于1的boost会增加该查询子句的相对权重。
(1) 创建索引并插入数据：

#创建索引
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "title": {
          "type": "text",
          "boost": 2 
        },
        "content": {
          "type": "text"
        }
      }
    }
  }
}
#插入数据
PUT my_index/_doc/1
{
  "title" : "hello world",
  "content" : "你好世界"
}

(2) 查询：

#查询
POST my_index/_search
{
    "query": {
        "match" : {
            "title": {
                "query": "hello world"
            }
        }
    }
}
#返回结果：
{
  "took": 13,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1.1507283,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 1.1507283,
        "_source": {
          "title": "hello world",
          "content": "你好世界"
        }
      }
    ]
  }
}

boost参数被用来增加一个子句的相对权重(当boost大于1时)，或者减小相对权重(当boost介于0到1时)，但是增加或者减小不是线性的。换言之，boost设为2并不会让最终的_score加倍。相反，新的_score会在应用了boost后被归一化(Normalized)。每种查询都有自己的归一化算法(Normalization Algorithm)。但可以肯定的是，更高的boost值会产生更高的_score。

4.coerce
coerce属性用于清除脏数据，coerce的默认值是true。整型数字5有可能会被写成字符串“5”或者浮点数5.0。coerce属性可以用来清除这类脏数据：

字符串会被强制转换为整数
浮点数被强制转换为整数

#创建索引
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "title": {
          "type": "text"
          
        },
        "content": {
          "type": "text"
        },
        "age" : {
          "type" : "integer",
          "coerce" : false
        }
      }
    }
  }
}
#第一次插入数据
PUT my_index/_doc/1
{
  "title" : "hello world",
  "content" : "你好世界",
  "age" : 5    #注意此处区别
}
#第一次返回结果
{
  "_index": "my_index",
  "_type": "_doc",
  "_id": "1",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}
#第二次插入数据：
PUT my_index/_doc/1
{
  "title" : "hello world",
  "content" : "你好世界",
  "age" : "5"   #注意此处区别
}
#第二次返回结果
{
  "error": {
    "root_cause": [
      {
        "type": "mapper_parsing_exception",
        "reason": "failed to parse [age]"
      }
    ],
    "type": "mapper_parsing_exception",
    "reason": "failed to parse [age]",
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "Integer value passed as String"
    }
  },
  "status": 400
}

5.copy_to
copy_to属性用于配置自定义的_all字段。换言之，就是多个字段可以合并成一个超级字段。比如，first_name和last_name可以合并为full_name字段。

#创建索引
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name"
        },
        "full_name" : {
          "type" : "text"
        }
      }
    }
  }
}
#插入数据
PUT my_index/_doc/1
{
  "first_name" : "hello",
  "second_name" : "world"
}
#查询
POST my_index/_search
{
    "query": {
        "match": {
            "full_name": {
                "query": "hello world",
                "operator": "and"
            }
        }
    }
}
#返回结果
{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.5753642,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.5753642,
        "_source": {
          "first_name": "hello",
          "second_name": "world"
        }
      }
    ]
  }
}

6.doc_values
doc_values是为了加快排序、聚合操作，在建立倒排索引的时候，额外增加一个列式存储映射，是一个空间换时间的做法。对于支持doc_values的字段类型默认是开启的（text类型字段不支持doc_values），对于确定不需要聚合或者排序的字段可以关闭。

PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name",
          "doc_values" : false
        },
        "full_name" : {
          "type" : "text"
        }
      }
    }
  }
}

7.dynamic
dynamic属性用于控制如何处理新发现的字段（即插入记录时，文档中存在未在mapping中定义的字段的情况），有三个取值：

true:新发现的字段添加到映射中。（默认）
false:新检测的字段被忽略。必须显式添加新字段。
strict:如果检测到新字段，就会引发异常并拒绝文档

#创建索引
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name",
          "doc_values" : false
        },
        "full_name" : {
          "type" : "text"
        }
      }
    }
  }
}
#添加文档，添加不存在的字段
PUT my_index/_doc/1
{
  "first_name" : "hello",
  "second_name" : "world",
  "age" : 10
}
#返回结果
{
  "error": {
    "root_cause": [
      {
        "type": "strict_dynamic_mapping_exception",
        "reason": "mapping set to strict, dynamic introduction of [age] within [_doc] is not allowed"
      }
    ],
    "type": "strict_dynamic_mapping_exception",
    "reason": "mapping set to strict, dynamic introduction of [age] within [_doc] is not allowed"
  },
  "status": 400
}

8.enabled
Elasticsearch默认会索引所有的字段，enabled设为false的字段，es会跳过字段内容，该字段只能从_source中获取，但是不可搜。而且字段可以是任意类型。

#创建索引
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name",
          "doc_values" : false
        },
        "full_name" : {
          "type" : "text"
        },
        "age":{
          "enabled": false
        }
      }
    }
  }
}
#插入数据
PUT my_index/_doc/1
{
  "first_name" : "hello",
  "second_name" : "world",
  "age" : 10
}
#查询
POST my_index/_search
{
    "query": {
        "match": {
            "age": {
                "query": 10
                
            }
        }
    }
}
#返回结果
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 0,
    "max_score": null,
    "hits": []
  }
}

9.format
当type(字段类型)为date时指定日期的保存格式。除了使用系统内置的格式还可以使用自己熟悉的格式，例如：yyyy/MM/dd（注意月份是大写的MM，小写的mm表示分钟）。(格式将在接下来的章节中详细讲解)
10.ignore_above
ignore_above用于指定字段索引和存储的长度最大值，超过最大值的会被忽略(不能用于type类型为text的字段中)

#添加索引
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "keyword" : {
          "type":"keyword",
          "ignore_above" : 5
        }
        
      }
    }
  }
}
#添加第一条数据（不超过5个字符）
PUT my_index/_doc/1
{
  "keyword" : "hello"
}
#添加第二条数据（超过5个字符）
PUT my_index/_doc/2
{
  "keyword" : "hello world"
}
#查询字段
POST my_index/_search
{
    "query": {
        "match": {
            "keyword": {
                "query": "hello"
                
            }
        }
    }
}
#查询结果，超过5个字符的将被忽略
{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "keyword": "hello"
        }
      }
    ]
  }
}

mapping中指定了ignore_above字段的最大长度为5，第一个文档的字段长小于等于5，因此索引成功，第二个超过5，因此不索引

11.ignore_malformed
ignore_malformed可以忽略不规则数据。对于账号userid字段，有人可能填写的是整数类型，也有人填写的是邮件格式。默认情况下，给一个字段索引不合适的数据类型会发生异常，导致整个文档索引失败。如果ignore_malformed参数设为true，异常会被忽略，出异常的字段不会被索引，其它字段正常索引。

#第一种情况当ignore_malformed为false时
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "age" : {
          "type":"integer",
          "ignore_malformed" : false
        }
        
      }
    }
  }
}
#插入数据（整型）
PUT my_index/_doc/2
{
  "age" : "10"
}
#返回结果插入成功
{
  "_index": "my_index",
  "_type": "_doc",
  "_id": "2",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}
#插入数据（非整型）
PUT my_index/_doc/1
{
  "age" : "hello"
}
#返回结果
{
  "error": {
    "root_cause": [
      {
        "type": "mapper_parsing_exception",
        "reason": "failed to parse [age]"
      }
    ],
    "type": "mapper_parsing_exception",
    "reason": "failed to parse [age]",
    "caused_by": {
      "type": "number_format_exception",
      "reason": "For input string: \"hello\""
    }
  },
  "status": 400
}
#第二种情况，当ignore_malformed为true时
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "age" : {
          "type":"integer",
          "ignore_malformed" : true
        }
        
      }
    }
  }
}
#插入整型数据和非整型数据
PUT my_index/_doc/1
{
  "age" : "hello"
}
PUT my_index/_doc/2
{
  "age" : "10"
}
#均插入成功
{
  "_index": "my_index",
  "_type": "_doc",
  "_id": "2",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}

12.index_options
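用于控制倒排索引记录的内容，有如下四个配置选项：

docs:只索引文档编号。
freqs:索引文档编号和词频。
positions:索引文档编号、词频和词项位置（用于短语查询和邻近查询）。
offsets:索引文档编号、词频、词项位置以及词项的起止字符偏移量（用于加速高亮）。

text类型字段默认为positions，其他类型字段默认为docs。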

PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "text",
          "index_options": "offsets"
        }
      }
    }
  }
}

13.index
index属性用于指定字段是否索引，不索引也就不可搜索，取值可以为true或者false。

PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "name" : {
          "type":"text",
          "index" : false
        },
        "title" : {
          "type" : "text"
        }
        
      }
    }
  }
}

14.null_value
当字段遇到null时的处理策略，默认为null，即为空，此时es会忽略该值。可以通过设定该值设定字段的默认值。（该属性不能用于type类型为text的字段）

PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "name" : {
          "type":"text",
          "index" : false
        },
        "title" : {
          "type" : "keyword",
          "null_value" : "null"
        }
        
      }
    }
  }
}

15.fields
fields可以让同一文本有多种不同的索引方式，比如一个String类型的字段，可以使用text类型做全文检索，使用keyword类型做聚合和排序。

PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "city": {
          "type": "text",
          "fields": {
            "raw": { 
              "type":  "keyword"
            }
          }
        }
      }
    }
  }
}
PUT my_index/my_type/1
{
  "city": "New York"
}

PUT my_index/my_type/2
{
  "city": "York"
}

GET my_index/_search
{
  "query": {
    "match": {
      "city": "york" 
    }
  },
  "sort": {
    "city.raw": "asc" 
  },
  "aggs": {
    "Cities": {
      "terms": {
        "field": "city.raw" 
      }
    }
  }
}

{
  "took": 31,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": null,
    "hits": [
      {
        "_index": "my_index",
        "_type": "my_type",
        "_id": "1",
        "_score": null,
        "_source": {
          "city": "New York"
        },
        "sort": [
          "New York"
        ]
      },
      {
        "_index": "my_index",
        "_type": "my_type",
        "_id": "2",
        "_score": null,
        "_source": {
          "city": "York"
        },
        "sort": [
          "York"
        ]
      }
    ]
  },
  "aggregations": {
    "Cities": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "New York",
          "doc_count": 1
        },
        {
          "key": "York",
          "doc_count": 1
        }
      ]
    }
  }
}

ElasticSearch6.x mapping参数解析

mapping参数解析

相关文章

热门标签

最新文章