ElasticSearch6.x mapping参数解析

x33g5p2x  于2021-03-14 发布在 ElasticSearch  
字(14.8k)|赞(0)|评价(0)|浏览(387)

mapping参数解析

官方文档地址:https://www.elastic.co/guide/en/elasticsearch/reference/6.x/mapping-params.html
1. analyzer
指定分词器(分析器更合理),对索引和查询都有效。如下,指定ik分词的配置
(1)定义索引并定义mapping

PUT test
{
  "mappings": {
    "it":{
      "properties":{
        "name" : {
          "type" : "text",
          "analyzer" : "ik_smart",
          "search_analyzer":"ik_max_word"
        }
      }
    }
  }
}

(2)插入数据

PUT test/it/1
{
  "name" : "美国留给伊拉克的是个烂摊子"
}
PUT test/it/2
{
  "name" : "中国驻洛杉矶领事馆遭亚裔男子枪击,嫌犯已自首"
}
PUT test/it/3
{
  "name" : "中韩渔船冲突调查:韩警平均扣留一艘国渔船"
}
PUT test/it/4
{
  "name" : "公安部:各地校车将享受最高路权"
}

(3)查询

POST test/it/_search
{
  "query": {
    "match": {
      "name": "中国"
    }
  }
}

查询结果:

{
  "took": 8,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.65109104,
    "hits": [
      {
        "_index": "test",
        "_type": "it",
        "_id": "2",
        "_score": 0.65109104,
        "_source": {
          "name": "中国驻洛杉矶领事馆遭亚裔男子枪击,嫌犯已自首"
        }
      }
    ]
  }
}

2. normalizer
normalizer用于解析前的标准化配置,比如把所有的字符转化为小写等。
(1) 创建索引

PUT my_index/
{
  "settings": {
    "analysis": {
      "normalizer":{
        "my_normalizer":{
          "type":"custom",
          "char_filter" : [],
          "filter" : ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "_doc" : {
      "properties" : {
        "foo" : {
          "type": "keyword",
          "normalizer": "my_normalizer"
        }
      }
    }
  }
}

(2) 插入数据

PUT my_index/_doc/1
{
  "foo": "BÀR"
}

PUT my_index/_doc/2
{
  "foo": "bar"
}

PUT my_index/_doc/3
{
  "foo": "baz"
}

(3) 查询数据

GET my_index/_search
{
  "query": {
    "term": {
      "foo": "BAR"
    }
  }
}
GET my_index/_search
{
  "query": {
    "match": {
      "foo": "BAR"
    }
  }
}

返回结果:

{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "2",
        "_score": 0.2876821,
        "_source": {
          "foo": "bar"
        }
      },
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "foo": "BÀR"
        }
      }
    ]
  }
}

3.boost
通过指定一个boost值来控制每个查询子句的相对权重,该值默认为1。一个大于1的boost会增加该查询子句的相对权重。
(1) 创建索引并插入数据:

#创建索引
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "title": {
          "type": "text",
          "boost": 2 
        },
        "content": {
          "type": "text"
        }
      }
    }
  }
}
#插入数据
PUT my_index/_doc/1
{
  "title" : "hello world",
  "content" : "你好世界"
}

(2) 查询:

#查询
POST my_index/_search
{
    "query": {
        "match" : {
            "title": {
                "query": "hello world"
            }
        }
    }
}
#返回结果:
{
  "took": 13,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1.1507283,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 1.1507283,
        "_source": {
          "title": "hello world",
          "content": "你好世界"
        }
      }
    ]
  }
}

boost参数被用来增加一个子句的相对权重(当boost大于1时),或者减小相对权重(当boost介于0到1时),但是增加或者减小不是线性的。换言之,boost设为2并不会让最终的_score加倍。相反,新的_score会在应用了boost后被归一化(Normalized)。每种查询都有自己的归一化算法(Normalization Algorithm)。但可以肯定的是,更高的boost值会产生更高的_score。

4.coerce
coerce属性用于清除脏数据,默认值是true。整型数字5有可能会被写成字符串“5”或者浮点数5.0,coerce开启时可以自动清除这类脏数据:

  • 字符串会被强制转换为整数
  • 浮点数被强制转换为整数
#创建索引
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "title": {
          "type": "text"
          
        },
        "content": {
          "type": "text"
        },
        "age" : {
          "type" : "integer",
          "coerce" : false
        }
      }
    }
  }
}
#第一次插入数据
PUT my_index/_doc/1
{
  "title" : "hello world",
  "content" : "你好世界",
  "age" : 5    #注意此处区别
}
#第一次返回结果
{
  "_index": "my_index",
  "_type": "_doc",
  "_id": "1",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}
#第二次插入数据:
PUT my_index/_doc/1
{
  "title" : "hello world",
  "content" : "你好世界",
  "age" : "5"   #注意此处区别
}
#第二次返回结果
{
  "error": {
    "root_cause": [
      {
        "type": "mapper_parsing_exception",
        "reason": "failed to parse [age]"
      }
    ],
    "type": "mapper_parsing_exception",
    "reason": "failed to parse [age]",
    "caused_by": {
      "type": "illegal_argument_exception",
      "reason": "Integer value passed as String"
    }
  },
  "status": 400
}

5.copy_to
copy_to属性用于配置自定义的_all字段。换言之,就是多个字段可以合并成一个超级字段。比如,first_name和last_name可以合并为full_name字段。

#创建索引
PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name"
        },
        "full_name" : {
          "type" : "text"
        }
      }
    }
  }
}
#插入数据
PUT my_index/_doc/1
{
  "first_name" : "hello",
  "second_name" : "world"
}
#查询
POST my_index/_search
{
    "query": {
        "match": {
            "full_name": {
                "query": "hello world",
                "operator": "and"
            }
        }
    }
}
#返回结果
{
  "took": 6,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.5753642,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.5753642,
        "_source": {
          "first_name": "hello",
          "second_name": "world"
        }
      }
    ]
  }
}

6.doc_values
doc_values是为了加快排序、聚合操作,在建立倒排索引的时候,额外增加一个列式存储映射,是一个空间换时间的做法。默认是开启的(text类型字段除外,text字段不支持doc_values),对于确定不需要聚合或者排序的字段可以关闭。

PUT my_index
{
  "mappings": {
    "_doc": {
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name",
          "doc_values" : false
        },
        "full_name" : {
          "type" : "text"
        }
      }
    }
  }
}

7.dynamic
dynamic属性用于控制如何处理新发现的字段(即插入文档时存在未在mapping中定义的字段的情况),有三个取值:

  • true:新发现的字段添加到映射中。(默认)
  • false:新检测的字段被忽略。必须显式添加新字段。
  • strict:如果检测到新字段,就会引发异常并拒绝文档
#创建索引
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name",
          "doc_values" : false
        },
        "full_name" : {
          "type" : "text"
        }
      }
    }
  }
}
#添加文档,添加不存在的字段
PUT my_index/_doc/1
{
  "first_name" : "hello",
  "second_name" : "world",
  "age" : 10
}
#返回结果
{
  "error": {
    "root_cause": [
      {
        "type": "strict_dynamic_mapping_exception",
        "reason": "mapping set to strict, dynamic introduction of [age] within [_doc] is not allowed"
      }
    ],
    "type": "strict_dynamic_mapping_exception",
    "reason": "mapping set to strict, dynamic introduction of [age] within [_doc] is not allowed"
  },
  "status": 400
}

8.enabled
Elasticsearch默认会索引所有的字段,enabled设为false的字段,es会跳过字段内容,该字段只能从_source中获取,但是不可搜索。而且字段可以是任意类型。

#创建索引
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        "first_name":{
          "type" : "text",
          "copy_to" : "full_name"
        },
        "second_name" : {
          "type" : "text" ,
          "copy_to" : "full_name",
          "doc_values" : false
        },
        "full_name" : {
          "type" : "text"
        },
        "age":{
          "enabled": false
        }
      }
    }
  }
}
#插入数据
PUT my_index/_doc/1
{
  "first_name" : "hello",
  "second_name" : "world",
  "age" : 10
}
#查询
POST my_index/_search
{
    "query": {
        "match": {
            "age": {
                "query": 10
                
            }
        }
    }
}
#返回结果
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 0,
    "max_score": null,
    "hits": []
  }
}

9.format
当type(字段类型)为date时指定日期的保存格式。除了使用系统内置的格式还可以使用自己熟悉的格式,例如:yyyy/MM/dd(注意MM表示月份,mm表示分钟)。(格式将在接下来的章节中详细讲解)
10.ignore_above
ignore_above用于指定字段索引和存储的长度最大值,超过最大值的会被忽略(不能用于type类型为text的字段中)

#添加索引
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "keyword" : {
          "type":"keyword",
          "ignore_above" : 5
        }
        
      }
    }
  }
}
#添加第一条数据(不超过5个字符)
PUT my_index/_doc/1
{
  "keyword" : "hello"
}
#添加第二条数据(超过5个字符)
PUT my_index/_doc/2
{
  "keyword" : "hello world"
}
#查询字段
POST my_index/_search
{
    "query": {
        "match": {
            "keyword": {
                "query": "hello"
                
            }
        }
    }
}
#查询结果,超过5个字符的将被忽略
{
  "took": 4,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "my_index",
        "_type": "_doc",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "keyword": "hello"
        }
      }
    ]
  }
}

mapping中指定了ignore_above字段的最大长度为5,第一个文档的字段长小于等于5,因此索引成功,第二个超过5,因此不索引

11.ignore_malformed
ignore_malformed可以忽略不规则数据。例如对于账号userid字段,有人可能填写的是整数类型,也有人填写的是邮件格式。默认情况下(ignore_malformed为false),给一个字段索引不合适的数据类型会发生异常,导致整个文档索引失败。如果ignore_malformed参数设为true,异常会被忽略,出异常的字段不会被索引,其它字段正常索引。

#第一种情况当ignore_malformed为false时
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "age" : {
          "type":"integer",
          "ignore_malformed" : false
        }
        
      }
    }
  }
}
#插入数据(整型)
PUT my_index/_doc/2
{
  "age" : "10"
}
#返回结果插入成功
{
  "_index": "my_index",
  "_type": "_doc",
  "_id": "2",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}
#插入数据(非整型)
PUT my_index/_doc/1
{
  "age" : "hello"
}
#返回结果
{
  "error": {
    "root_cause": [
      {
        "type": "mapper_parsing_exception",
        "reason": "failed to parse [age]"
      }
    ],
    "type": "mapper_parsing_exception",
    "reason": "failed to parse [age]",
    "caused_by": {
      "type": "number_format_exception",
      "reason": "For input string: \"hello\""
    }
  },
  "status": 400
}
#第二种情况,当ignore_malformed为true时
PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "age" : {
          "type":"integer",
          "ignore_malformed" : true
        }
        
      }
    }
  }
}
#插入整型数据和非整型数据
PUT my_index/_doc/1
{
  "age" : "hello"
}
PUT my_index/_doc/2
{
  "age" : "10"
}
#均插入成功
{
  "_index": "my_index",
  "_type": "_doc",
  "_id": "2",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}

12.index_options
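用于控制倒排索引记录的内容,有如下四个配置选项:

  • docs:只记录文档编号(doc number)
  • freqs:记录文档编号和词频(term frequency)
  • positions:记录文档编号、词频和词项位置(position),可用于邻近查询和短语查询
  • offsets:记录文档编号、词频、词项位置以及词项的起止字符偏移量(offset),可用于加速高亮显示

text类型字段默认为positions,其他类型字段默认为docs。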

PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "text": {
          "type": "text",
          "index_options": "offsets"
        }
      }
    }
  }
}

13.index
index属性用于指定字段是否索引,不索引也就不可搜索,取值可以为true或者false。

PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "name" : {
          "type":"text",
          "index" : false
        },
        "title" : {
          "type" : "text"
        }
        
      }
    }
  }
}

14.null_value
当字段遇到null时的处理策略,默认为null,即为空,此时es会忽略该值。可以通过该参数为字段指定一个用于替代null的默认值。(该属性不能用于type类型为:text的字段下)

PUT my_index
{
  "mappings": {
    
    "_doc": {
      "dynamic":"strict",
      "properties": {
        
        "name" : {
          "type":"text",
          "index" : false
        },
        "title" : {
          "type" : "keyword",
          "null_value" : "null"
        }
        
      }
    }
  }
}

15.fields
fields可以让同一文本有多种不同的索引方式,比如一个String类型的字段,可以使用text类型做全文检索,使用keyword类型做聚合和排序。

PUT my_index
{
  "mappings": {
    "my_type": {
      "properties": {
        "city": {
          "type": "text",
          "fields": {
            "raw": { 
              "type":  "keyword"
            }
          }
        }
      }
    }
  }
}
PUT my_index/my_type/1
{
  "city": "New York"
}

PUT my_index/my_type/2
{
  "city": "York"
}

GET my_index/_search
{
  "query": {
    "match": {
      "city": "york" 
    }
  },
  "sort": {
    "city.raw": "asc" 
  },
  "aggs": {
    "Cities": {
      "terms": {
        "field": "city.raw" 
      }
    }
  }
}

{
  "took": 31,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 2,
    "max_score": null,
    "hits": [
      {
        "_index": "my_index",
        "_type": "my_type",
        "_id": "1",
        "_score": null,
        "_source": {
          "city": "New York"
        },
        "sort": [
          "New York"
        ]
      },
      {
        "_index": "my_index",
        "_type": "my_type",
        "_id": "2",
        "_score": null,
        "_source": {
          "city": "York"
        },
        "sort": [
          "York"
        ]
      }
    ]
  },
  "aggregations": {
    "Cities": {
      "doc_count_error_upper_bound": 0,
      "sum_other_doc_count": 0,
      "buckets": [
        {
          "key": "New York",
          "doc_count": 1
        },
        {
          "key": "York",
          "doc_count": 1
        }
      ]
    }
  }
}

相关文章

微信公众号

最新文章

更多