今天的文章內容是如何搜索parent/child
話不多說直接開始
搜尋parent/child使用的是has_child跟has_parent,這次我用以下資料做為示範
{
"_index" : "school_members", #父文檔
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"identity" : "student",
"doc_type" : {
"name" : "parent"
}
}
},
{
"_index" : "school_members", #以下都是子文檔
"_type" : "_doc",
"_id" : "aYjz1HQB0efl1Kfs9HgX",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090101",
"name" : "王小明",
"age" : 18,
"class" : "資工一1",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
},
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "aojz1HQB0efl1Kfs9HgX",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090102",
"name" : "許小美",
"age" : 20,
"class" : "資工二2",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
},
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "a4jz1HQB0efl1Kfs9HgX",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090103",
"name" : "風間",
"age" : 18,
"class" : "資工一1",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
},
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "bIjz1HQB0efl1Kfs9HgX",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090104",
"name" : "小新",
"age" : 18,
"class" : "資工一1",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
}
下面是parent/child的mappings
parent:
{
"identity": {
"type": "keyword"
},
"doc_type": {
"type": "join",
"relations": {
"parent": "child"
}
}
}
child:
{
"uid": {
"type": "keyword"
},
"name": {
"type": "keyword"
},
"doc_type": {
"type": "join",
"relations": {
"parent": "child"
}
},
"class": {
"type": "keyword"
},
"age": {
"type": "integer"
}
}
先來看看API格式,首先是has_child(用子文檔的條件找出符合的父文檔)
{
"query": {
"has_child": {
"type": "child", #mappings relations的child的值
"query": { #子文檔的搜索條件
"match_all": {}
},
"max_children": 10, #最多匹配子文檔,如果父文檔匹配的子文檔大於這個數字就不會被搜索
"min_children": 2, #最少匹配子文檔
"score_mode": "min" #算分的模式
}
}
}
結果:
{
"took" : 14,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "1",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"identity" : "student",
"doc_type" : {
"name" : "parent"
}
}
}
]
}
}
接著是has_parent的API格式(用父文檔的條件找出符合的子文檔)
{
"query": {
"has_parent": {
"parent_type": "parent", #mappings relations的parent的值
"query": { #父文檔的搜索條件
"bool": {
"must": {
"term": {
"identity": "student"
}
}
}
}
}
}
}
結果:
{
"took" : 20,
"timed_out" : false,
"_shards" : {
"total" : 3,
"successful" : 3,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 1.0,
"hits" : [
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "noj81HQB0efl1KfsVHi8",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090101",
"name" : "王小明",
"age" : 18,
"class" : "資工一1",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
},
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "n4j81HQB0efl1KfsVHi8",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090102",
"name" : "許小美",
"age" : 20,
"class" : "資工二2",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
},
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "oIj81HQB0efl1KfsVHi8",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090103",
"name" : "風間",
"age" : 18,
"class" : "資工一1",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
},
{
"_index" : "school_members",
"_type" : "_doc",
"_id" : "oYj81HQB0efl1KfsVHi9",
"_score" : 1.0,
"_routing" : "student",
"_source" : {
"sid" : "s1090104",
"name" : "小新",
"age" : 18,
"class" : "資工一1",
"doc_type" : {
"name" : "child",
"parent" : 1
}
}
}
]
}
}
下面是這次建立index跟匯入資料(data import)的程式碼。其實create index跟data import應該分開寫比較好,但為了方便我就寫在一起了哈哈哈;mappings感覺也可以改成用config檔的方式引用,但就是...方便!
import csv
import json

from elasticsearch import Elasticsearch
from elasticsearch import helpers
class ParentChildImport:
    """Create an index with a ``join`` field and load parent/child documents.

    ``execute`` runs the whole demo: create the index with the parent
    mappings, index the single parent document, extend the mappings with the
    child fields, then bulk-index the child documents read from a CSV file.
    """

    def __init__(self, hosts="10.1.1.20", port=9200, index="school_members"):
        """Open the Elasticsearch client and remember the target index.

        Args:
            hosts: Host passed to the ``Elasticsearch`` client.
            port: Port passed to the ``Elasticsearch`` client.
            index: Name of the index every method of this instance works on.
        """
        self.es = Elasticsearch(hosts=hosts, port=port)
        self.index = index

    def create_parent_data(self):
        """Index the single parent document (id 1, routing ``student``)."""
        data = {
            "identity": "student",
            "doc_type": {
                "name": "parent"
            }
        }
        self.es.create(index=self.index, body=data, routing="student", id=1)

    def create_index(self):
        """Create ``self.index`` with the settings and the parent mappings."""
        body = {
            "settings": self.get_setting(),
            "mappings": self.get_parent_mappings(),
        }
        # Use self.index (not a second hard-coded name) so the index created
        # here is the same one the other methods write to.
        self.es.indices.create(index=self.index, body=body)

    @staticmethod
    def get_setting():
        """Return the index settings: 3 shards, 1 replica."""
        return {
            "index": {
                "number_of_shards": 3,
                "number_of_replicas": 1
            }
        }

    @staticmethod
    def load_child_data(path="student.csv", index="school_members",
                        encoding="utf-8"):
        """Read the student CSV and build one bulk action per row.

        Each non-empty row must hold exactly four columns, in this order:
        sid, name, age, class.

        Args:
            path: CSV file to read.
            index: Value stored in each action's ``_index``.
            encoding: Text encoding of the CSV file. Given explicitly because
                the file holds non-ASCII names and the platform default
                encoding is not the same everywhere.

        Returns:
            A list of action dicts in the shape ``helpers.bulk`` consumes.

        Raises:
            ValueError: If a row does not have four columns or ``age`` is not
                an integer.
        """
        actions = []
        # newline="" is what the csv module asks for when reading files.
        with open(path, "r", encoding=encoding, newline="") as f:
            for row in csv.reader(f):
                if not row:
                    # Blank line (e.g. a trailing newline): nothing to index.
                    continue
                sid, name, age, class_ = row
                actions.append({
                    "_index": index,
                    "_op_type": "index",
                    # Same routing value as the parent document.
                    "_routing": "student",
                    "_source": {
                        "sid": sid,
                        "name": name,
                        "age": int(age),
                        "class": class_,
                        "doc_type": {
                            "name": "child",
                            "parent": 1
                        }
                    }
                })
        return actions

    @staticmethod
    def get_child_mappings():
        """Return the mappings that describe the child documents."""
        # NOTE(review): this declares "uid", but the documents built by
        # load_child_data carry the field "sid" -- confirm which name is
        # intended.
        return {
            "properties": {
                "uid": {
                    "type": "keyword"
                },
                "name": {
                    "type": "keyword"
                },
                "doc_type": {
                    "type": "join",
                    "relations": {
                        "parent": "child"
                    }
                },
                "class": {
                    "type": "keyword"
                },
                "age": {
                    "type": "integer"
                }
            }
        }

    @staticmethod
    def get_parent_mappings():
        """Return the mappings that describe the parent document."""
        return {
            "properties": {
                "identity": {
                    "type": "keyword"
                },
                "doc_type": {
                    "type": "join",
                    "relations": {
                        "parent": "child"
                    }
                }
            }
        }

    def create_child_data(self):
        """Bulk-index the child documents loaded from the CSV file."""
        actions = self.load_child_data(index=self.index)
        helpers.bulk(self.es, actions)

    def chang_mappings(self, p_c_type):
        """Put the parent or the child mappings onto ``self.index``.

        Args:
            p_c_type: ``"p"`` selects the parent mappings; any other value
                selects the child mappings.
        """
        if p_c_type == "p":
            mappings = self.get_parent_mappings()
        else:
            mappings = self.get_child_mappings()
        print(mappings)
        self.es.indices.put_mapping(index=self.index, body=mappings)

    def execute(self):
        """Run the full import: index, parent doc, child mappings, children."""
        self.create_index()
        self.create_parent_data()
        self.chang_mappings("c")
        self.create_child_data()
if __name__ == "__main__":
    # Script entry point: build the index and load the sample documents.
    importer = ParentChildImport()
    importer.execute()
今天的文章就到這裡,明天是Nested的搜索方式