IT鐵人第14天 Elasticsearch 使用python查詢資料 parent/child

第 12 屆 iThome 鐵人賽

DAY 14

Elastic Stack on Cloud

Python&Elasticsearch 入門系列第 14 篇

12th鐵人賽

hank9820

團隊nutc_imac_XXX

2020-09-28 22:09:06

2851 瀏覽

分享至

今天的文章內容是如何搜索parent/child
話不多說直接開始

parent/child

搜尋parent/child使用的是has_child跟has_parent，這次我用以下資料做為示範

{
    "_index" : "school_members",  #父親文檔
    "_type" : "_doc",
    "_id" : "1",
    "_score" : 1.0,
    "_routing" : "student",
    "_source" : {
      "identity" : "student",
      "doc_type" : {
        "name" : "parent"
      }
    }
  },
  {
    "_index" : "school_members",     #以下都是小孩文檔
    "_type" : "_doc",
    "_id" : "aYjz1HQB0efl1Kfs9HgX",
    "_score" : 1.0,
    "_routing" : "student",
    "_source" : {
      "sid" : "s1090101",
      "name" : "王小明",
      "age" : 18,
      "class" : "資工一1",
      "doc_type" : {
        "name" : "child",
        "parent" : 1
      }
    }
  },
  {
    "_index" : "school_members",
    "_type" : "_doc",
    "_id" : "aojz1HQB0efl1Kfs9HgX",
    "_score" : 1.0,
    "_routing" : "student",
    "_source" : {
      "sid" : "s1090102",
      "name" : "許小美",
      "age" : 20,
      "class" : "資工二2",
      "doc_type" : {
        "name" : "child",
        "parent" : 1
      }
    }
  },
  {
    "_index" : "school_members",
    "_type" : "_doc",
    "_id" : "a4jz1HQB0efl1Kfs9HgX",
    "_score" : 1.0,
    "_routing" : "student",
    "_source" : {
      "sid" : "s1090103",
      "name" : "風間",
      "age" : 18,
      "class" : "資工一1",
      "doc_type" : {
        "name" : "child",
        "parent" : 1
      }
    }
  },
  {
    "_index" : "school_members",
    "_type" : "_doc",
    "_id" : "bIjz1HQB0efl1Kfs9HgX",
    "_score" : 1.0,
    "_routing" : "student",
    "_source" : {
      "sid" : "s1090104",
      "name" : "小新",
      "age" : 18,
      "class" : "資工一1",
      "doc_type" : {
        "name" : "child",
        "parent" : 1
      }
    }
  }

下面是parent/child的mappings
parent:

{
    "identity": {
        "type": "keyword"
    },
    "doc_type": {
        "type": "join",
        "relations": {
            "parent": "child"
        }
    }
}

child:

{
    "uid": {
        "type": "keyword"
    },
    "name": {
        "type": "keyword"
    },
    "doc_type": { 
        "type": "join",
        "relations": {
            "parent": "child" 
        }
    },
    "class": {
        "type": "keyword"
    },
    "age": {
        "type": "integer"
    }
}

先來看看API格式

has_child

{
  "query": {
    "has_child": {
      "type": "child", #mappings relations的child的值
      "query": {       #子文檔的搜索條件
         "match_all": {}
      },
      "max_children": 10, #最多匹配的子文檔數；父文檔匹配到的子文檔數若超過這個數字，該父文檔就不會出現在搜尋結果中
      "min_children": 2,  #最少匹配子文檔
      "score_mode": "min" #算分的模式
    }
  }
}

結果：

{
  "took" : 14,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 1,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "school_members",
        "_type" : "_doc",
        "_id" : "1",
        "_score" : 1.0,
        "_routing" : "student",
        "_source" : {
          "identity" : "student",
          "doc_type" : {
            "name" : "parent"
          }
        }
      }
    ]
  }
}

has_parent

{
  "query": {
    "has_parent": {
      "parent_type": "parent", #mappings relations的parent的值
      "query": {  #父文檔的搜索條件
         "bool": {
           "must": {
             "term": {
               "identity": "student"
             }
           }
         }
      }
    }
  }
}

結果：

{
  "took" : 20,
  "timed_out" : false,
  "_shards" : {
    "total" : 3,
    "successful" : 3,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 4,
      "relation" : "eq"
    },
    "max_score" : 1.0,
    "hits" : [
      {
        "_index" : "school_members",
        "_type" : "_doc",
        "_id" : "noj81HQB0efl1KfsVHi8",
        "_score" : 1.0,
        "_routing" : "student",
        "_source" : {
          "sid" : "s1090101",
          "name" : "王小明",
          "age" : 18,
          "class" : "資工一1",
          "doc_type" : {
            "name" : "child",
            "parent" : 1
          }
        }
      },
      {
        "_index" : "school_members",
        "_type" : "_doc",
        "_id" : "n4j81HQB0efl1KfsVHi8",
        "_score" : 1.0,
        "_routing" : "student",
        "_source" : {
          "sid" : "s1090102",
          "name" : "許小美",
          "age" : 20,
          "class" : "資工二2",
          "doc_type" : {
            "name" : "child",
            "parent" : 1
          }
        }
      },
      {
        "_index" : "school_members",
        "_type" : "_doc",
        "_id" : "oIj81HQB0efl1KfsVHi8",
        "_score" : 1.0,
        "_routing" : "student",
        "_source" : {
          "sid" : "s1090103",
          "name" : "風間",
          "age" : 18,
          "class" : "資工一1",
          "doc_type" : {
            "name" : "child",
            "parent" : 1
          }
        }
      },
      {
        "_index" : "school_members",
        "_type" : "_doc",
        "_id" : "oYj81HQB0efl1KfsVHi9",
        "_score" : 1.0,
        "_routing" : "student",
        "_source" : {
          "sid" : "s1090104",
          "name" : "小新",
          "age" : 18,
          "class" : "資工一1",
          "doc_type" : {
            "name" : "child",
            "parent" : 1
          }
        }
      }
    ]
  }
}

下面是這次建立index跟data_import的程式碼，其實create index跟data import應該分開寫比較好，但為了方便我就寫在一起了哈哈哈，mappings感覺也可以使用config的方式引用，但就是...方便！

import csv
import json

from elasticsearch import Elasticsearch
from elasticsearch import helpers

class ParentChildImport:
    """Create a parent/child (join-field) index and load its documents.

    ``execute`` runs the whole sequence: create the index with the parent
    mappings, index the single parent document, extend the mappings with the
    child fields, then bulk-index the child rows read from a CSV file.
    """

    def __init__(self, hosts="10.1.1.20", port=9200, index="school_members"):
        """Open the Elasticsearch client and remember the target index.

        Args:
            hosts: Host passed to ``Elasticsearch(hosts=...)``.
            port: Port passed to ``Elasticsearch(port=...)``.
            index: Name of the index every operation of this object targets.
        """
        self.es = Elasticsearch(hosts=hosts, port=port)
        self.index = index

    def create_parent_data(self):
        """Index the single parent document (id ``1``, routing ``student``)."""
        data = {
            "identity": "student",
            "doc_type": {
                "name": "parent",
            },
        }
        self.es.create(index=self.index, body=data, routing="student", id=1)

    def create_index(self):
        """Create ``self.index`` with the shard settings and parent mappings."""
        body = {
            "settings": self.get_setting(),
            "mappings": self.get_parent_mappings(),
        }
        # Use self.index (not a literal) so every method targets the same index.
        self.es.indices.create(index=self.index, body=body)

    @staticmethod
    def get_setting():
        """Return the index settings: 3 primary shards, 1 replica."""
        return {
            "index": {
                "number_of_shards": 3,
                "number_of_replicas": 1,
            },
        }

    @staticmethod
    def load_child_data(path="student.csv", index="school_members",
                        encoding="utf-8"):
        """Read child rows from a CSV file and build bulk ``index`` actions.

        Each non-empty row must hold exactly four columns:
        ``sid, name, age, class``. Every action is routed with ``student`` and
        joined to parent id ``1``.

        Args:
            path: CSV file to read.
            index: Value stored in each action's ``_index``.
            encoding: Text encoding of the CSV file. Given explicitly because
                the rows contain non-ASCII names and the platform default
                encoding differs between systems.

        Returns:
            A list of action dicts suitable for ``helpers.bulk``.

        Raises:
            ValueError: If a row does not have four columns or ``age`` is not
                an integer.
        """
        actions = []
        # newline="" is what the csv module expects for files it reads itself.
        with open(path, "r", encoding=encoding, newline="") as f:
            for row in csv.reader(f):
                if not row:
                    # Blank line (e.g. trailing newline at end of file).
                    continue
                sid, name, age, class_ = row
                actions.append({
                    "_index": index,
                    "_op_type": "index",
                    "_routing": "student",
                    "_source": {
                        "sid": sid,
                        "name": name,
                        "age": int(age),
                        "class": class_,
                        "doc_type": {
                            "name": "child",
                            "parent": 1,
                        },
                    },
                })
        return actions

    @staticmethod
    def get_child_mappings():
        """Return the mappings for the child documents' fields."""
        # NOTE(review): this maps "uid", but load_child_data writes "sid";
        # "sid" is therefore not covered by this mapping. Confirm which name
        # is intended before changing it.
        return {
            "properties": {
                "uid": {
                    "type": "keyword",
                },
                "name": {
                    "type": "keyword",
                },
                "doc_type": {
                    "type": "join",
                    "relations": {
                        "parent": "child",
                    },
                },
                "class": {
                    "type": "keyword",
                },
                "age": {
                    "type": "integer",
                },
            },
        }

    @staticmethod
    def get_parent_mappings():
        """Return the mappings for the parent document's fields."""
        return {
            "properties": {
                "identity": {
                    "type": "keyword",
                },
                "doc_type": {
                    "type": "join",
                    "relations": {
                        "parent": "child",
                    },
                },
            },
        }

    def create_child_data(self):
        """Bulk-index the child documents loaded from the default CSV file."""
        actions = self.load_child_data(index=self.index)
        helpers.bulk(self.es, actions)

    def chang_mappings(self, p_c_type):
        """Apply the parent or child mappings to ``self.index``.

        Args:
            p_c_type: ``"p"`` selects the parent mappings; any other value
                selects the child mappings.
        """
        if p_c_type == "p":
            mappings = self.get_parent_mappings()
        else:
            mappings = self.get_child_mappings()
        print(mappings)
        self.es.indices.put_mapping(index=self.index, body=mappings)

    def execute(self):
        """Run the full import: index, parent doc, child mappings, child docs."""
        self.create_index()
        self.create_parent_data()
        self.chang_mappings("c")
        self.create_child_data()

if __name__ == "__main__":
    # Script entry point: build the importer and run the whole import sequence.
    ParentChildImport().execute()

今天的文章就到這裡，明天是Nested的搜索方式