# Read all documents from Elasticsearch, compute an MD5 for each one, and persist the results.
#!/usr/bin/python
import threading
import json
import time
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import os
import sys
import argparse
import hashlib

# Elasticsearch nodes the client connects to.
host_list = [
    {"host": "1.58.55.11", "port": 9200},
    {"host": "1.58.55.12", "port": 9200},
    {"host": "1.58.55.13", "port": 9200},
]
# Number of hits requested per scroll page.
size = 1000


def doc_md5(hit):
    """Return the hex MD5 digest of a single hit serialized with ``json.dumps``.

    A new hash object is created on every call, so the digest depends only
    on ``hit`` and not on any hit that was hashed before it.
    """
    return hashlib.md5(json.dumps(hit).encode("utf-8")).hexdigest()


def iter_hits(first_response, fetch_page):
    """Yield every hit of a scroll query, starting with the first page.

    Args:
        first_response: The response of the initial search call; its
            ``['hits']['hits']`` list is yielded before any scrolling.
        fetch_page: Callable taking the current scroll id and returning the
            next response in the same shape.

    Yields:
        Each hit dict, in the order the pages return them.

    Iteration stops at the first page whose hit list is empty, so no total
    count is needed (``hits.total`` is an object rather than an int on newer
    Elasticsearch versions, which breaks page-count arithmetic).
    """
    response = first_response
    while True:
        hits = response["hits"]["hits"]
        if not hits:
            return
        yield from hits
        # Always continue from the scroll id of the most recent response.
        response = fetch_page(response["_scroll_id"])


def write_md5_lines(hits, out):
    """Write one ``{"<_id>": "<md5>"}`` JSON line per hit to ``out``.

    Args:
        hits: Iterable of hit dicts, each carrying an ``"_id"`` key.
        out: Writable text file object.

    Returns:
        The number of lines written.
    """
    count = 0
    for count, hit in enumerate(hits, start=1):
        key = hit["_id"]
        out.write(json.dumps({key: doc_md5(hit)}))
        out.write("\n")
        # Progress output: document id and running counter.
        print(key, count, sep="============>")
    return count


def main():
    """Scroll through the ``full_sight`` index and append MD5 lines to ``test.text``."""
    es = Elasticsearch(host_list)
    # The scroll parameter is required; without it no scroll id is returned.
    first_response = es.search(index='full_sight', scroll='1m', size=size)

    def fetch_page(scroll_id):
        return es.scroll(scroll_id=scroll_id, scroll='1m')

    # Open the output once instead of once per document.
    with open("test.text", "a") as f:
        write_md5_lines(iter_hits(first_response, fetch_page), f)


if __name__ == "__main__":
    main()
# End of "Read all documents from Elasticsearch, compute an MD5 for each one, and persist the results". Source: utcz.com/z/388810.html