python读取es中的所有数据并计算md5然后进行持久化

python

#!/usr/bin/python

import threading

import json

import time

from elasticsearch import Elasticsearch

from elasticsearch import helpers

import os

import sys

import argparse

# Elasticsearch nodes the client may connect to; every node uses port 9200.
host_list = [
    {"host": address, "port": 9200}
    for address in ("1.58.55.11", "1.58.55.12", "1.58.55.13")
]

# Client shared by the rest of the script.
es = Elasticsearch(host_list)

# Number of hits requested per scroll page.
size = 1000

# Initial search; passing ``scroll`` makes the response carry a ``_scroll_id``
# that can be used to fetch the following pages.
query = es.search(index='full_sight', scroll='1m', size=size)

results = query['hits']['hits']  # first page of hits returned by the search

total = query['hits']['total']  # total number of hits reported by the search
# Elasticsearch 7+ reports the total as an object ({"value": N, "relation": ...})
# instead of a plain integer; normalise it so the arithmetic below works on
# both old and new servers.
# NOTE(review): on 7+ the value may be a lower bound (relation "gte") unless
# exact hit tracking is requested -- confirm against the cluster version.
if isinstance(total, dict):
    total = total['value']

scroll_id = query['_scroll_id']  # cursor used to fetch the remaining pages

# Total number of pages: ceiling of total / size.
page = (total + size - 1) // size

import hashlib

# Running 1-based counter of processed documents (used for progress output).
num = 1

# Walk every hit of the scroll, compute the MD5 of each individual document
# and append one JSON line ``{"<_id>": "<md5 hex>"}`` per document to the file.
# The file is opened once instead of once per document.
with open("test.text", "a", encoding="utf-8") as f:
    # Start with the first page returned by the initial search; it is not
    # returned again by es.scroll(), so skipping it would lose those hits.
    hits = results
    # Loop until a page comes back empty rather than trusting a precomputed
    # page count, which is wrong when the reported total is not exact.
    while hits:
        for m in hits:
            s = json.dumps(m)
            # A fresh digest per document: reusing one md5 object and calling
            # update() repeatedly would hash the concatenation of every
            # document seen so far instead of this document alone.
            v = hashlib.md5(s.encode("utf-8")).hexdigest()
            k = m["_id"]
            f.write(json.dumps({k: v}))
            f.write("\n")
            print(k, num, sep="============>")
            num += 1
        # The scroll parameter must be passed, otherwise the request fails.
        response = es.scroll(scroll_id=scroll_id, scroll='1m')
        # Always continue from the most recently returned scroll id.
        scroll_id = response['_scroll_id']
        hits = response['hits']['hits']

以上是 python读取es中的所有数据并计算md5然后进行持久化 的全部内容, 来源链接: utcz.com/z/388810.html

回到顶部