Python 中 jsonschema 与 schema 效率比较

python

前面几篇文章总结了python中jsonschema与schema的用法,这里测试一下两者的效率:

上代码:

import time

from jsonschema import validate, draft7_format_checker

from jsonschema.exceptions import SchemaError, ValidationError

from schema import Schema, And, Optional, SchemaError, Regex

def tags_check(tags_list):
    """Return True when ``tags_list`` holds 1 to 5 tags, each a string of length >= 2.

    Used as a callable validator for the "tags" field of ``schema2``.

    Args:
        tags_list: The sized collection of tags to check.

    Returns:
        bool: True if the tag count is within 1..5 and every tag is a string
        with at least two characters, otherwise False.
    """
    if not 1 <= len(tags_list) <= 5:
        return False
    # The isinstance check rejects non-string items: without it an int tag
    # raises TypeError in len(), and a nested list of length >= 2 would pass.
    return all(isinstance(tag, str) and len(tag) >= 2 for tag in tags_list)

def id_generator(start=1):
    """Yield consecutive integers forever, beginning at ``start``.

    Args:
        start: The first value to yield (defaults to 1).

    Yields:
        ``start``, then ``start + 1``, ``start + 2``, ... without end.
    """
    current = start
    while True:
        yield current
        current += 1

class DataFactory(object):
    """Produce sample book records with sequential ids for the benchmark."""

    def __init__(self):
        # Each factory owns its own id sequence, starting at id_generator's default.
        self.id_g = id_generator()

    def create_data(self):
        """Build one book record, consuming the next id from the sequence.

        Returns:
            dict: A record with the keys "id", "name", "info", "price",
            "tags", "date" and "others".
        """
        idn = next(self.id_g)
        # 5.5 + idn is always a float and a multiple of 0.5.
        price = 5.5 + idn
        return {
            "id": idn,
            "name": "jarvis手册%d" % idn,
            "info": "贾维斯平台使用手册%d" % idn,
            "price": price,
            "tags": ["jar"],
            # Zero-padded YYYY-MM-DD: the JSON Schema "date" format is an
            # RFC 3339 full-date, and strict checkers reject "2019-5-25".
            "date": "2019-05-25",
            "others": {
                "info1": "1111",
                "info2": "2222",
            },
        }

# JSON Schema (draft-07) describing a book record; validated with jsonschema.
schema1 = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "title": "book info",
    "description": "some information about book",
    "type": "object",
    "properties": {
        "id": {
            "description": "The unique identifier for a book",
            "type": "integer",
            "minimum": 1
        },
        "name": {
            "description": "book name",
            "type": "string",
            "minLength": 3,
            "maxLength": 30
        },
        "info": {
            "description": "simple information about book",
            "type": "string",
            "minLength": 10,
            "maxLength": 60
        },
        "price": {
            "description": "book price",
            "type": "number",
            "multipleOf": 0.5,
            "minimum": 5.0,
            "maximum": 111111.0,
        },
        "tags": {
            "type": "array",
            # "items" / "minLength" / "minItems" are the real keyword names.
            # The previous spellings ("additonalItems", "miniLength",
            # "miniItems") are unknown keywords that validators ignore, so
            # tag contents and the minimum count were never checked.
            "items": {
                "type": "string",
                "minLength": 2
            },
            "minItems": 1,
            "maxItems": 5,
        },
        "date": {
            "description": "书籍出版日期",
            "type": "string",
            "format": "date",
        },
        "bookcoding": {
            "description": "书籍编码",
            "type": "string",
            "pattern": "^[A-Z]+[a-zA-Z0-9]{12}$"
        },
        "others": {
            "description": "其他信息",
            "type": "object",
            "properties": {
                "info1": {
                    "type": "string"
                },
                "info2": {
                    "type": "string"
                }
            }
        }
    },
    "required": [
        "id", "name", "info", "price", "tags"
    ]
}

# The same book record described for the `schema` library.
# Error messages are aligned with the bounds the lambdas actually enforce.
schema2 = {
    "id": And(int, lambda x: 1 <= x, error="id必须是整数,大于等于1"),
    "name": And(str, lambda s: 3 <= len(s) <= 30, error="name长度3-30"),
    "info": And(str, lambda s: 10 <= len(s) <= 60, error="info信息出错"),
    # NOTE(review): the bounds here are exclusive and only float is accepted,
    # whereas schema1 uses inclusive minimum/maximum and "number" -- confirm
    # which of the two is intended.
    "price": And(float, lambda x: (5.0 < x < 111111.0) and (x % 0.5 == 0), error="price必须是大于5.0小于111111.0的小数,且能被0.5整除"),
    "tags": And(list, tags_check, error="tags出错"),
    # NOTE(review): only the type is checked here; schema1 additionally
    # applies "format": "date" to this field.
    Optional("date"): And(str, error="日期格式出错"),
    Optional("bookcoding"): And(str, Regex("^[A-Z]+[a-zA-Z0-9]{12}$"), error="书籍编码出错"),
    Optional("others"): {
        "info1": str,
        "info2": str
    },
}

def time_jsonschema(data):
    """Validate every record in ``data`` against ``schema1`` with jsonschema.

    Validation failures are printed and do not stop the loop.

    Args:
        data: An iterable of records (dicts) to validate.

    Returns:
        float: Elapsed seconds spent validating all records.
    """
    # The module imports SchemaError from jsonschema.exceptions and then again
    # from schema, so the bare name refers to schema's exception. Import the
    # jsonschema one under an alias (outside the timed region) so that invalid
    # schema errors are actually caught here.
    from jsonschema.exceptions import SchemaError as JsonSchemaError

    # perf_counter is the monotonic, high-resolution clock meant for timing.
    start_time = time.perf_counter()
    for json_data in data:
        # NOTE: validate() is called with the raw schema for every record, so
        # any per-call setup done by jsonschema is part of the measured time.
        try:
            validate(instance=json_data, schema=schema1, format_checker=draft7_format_checker)
        except JsonSchemaError as e:
            # Path elements may be ints (array indices); str() keeps join() safe.
            print("验证模式出错:{}\n提示信息:{}".format(" --> ".join(str(i) for i in e.path), e.message))
        except ValidationError as e:
            print("出错字段:{}\n提示信息:{}".format(" --> ".join(str(i) for i in e.path), e.message))
    return time.perf_counter() - start_time

def time_schema(data):
    """Validate every record in ``data`` against ``schema2`` with the schema library.

    Validation failures are printed and do not stop the loop.

    Args:
        data: An iterable of records (dicts) to validate.

    Returns:
        float: Elapsed seconds spent validating all records.
    """
    # perf_counter is the monotonic, high-resolution clock meant for timing.
    start_time = time.perf_counter()
    for json_data in data:
        # NOTE: a Schema object is built per record, mirroring time_jsonschema,
        # which also passes the raw schema on every call.
        try:
            Schema(schema2).validate(json_data)
        except SchemaError as e:
            print(e)
    return time.perf_counter() - start_time

if __name__ == "__main__":
    # Build the sample records once so both libraries validate identical data.
    data = DataFactory()
    data_list = [data.create_data() for _ in range(10000)]

    t1 = time_jsonschema(data_list)
    t2 = time_schema(data_list)

    # With very few records the measured time can be 0; avoid ZeroDivisionError.
    ratio = t1 / t2 if t2 else float("inf")
    print("jsonschema:schema = {}:{} = {}:1\n".format(t1, t2, ratio))

结果分析:

# 10条数据时:

jsonschema:schema = 0.012000083923339844:0.0019941329956054688 = 5.517694882831181:1

# 100条数据时:

jsonschema:schema = 0.10173273086547852:0.023936033248901367 = 4.180191742616664:1

# 1000条数据时:

jsonschema:schema = 0.9435069561004639:0.2263953685760498 = 4.127518805860752:1

# 10000条数据时:

jsonschema:schema = 9.319035053253174:2.2689626216888428 = 4.1371787451116295:1

数据在10条的时候,多次测试,结果不稳定,耗时比(jsonschema:schema)出现过 6.0、5.5、3.6 等值,波动较大。

数据在100条的时候,多次测验,最终结果比较稳定,耗时比在3.85—4.3之间

数据在1000条的时候,多次测验,最终结果的耗时比在4.0—4.2之间

数据在10000条的时候,由于每次测试时间都比较长,故测试数据相对比较少,但耗时比在4.1左右

试验次数不是很多,基于上面的代码和测试数据,jsonschema 的耗时大约是 schema 的 4 倍,即 schema 的校验速度约为 jsonschema 的 4 倍

以上是 Python 中 jsonschema 与 schema 效率比较 的全部内容, 来源链接: utcz.com/z/386928.html

回到顶部