为啥我的python code只能解析json的前半截?

我的 json 格式如下:

[

{

"status": "changed",

"dataset": {

"id": "5a4b463c855d783af4f5f695",

"name": "AE_E",

"label": "1- ADVERSE EVENTS - Not Analyzed"

},

"details": {

"variables": [

{

"variable": {

"id": "5a4b4647855d783b494f9d3f",

"name": "CPEVENT",

"label": "CPEVENT"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "unary",

"old_value": "factor"

}

},

"message": "Variable with different R Type"

},

{

"variable": {

"id": "5a4b4647855d783b494f9d25",

"name": "CPEVENT2",

"label": "CPEVENT2"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "unary",

"old_value": "binary"

}

},

"message": "Variable with different R Type"

},

{

"variable": {

"id": "5a4b4647855d783b494f9d26",

"name": "CP_UNSCHEDULED",

"label": "CP_UNSCHEDULED"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "undefined",

"old_value": "unary"

}

},

"message": "Variable with different R Type"

},

{

"variable": {

"id": "5a4b4647855d783b494f9d02",

"name": "VISIT_NUMBER",

"label": "VISIT_NUMBER"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "unary",

"old_value": "integer"

}

},

"message": "Variable with different R Type"

},

{

"variable": {

"id": "5a4b4647855d783b494f9ccf",

"name": "VISIT_NUMBER2",

"label": "VISIT_NUMBER2"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "unary",

"old_value": "binary"

}

},

"message": "Variable with different R Type"

}

],

"many_visits": null

}

},

{

"status": "changed",

"dataset": {

"id": "5a4b465b855d783af4f5f737",

"name": "AE_EQG2",

"label": "2 - ADVERSE EVENTS- Not Analyzed"

},

"details": {

"variables": [

{

"variable": {

"id": "5a4b4666855d783b4b5175ce",

"name": "ADVE_MEDDRA_SOC",

"label": "SYSTEM ORGAN CLASS"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "character",

"old_value": "factor"

}

},

"message": "Variable with different R Type"

}

],

"many_visits": null

}

},

{

"status": "changed",

"dataset": {

"id": "5a4b467a855d783af4f5f7d7",

"name": "AE_M",

"label": "3- ADVERSE EVENTS MEDICATION ERROR - Not Analyzed"

},

"details": {

"variables": [

{

"variable": {

"id": "5a4b4682855d783b494f9dad",

"name": "ADVE_MEDDRA_PT",

"label": "PREFERRED TERM -PT-"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "character",

"old_value": "factor"

}

},

"message": "Variable with different R Type"

},

{

"variable": {

"id": "5a4b4682855d783b494f9d90",

"name": "ADVE_MEDDRA_PT_CODE",

"label": "PREFERRED TERM -PT- CODE"

},

"status": "changed",

"details": {

"r_type": {

"new_value": "character",

"old_value": "factor"

}

},

"message": "Variable with different R Type"

}

],

"many_visits": null

}

},

{

"status": "unchanged",

"dataset": {

"id": "5a4b468c855d783af4f5f839",

"name": "AGG_AE_E",

"label": "1.1 - ADVERSE EVENTS- Aggregated by patient"

},

"details": null

},

{

"status": "unchanged",

"dataset": {

"id": "5a4b469a855d783af4f5f8db",

"name": "AGG_AE_M",

"label": "3.2- ADVERSE EVENTS MEDICATION ERROR- Aggregated by patient"

},

"details": null

}]

我的code如下:

import collections

import pandas as pd

import json

# Flatten one nested dict into a single-level dict whose keys are the nested
# key names joined with `sep`.
# NOTE(review): the indentation of this listing was lost when the page was
# extracted, so it is not runnable exactly as shown.
def flatten(d, parent_key='', sep='_'):

# Collected (flat_key, value) pairs; converted to a dict on return.
items = []

for k, v in d.items():

# Prefix the key with the parent path unless this is the top level.
new_key = parent_key + sep + k if parent_key else k

is_lst = True if isinstance(v, list) else False

# NOTE(review): the collections.MutableMapping alias was removed in
# Python 3.10; newer interpreters need collections.abc.MutableMapping.
if isinstance(v, collections.MutableMapping) or is_lst:

if is_lst:

# Only the first list element (v[0]) is flattened; every later element is
# discarded, which is the data loss described in the question. An empty
# list raises IndexError here.
items.extend(flatten(v[0], new_key, sep=sep).items())

else:

# Nested mapping: recurse with the extended key path.
items.extend(flatten(v, new_key, sep=sep).items())

else:

# Any other value is stored as a leaf under its flat key.
items.append((new_key, v))

return dict(items)

# Parse the JSON document stored in reuse.txt.
with open("reuse.txt") as f:

dic = json.load(f)

# Build one flattened dict per top-level element, i.e. one CSV row per element.
df_ = [flatten(i) for i in dic]

df =pd.DataFrame(df_)

# Write the table to out.csv without the DataFrame index column.
df.to_csv('out.csv', index=False)

感谢Self帮我修改了之前的一段code,
可是目前的code只能解析一半的数据,另外一半数据被截断了。比如AE_E这个数据集,应该有很多变量的信息被放到csv里面,但是现在只有第一个变量的信息被解析出来了。

我希望得到的结果是这样:

图片描述

目前能得到的结果是这样的:
图片描述

奇怪的结果:
图片描述

AE_E只有第一个变量CPEVENT的结果被解析出来了,其他CPEVENT2等变量的数据都丢失了。

回答:

注意:yield from 语法从 Python 3.3 开始才支持(PEP 380),Python 2 无法运行下面的代码

import collections

def flatten(d, prefix="", sep="_"):
    """Yield ``(key, value)`` pairs for every leaf of a nested JSON-like object.

    Keys of nested dicts are joined with ``sep`` to build the flat key. A list
    does not add a path component: each of its elements is flattened under the
    key of the list itself, so the same flat key is yielded once per element.

    Args:
        d: The dict (or list) to flatten.
        prefix: Key path accumulated so far; leave empty for the top level.
        sep: Separator placed between the parts of a flat key.

    Yields:
        ``(flat_key, value)`` tuples in document order. Every value that is
        neither a dict nor a list is a leaf, including ``None``. Top-level
        keys are yielded unchanged; nested keys are formatted into strings.
    """
    if isinstance(d, dict):
        for k, v in d.items():
            key = "{}{}{}".format(prefix, sep, k) if prefix else k
            if isinstance(v, (dict, list)):
                # Forward ``sep`` so a custom separator is used at every depth.
                yield from flatten(v, str(key), sep)
            else:
                yield key, v
    elif isinstance(d, list):
        for item in d:
            if isinstance(item, (dict, list)):
                yield from flatten(item, prefix, sep)
            elif prefix:
                # Scalar list element: report it under the key of its list
                # instead of silently dropping it.
                yield prefix, item

# Placeholder, not valid Python: substitute the dict to flatten, e.g. one
# element of the parsed JSON array.
dic = {your dataset}

# Print every flattened pair as "key: value", one per line.
for key, value in flatten(dic):

print("{}: {}".format(key, value))

结果如下,应该能拍平了

status: changed

dataset_id: 5a4b463c855d783af4f5f695

dataset_name: AE_E

dataset_label: 1- ADVERSE EVENTS - Not Analyzed

details_variables_variable_id: 5a4b4647855d783b494f9d3f

details_variables_variable_name: CPEVENT

details_variables_variable_label: CPEVENT

details_variables_status: changed

details_variables_details_r_type_new_value: unary

details_variables_details_r_type_old_value: factor

details_variables_message: Variable with different R Type

details_variables_variable_id: 5a4b4647855d783b494f9d25

details_variables_variable_name: CPEVENT2

details_variables_variable_label: CPEVENT2

details_variables_status: changed

details_variables_details_r_type_new_value: unary

details_variables_details_r_type_old_value: binary

details_variables_message: Variable with different R Type

details_variables_variable_id: 5a4b4647855d783b494f9d26

details_variables_variable_name: CP_UNSCHEDULED

details_variables_variable_label: CP_UNSCHEDULED

details_variables_status: changed

details_variables_details_r_type_new_value: undefined

details_variables_details_r_type_old_value: unary

details_variables_message: Variable with different R Type

details_variables_variable_id: 5a4b4647855d783b494f9d02

details_variables_variable_name: VISIT_NUMBER

details_variables_variable_label: VISIT_NUMBER

details_variables_status: changed

details_variables_details_r_type_new_value: unary

details_variables_details_r_type_old_value: integer

details_variables_message: Variable with different R Type

details_variables_variable_id: 5a4b4647855d783b494f9ccf

details_variables_variable_name: VISIT_NUMBER2

details_variables_variable_label: VISIT_NUMBER2

details_variables_status: changed

details_variables_details_r_type_new_value: unary

details_variables_details_r_type_old_value: binary

details_variables_message: Variable with different R Type

details_many_visits: None

针对你修改后的问题, 再加个函数就搞定:

# This function is a special case: it exists only to split the N variable
# records found under one dataset into N separate rows.
def fuck_all(dic, prefix="details_variables"):
    """Expand one dataset dict into a list of flat rows, one per variable.

    The dataset is flattened with ``flatten``. Pairs whose key does not start
    with ``prefix`` are dataset-level columns and are copied into every row.
    Pairs whose key starts with ``prefix`` are distributed over the rows: a
    new row is started whenever a key is seen that the current row already
    contains, i.e. when the next variable record begins.

    Args:
        dic: One dataset object (a dict) from the parsed JSON array.
        prefix: Flat-key prefix identifying the repeated per-variable fields.

    Returns:
        A list of dicts. A dataset without any ``prefix`` keys (for example
        ``"details": null``) yields a single row holding only the
        dataset-level columns, so it is not lost from the export. An empty
        input yields an empty list.

    Note:
        Row boundaries are detected by a repeated key, so the variable
        records are assumed to share the same set of fields.
    """
    # flatten is generic and can flatten any nested data set.
    lst = list(flatten(dic))
    top = {k: v for k, v in lst if not k.startswith(prefix)}

    lines = []
    for key, value in lst:
        if not key.startswith(prefix):
            continue
        # A key that is already present means the next record has started.
        if not lines or key in lines[-1]:
            lines.append(top.copy())
        lines[-1][key] = value

    # Keep datasets that have no per-variable details as a single row.
    if not lines and top:
        lines.append(top)
    return lines

# Placeholder, not valid Python: substitute one dataset dict, e.g. one element
# of the parsed JSON array.
d = {your dataset}

# Print each generated row (a dict) on its own line.
for i in fuck_all(d):

print(i)

结果长这样,应该是能满足你需求了

{'status': 'changed', 'dataset_id': '5a4b463c855d783af4f5f695', 'dataset_name': 'AE_E', 'dataset_label': '1- ADVERSE EVENTS - Not Analyzed', 'details_many_visits': None, 'details_variables_variable_id': '5a4b4647855d783b494f9d3f', 'details_variables_variable_name': 'CPEVENT', 'details_variables_variable_label': 'CPEVENT', 'details_variables_status': 'changed', 'details_variables_details_r_type_new_value': 'unary', 'details_variables_details_r_type_old_value': 'factor', 'details_variables_message': 'Variable with different R Type'}

{'status': 'changed', 'dataset_id': '5a4b463c855d783af4f5f695', 'dataset_name': 'AE_E', 'dataset_label': '1- ADVERSE EVENTS - Not Analyzed', 'details_many_visits': None, 'details_variables_variable_id': '5a4b4647855d783b494f9d25', 'details_variables_variable_name': 'CPEVENT2', 'details_variables_variable_label': 'CPEVENT2', 'details_variables_status': 'changed', 'details_variables_details_r_type_new_value': 'unary', 'details_variables_details_r_type_old_value': 'binary', 'details_variables_message': 'Variable with different R Type'}

{'status': 'changed', 'dataset_id': '5a4b463c855d783af4f5f695', 'dataset_name': 'AE_E', 'dataset_label': '1- ADVERSE EVENTS - Not Analyzed', 'details_many_visits': None, 'details_variables_variable_id': '5a4b4647855d783b494f9d26', 'details_variables_variable_name': 'CP_UNSCHEDULED', 'details_variables_variable_label': 'CP_UNSCHEDULED', 'details_variables_status': 'changed', 'details_variables_details_r_type_new_value': 'undefined', 'details_variables_details_r_type_old_value': 'unary', 'details_variables_message': 'Variable with different R Type'}

{'status': 'changed', 'dataset_id': '5a4b463c855d783af4f5f695', 'dataset_name': 'AE_E', 'dataset_label': '1- ADVERSE EVENTS - Not Analyzed', 'details_many_visits': None, 'details_variables_variable_id': '5a4b4647855d783b494f9d02', 'details_variables_variable_name': 'VISIT_NUMBER', 'details_variables_variable_label': 'VISIT_NUMBER', 'details_variables_status': 'changed', 'details_variables_details_r_type_new_value': 'unary', 'details_variables_details_r_type_old_value': 'integer', 'details_variables_message': 'Variable with different R Type'}

{'status': 'changed', 'dataset_id': '5a4b463c855d783af4f5f695', 'dataset_name': 'AE_E', 'dataset_label': '1- ADVERSE EVENTS - Not Analyzed', 'details_many_visits': None, 'details_variables_variable_id': '5a4b4647855d783b494f9ccf', 'details_variables_variable_name': 'VISIT_NUMBER2', 'details_variables_variable_label': 'VISIT_NUMBER2', 'details_variables_status': 'changed', 'details_variables_details_r_type_new_value': 'unary', 'details_variables_details_r_type_old_value': 'binary', 'details_variables_message': 'Variable with different R Type'}

送佛送到西好了

from functools import reduce

import json

import pandas as pd

# JSON text is UTF-8 by specification, so do not depend on the platform's
# default encoding when reading it.
with open("your dataset file", "r", encoding="utf-8") as fh:
    dic = json.load(fh)

# Concatenate the row lists of all datasets. The [] initializer keeps reduce
# from raising TypeError when the JSON array is empty.
rows = reduce(lambda x, y: x + y, (fuck_all(i) for i in dic), [])

df = pd.DataFrame(rows)
df.to_csv("out.csv", index=False)

成品

clipboard.png

回答:

你可以先测试一下,你的 details 里面的信息是否是 MutableMapping 对象实例。

你的if isinstance(v, collections.MutableMapping)没有执行。

回答:

试试看是否符合你的需求:

def flatten(d, parent_key='', sep='_'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined with ``sep``. Every element of a list
    is kept: the first element uses the key of the list itself, and element
    ``i`` (``i >= 1``) uses that key followed by ``sep`` and ``i``, so no list
    element is dropped.

    Args:
        d: The mapping to flatten; its keys must be strings.
        parent_key: Key path accumulated so far; leave empty for the top level.
        sep: Separator placed between the parts of a flat key.

    Returns:
        A dict mapping flat keys to leaf values. An empty list, and a list
        nested directly inside another list, are stored unchanged as values.
    """
    # Imported locally so the function does not depend on the removed
    # ``collections.MutableMapping`` alias (gone since Python 3.10).
    from collections.abc import MutableMapping

    items = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            items.update(flatten(v, new_key, sep=sep))
        elif isinstance(v, list) and v:
            for index, element in enumerate(v):
                # Element 0 keeps the bare key; later elements get an index
                # suffix so they no longer overwrite or lose data.
                element_key = new_key if index == 0 else new_key + sep + str(index)
                if isinstance(element, MutableMapping):
                    items.update(flatten(element, element_key, sep=sep))
                else:
                    items[element_key] = element
        else:
            items[new_key] = v
    return items

以上是 为啥我的python code只能解析json的前半截? 的全部内容, 来源链接: utcz.com/a/162489.html

回到顶部