在Python中合并多个JSON文件的问题

我正在尝试在Ubuntu平台上合并多个JSON文件。例如，来自两个文件的数据如下：

{

"artist":"Gob",

"timestamp":"2011-08-09 01:59:41.352247",

"similars":[

[

"TRTOVWD128F92F4227",

1

],

[

"TRUXNUD128F92F41D0",

0.97294099999999994

],

[

"TRNNOJO128F42992E9",

0.073926900000000004

],

[

"TRGZHTT128F423B2A4",

0.068387699999999996

],

[

"TRGYKYD128F42625F6",

0.065579700000000005

],

[

"TRGIWHY128F42625F5",

0.064063700000000001

],

[

"TRJCJTX128F930CACE",

0.063140100000000005

],

[

"TRMYNWT128F426254B",

0.0613825

],

[

"TRRQOJI128F428C865",

0.061121599999999998

],

[

"TRBNYHM128F428A569",

0.061121599999999998

],

[

"TRDLOYE128F4241E72",

0.060951900000000003

],

[

"TRNRVEW12903CBA24F",

0.060332700000000003

],

[

"TRKKIPG12903CBA083",

0.060155

],

[

"TRZHTGP128F428A63B",

0.059873599999999999

],

[

"TRKQSGZ128F428A851",

0.059873599999999999

],

[

"TRTOPDF128F42AD88A",

0.059687799999999999

],

[

"TRIWOPM128F4241E53",

0.058958900000000002

],

[

"TRCCJUW128F14652DB",

0.057935

],

[

"TRERDDF128F428ECC4",

0.057566600000000002

],

[

"TROKWNN128F421A3D8",

0.057379800000000002

],

[

"TRWGOOK128F42AE765",

0.057125000000000002

],

[

"TRFMNKP128F428ADC0",

0.056875099999999998

],

[

"TRDMLZT128F42A01A8",

0.055808900000000002

],

[

"TRGCJVM128E0780E48",

0.0547389

],

[

"TRRXGAY128F14652D7",

0.0538065

],

[

"TRIPEHH128F1462DFF",

0.052843000000000001

],

[

"TRDUOIP128F147D5A7",

0.051851500000000002

],

[

"TRZCHHD12903CC80A1",

0.051251699999999997

],

[

"TRFDDQS128F426243F",

0.051018300000000003

],

[

"TRZDKAR128F42591B8",

0.050740899999999999

],

[

"TRDVXUG128F1456CBF",

0.050486299999999998

],

[

"TRULRYN128F145FC1C",

0.050219800000000002

],

[

"TRMOWIA128F425CE0F",

0.049977500000000001

],

[

"TRUVPMZ128F42B6DF3",

0.049762000000000001

],

[

"TRSBDWW128F4262666",

0.049643699999999999

],

[

"TRKPHWQ128F4264F8C",

0.0495173

],

[

"TRBBLXU128F42623A1",

0.049416700000000001

],

[

"TRJKLLM128F1456C57",

0.049001599999999999

],

[

"TRSAAEI128F4216C24",

0.048813500000000003

],

[

"TRFXICT128F4264F8A",

0.048776199999999999

],

[

"TRINVLH12903CBE5A1",

0.048334500000000002

],

[

"TRMUUJR128F4262475",

0.048306500000000002

],

[

"TRTORTD128F1456AFA",

0.0468265

],

[

"TRECUJO12903CA7120",

0.046065599999999998

],

[

"TRXIRBQ128F93431BB",

0.0456938

],

[

"TRFDDVK128F42B6DF0",

0.045623799999999999

],

[

"TRSRGPM128F421A30B",

0.043976800000000003

],

[

"TRVUPPR128F429507D",

0.042872500000000001

],

[

"TRMHCZC128F428A4CD",

0.040675200000000002

],

[

"TRUFDRV128F4262352",

0.040675200000000002

],

[

"TRUZZHT128F93229AF",

0.039422199999999998

],

[

"TRLSIHL128F429AF18",

0.039002099999999998

],

[

"TRGETCK128F1460DB1",

0.038499499999999999

],

[

"TRSXXNU128F428AEF2",

0.038303799999999999

],

[

"TRFZXSY128F9330D9F",

0.037855199999999999

],

[

"TRPHFYF128F92F27FA",

0.037772100000000003

],

[

"TRNRHSL128F9337B55",

0.036998000000000003

],

[

"TRPTGNZ128F421A56B",

0.036713099999999999

],

[

"TRPAASI128F9337B6E",

0.036410499999999998

],

[

"TRGCROO128F93431C4",

0.035754300000000003

],

[

"TRCUHZL128F4235446",

0.034968699999999998

],

[

"TRDPOTJ128F429AF0C",

0.034860500000000003

],

[

"TROZUXM128F42790A2",

0.0346483

],

[

"TRJVLOQ128F9345A82",

0.034547799999999997

],

[

"TRQTFRP128F145FC1E",

0.033934600000000002

],

[

"TRQEWHR128F421A3F5",

0.032314599999999999

],

[

"TRNTPJA128F4265039",

0.030702900000000002

],

[

"TRDGXWY12903CF52BD",

0.030292300000000001

],

[

"TRBLEMZ128F93102D0",

0.029224300000000002

],

[

"TRBUUYO128F421A405",

0.028448500000000002

],

[

"TREVBDI12903CED7E6",

0.0279674

],

[

"TRKREBF128F429B317",

0.0258321

],

[

"TRZBYPR128F4233A8D",

0.025655000000000001

],

[

"TRTAZUQ12903CFEA78",

0.024545399999999998

],

[

"TRAIPRO128F429AE69",

0.024304699999999999

],

[

"TRTTVUZ128F92FADD3",

0.023320899999999999

],

[

"TRUYEJI128F4265041",

0.022173700000000001

],

[

"TRAXVGT128F9344507",

0.0213992

],

[

"TRJJBLH128F4260DA1",

0.0175365

],

[

"TRAMCWR128F4233F7F",

0.0161158

],

[

"TRXBLME128F424330F",

0.015760900000000001

],

[

"TRMUQXM128F4260D99",

0.015696000000000002

],

[

"TRHRZBJ128EF345514",

0.0156951

],

[

"TRJXIBT128F42454DB",

0.014519199999999999

],

[

"TRTHPOY128F9345AA5",

0.0137264

],

[

"TRRFGJU128F933B2E6",

0.0012336199999999999

],

[

"TRMYJUA128F428A590",

0.00123149

],

[

"TRNMVTE128F933B2EC",

0.00122703

],

[

"TRYALZM128F1483C7D",

0.0012245299999999999

],

[

"TRZVEJU128F4234F4E",

0.00121805

],

[

"TRQAZDO128F145639F",

0.0012166600000000001

],

[

"TRJXNJM12903CF57ED",

0.0012155

],

[

"TRVAOGO128F427C9D6",

0.00120951

],

[

"TRZMZDS128F422843B",

0.0012065000000000001

],

[

"TRXIEOF12903CE8212",

0.0012058699999999999

],

[

"TRPVVUG128F42A36AA",

0.0012057599999999999

],

[

"TRXGVXS128F428AA5C",

0.0012019400000000001

],

[

"TRUBOGF128E078A5B9",

0.0012017900000000001

],

[

"TRITZSB128F4277CC2",

0.0012014

],

[

"TRGHPHX128F9343544",

0.0011975600000000001

],

[

"TRUKWPE128F428114F",

0.00119666

],

[

"TROBGRB128F93229AB",

0.0011964199999999999

],

[

"TRGKTMW12903CFAE65",

0.00119637

]

],

"tags":[

[

"punk rock",

"100"

],

[

"punk",

"60"

]

],

"track_id":"TRAAAFD128F92F423A",

"title":"Face the Ashes"

}

{

"artist":"CLP",

"timestamp":"2011-08-02 06:36:59.879759",

"similars":[

],

"tags":[

],

"track_id":"TRAAAVG12903CFA543",

"title":"Insatiable (Instrumental Version)"

}

我写了一个Python脚本来合并它们。我在每条记录之后添加了一个逗号和一个换行符。

# The asker's original attempt, kept as written to illustrate the problem.
# It copies the raw content of every *.json file in the current directory
# into merged_file.json and writes ',\n' after each one.
import glob

read_files = glob.glob("*.json")

with open("merged_file.json", "wb") as outfile:
    for f in read_files:
        with open(f, "rb") as infile:
            # Raw copy: the file content is never parsed as JSON.
            outfile.write(infile.read())
            # The separator is written after every file, including the last
            # one, and no enclosing [ ] is ever written around the records.
            # NOTE(review): ',\n' is a str written to a file opened in binary
            # mode; that is accepted on Python 2 only.
            outfile.write(',\n')

合并文件的输出为:

    {

"artist":"Gob",

"timestamp":"2011-08-09 01:59:41.352247",

"similars":[

[

"TRTOVWD128F92F4227",

1

],

[

"TRUXNUD128F92F41D0",

0.97294099999999994

],

[

"TRNNOJO128F42992E9",

0.073926900000000004

],

[

"TRGZHTT128F423B2A4",

0.068387699999999996

],

[

"TRGYKYD128F42625F6",

0.065579700000000005

],

[

"TRGIWHY128F42625F5",

0.064063700000000001

],

[

"TRJCJTX128F930CACE",

0.063140100000000005

],

[

"TRMYNWT128F426254B",

0.0613825

],

[

"TRRQOJI128F428C865",

0.061121599999999998

],

[

"TRBNYHM128F428A569",

0.061121599999999998

],

[

"TRDLOYE128F4241E72",

0.060951900000000003

],

[

"TRNRVEW12903CBA24F",

0.060332700000000003

],

[

"TRKKIPG12903CBA083",

0.060155

],

[

"TRZHTGP128F428A63B",

0.059873599999999999

],

[

"TRKQSGZ128F428A851",

0.059873599999999999

],

[

"TRTOPDF128F42AD88A",

0.059687799999999999

],

[

"TRIWOPM128F4241E53",

0.058958900000000002

],

[

"TRCCJUW128F14652DB",

0.057935

],

[

"TRERDDF128F428ECC4",

0.057566600000000002

],

[

"TROKWNN128F421A3D8",

0.057379800000000002

],

[

"TRWGOOK128F42AE765",

0.057125000000000002

],

[

"TRFMNKP128F428ADC0",

0.056875099999999998

],

[

"TRDMLZT128F42A01A8",

0.055808900000000002

],

[

"TRGCJVM128E0780E48",

0.0547389

],

[

"TRRXGAY128F14652D7",

0.0538065

],

[

"TRIPEHH128F1462DFF",

0.052843000000000001

],

[

"TRDUOIP128F147D5A7",

0.051851500000000002

],

[

"TRZCHHD12903CC80A1",

0.051251699999999997

],

[

"TRFDDQS128F426243F",

0.051018300000000003

],

[

"TRZDKAR128F42591B8",

0.050740899999999999

],

[

"TRDVXUG128F1456CBF",

0.050486299999999998

],

[

"TRULRYN128F145FC1C",

0.050219800000000002

],

[

"TRMOWIA128F425CE0F",

0.049977500000000001

],

[

"TRUVPMZ128F42B6DF3",

0.049762000000000001

],

[

"TRSBDWW128F4262666",

0.049643699999999999

],

[

"TRKPHWQ128F4264F8C",

0.0495173

],

[

"TRBBLXU128F42623A1",

0.049416700000000001

],

[

"TRJKLLM128F1456C57",

0.049001599999999999

],

[

"TRSAAEI128F4216C24",

0.048813500000000003

],

[

"TRFXICT128F4264F8A",

0.048776199999999999

],

[

"TRINVLH12903CBE5A1",

0.048334500000000002

],

[

"TRMUUJR128F4262475",

0.048306500000000002

],

[

"TRTORTD128F1456AFA",

0.0468265

],

[

"TRECUJO12903CA7120",

0.046065599999999998

],

[

"TRXIRBQ128F93431BB",

0.0456938

],

[

"TRFDDVK128F42B6DF0",

0.045623799999999999

],

[

"TRSRGPM128F421A30B",

0.043976800000000003

],

[

"TRVUPPR128F429507D",

0.042872500000000001

],

[

"TRMHCZC128F428A4CD",

0.040675200000000002

],

[

"TRUFDRV128F4262352",

0.040675200000000002

],

[

"TRUZZHT128F93229AF",

0.039422199999999998

],

[

"TRLSIHL128F429AF18",

0.039002099999999998

],

[

"TRGETCK128F1460DB1",

0.038499499999999999

],

[

"TRSXXNU128F428AEF2",

0.038303799999999999

],

[

"TRFZXSY128F9330D9F",

0.037855199999999999

],

[

"TRPHFYF128F92F27FA",

0.037772100000000003

],

[

"TRNRHSL128F9337B55",

0.036998000000000003

],

[

"TRPTGNZ128F421A56B",

0.036713099999999999

],

[

"TRPAASI128F9337B6E",

0.036410499999999998

],

[

"TRGCROO128F93431C4",

0.035754300000000003

],

[

"TRCUHZL128F4235446",

0.034968699999999998

],

[

"TRDPOTJ128F429AF0C",

0.034860500000000003

],

[

"TROZUXM128F42790A2",

0.0346483

],

[

"TRJVLOQ128F9345A82",

0.034547799999999997

],

[

"TRQTFRP128F145FC1E",

0.033934600000000002

],

[

"TRQEWHR128F421A3F5",

0.032314599999999999

],

[

"TRNTPJA128F4265039",

0.030702900000000002

],

[

"TRDGXWY12903CF52BD",

0.030292300000000001

],

[

"TRBLEMZ128F93102D0",

0.029224300000000002

],

[

"TRBUUYO128F421A405",

0.028448500000000002

],

[

"TREVBDI12903CED7E6",

0.0279674

],

[

"TRKREBF128F429B317",

0.0258321

],

[

"TRZBYPR128F4233A8D",

0.025655000000000001

],

[

"TRTAZUQ12903CFEA78",

0.024545399999999998

],

[

"TRAIPRO128F429AE69",

0.024304699999999999

],

[

"TRTTVUZ128F92FADD3",

0.023320899999999999

],

[

"TRUYEJI128F4265041",

0.022173700000000001

],

[

"TRAXVGT128F9344507",

0.0213992

],

[

"TRJJBLH128F4260DA1",

0.0175365

],

[

"TRAMCWR128F4233F7F",

0.0161158

],

[

"TRXBLME128F424330F",

0.015760900000000001

],

[

"TRMUQXM128F4260D99",

0.015696000000000002

],

[

"TRHRZBJ128EF345514",

0.0156951

],

[

"TRJXIBT128F42454DB",

0.014519199999999999

],

[

"TRTHPOY128F9345AA5",

0.0137264

],

[

"TRRFGJU128F933B2E6",

0.0012336199999999999

],

[

"TRMYJUA128F428A590",

0.00123149

],

[

"TRNMVTE128F933B2EC",

0.00122703

],

[

"TRYALZM128F1483C7D",

0.0012245299999999999

],

[

"TRZVEJU128F4234F4E",

0.00121805

],

[

"TRQAZDO128F145639F",

0.0012166600000000001

],

[

"TRJXNJM12903CF57ED",

0.0012155

],

[

"TRVAOGO128F427C9D6",

0.00120951

],

[

"TRZMZDS128F422843B",

0.0012065000000000001

],

[

"TRXIEOF12903CE8212",

0.0012058699999999999

],

[

"TRPVVUG128F42A36AA",

0.0012057599999999999

],

[

"TRXGVXS128F428AA5C",

0.0012019400000000001

],

[

"TRUBOGF128E078A5B9",

0.0012017900000000001

],

[

"TRITZSB128F4277CC2",

0.0012014

],

[

"TRGHPHX128F9343544",

0.0011975600000000001

],

[

"TRUKWPE128F428114F",

0.00119666

],

[

"TROBGRB128F93229AB",

0.0011964199999999999

],

[

"TRGKTMW12903CFAE65",

0.00119637

]

],

"tags":[

[

"punk rock",

"100"

],

[

"punk",

"60"

]

],

"track_id":"TRAAAFD128F92F423A",

"title":"Face the Ashes"

},

{

"artist":"CLP",

"timestamp":"2011-08-02 06:36:59.879759",

"similars":[

],

"tags":[

],

"track_id":"TRAAAVG12903CFA543",

"title":"Insatiable (Instrumental Version)"

}

当我使用 JSON Lint（http://jsonlint.com/）验证这些记录时，它告诉我该文件已损坏，不是有效的JSON。我花了不少时间，仍然无法弄清楚合并过程出了什么问题。如果有人对此有任何想法，将会很有帮助。

回答:

You can’t just concatenate two JSON strings to make valid JSON (or combine them by tacking ',\n' onto the end of each). Instead, you could combine the two (as Python objects) into a Python list, then use json.dump to write it to a file as JSON:

# Merge every *.json file in the current directory into one JSON array.
#
# Each input file is parsed with json.load, the resulting Python objects are
# collected in a list, and json.dump serializes that list as a single valid
# JSON document. An input that is not valid JSON raises json.JSONDecodeError
# (a ValueError subclass) instead of silently corrupting the output.
import glob
import json

OUTPUT_NAME = "merged_file.json"

result = []

# Sort for a reproducible order (glob.glob returns names in arbitrary order)
# and skip the output of an earlier run, which also matches "*.json".
for f in sorted(glob.glob("*.json")):
    if f == OUTPUT_NAME:
        continue
    # Binary mode lets json.load detect the input encoding (UTF-8/16/32).
    with open(f, "rb") as infile:
        result.append(json.load(infile))

# json.dump emits str, so the target must be opened in text mode, not "wb".
with open(OUTPUT_NAME, "w", encoding="utf-8") as outfile:
    json.dump(result, outfile)


If you wanted to do it without the (unnecessary) intermediate step of parsing each JSON file, you could merge them into a list like this:

# Merge every *.json file in the current directory into one JSON array
# without parsing them: the raw bytes of each file are written between
# "[" and "]", separated by ",".
#
# NOTE(review): nothing is validated here. The result is valid JSON only if
# every input file holds exactly one complete JSON value and all inputs share
# an ASCII-compatible encoding without a byte-order mark -- confirm for your
# data, or use json.load/json.dump instead.
import glob
import shutil

OUTPUT_NAME = "merged_file.json"

# Skip the output of an earlier run (it also matches "*.json") and sort for a
# reproducible order, since glob.glob returns names in arbitrary order.
read_files = [f for f in sorted(glob.glob("*.json")) if f != OUTPUT_NAME]

# Everything stays in bytes: the inputs are read in binary mode, so the
# brackets and separators written around them must be bytes as well.
with open(OUTPUT_NAME, "wb") as outfile:
    outfile.write(b"[")
    for index, f in enumerate(read_files):
        if index:
            # Separator goes between files only, never after the last one.
            outfile.write(b",")
        # The with-block closes each input promptly; copyfileobj streams it
        # in chunks instead of loading every file into memory at once.
        with open(f, "rb") as infile:
            shutil.copyfileobj(infile, outfile)
    outfile.write(b"]")

以上是 在Python中合并多个JSON文件的问题 的全部内容, 来源链接: utcz.com/qa/423732.html

回到顶部