使用JAXB解组XML时不对转义字符取消转义

想象以下情况:我们从某个外部工具收到一个xml文件。最近,在此xml中,节点名称中或它们的richcontent标记中可能有一些转义的字符,如以下示例所示(简化):

<map>

<node TEXT="Project">

<node TEXT="&#xe4;&#xe4;">

<richcontent TYPE="NOTE"><html>

<head>

</head>

<body>

<p>

I am a Note for Node &#228;&#228;!

</p>

</body>

</html>

</richcontent>

</node>

</node>

</map>

在使用JAXB解组文件后,这些转义字符会被取消转义(还原为实际字符)。不幸的是,我需要它们保持原样,也就是保持转义状态。有什么方法可以避免在解组时对这些字符取消转义?

使用JAXB究竟能否实现这一目标,还是我们必须考虑改用其他XML读取API?

预先谢谢你,艾梅娜

回答:

你只需要把 &# 替换为 &amp;#,因此可以这样调用:

unmarshaller.unmarshal(new AmpersandingStream(new FileInputStream(...)));

import java.io.IOException;

import java.io.InputStream;

/**
 * An {@link InputStream} filter that rewrites every {@code &#} byte pair of the
 * wrapped stream to {@code &amp;#}, so that numeric character references
 * (for example {@code &#xe4;} or {@code &#228;}) reach an XML parser as literal
 * text instead of being resolved to the characters they denote.
 *
 * <p>The filter works on raw bytes and has no knowledge of XML structure: every
 * {@code '&'} byte immediately followed by a {@code '#'} byte is rewritten,
 * wherever it occurs. It therefore only suits encodings in which {@code '&'}
 * and {@code '#'} are single ASCII bytes.
 *
 * <p>All read paths ({@link #read()}, {@link #read(byte[])},
 * {@link #read(byte[], int, int)} and {@link #skip(long)}) go through the same
 * filter, one byte at a time; wrap unbuffered sources in a
 * {@code BufferedInputStream} before handing them to this class.
 */
public class AmpersandingStream extends InputStream {

    /** Bytes emitted after the {@code 'a'} that replaces {@code '#'}, completing {@code "&amp;#"}. */
    private static final String ESCAPE_TAIL = "mp;#";

    private final InputStream in;

    /** True when the byte most recently returned by {@link #read()} was {@code '&'}. */
    private boolean justReadAmpersand;

    /** Replacement bytes that still have to be returned before reading from {@code in} again. */
    private String lookAhead = "";

    /** Snapshot of {@link #justReadAmpersand} taken by {@link #mark(int)}. */
    private boolean markedJustReadAmpersand;

    /** Snapshot of {@link #lookAhead} taken by {@link #mark(int)}. */
    private String markedLookAhead = "";

    /**
     * Creates a filter over the given stream.
     *
     * @param in the stream to read the original bytes from
     */
    public AmpersandingStream(InputStream in) {
        this.in = in;
    }

    /**
     * Returns the next byte of the filtered stream.
     *
     * @return the next byte (0-255), or {@code -1} at end of stream
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        // Drain pending replacement bytes first.
        if (!lookAhead.isEmpty()) {
            int pending = lookAhead.charAt(0);
            lookAhead = lookAhead.substring(1);
            return pending;
        }
        int c = in.read();
        if (c == '#' && justReadAmpersand) {
            // The '&' has already been returned; emit "amp;" and then the '#'.
            c = 'a';
            lookAhead = ESCAPE_TAIL;
        }
        justReadAmpersand = c == '&';
        return c;
    }

    /**
     * Estimates the number of bytes readable without blocking, including
     * replacement bytes that are already buffered.
     *
     * @return the estimate, capped at {@link Integer#MAX_VALUE}
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int available() throws IOException {
        long total = (long) in.available() + lookAhead.length();
        return (int) Math.min(Integer.MAX_VALUE, total);
    }

    /**
     * Closes the wrapped stream.
     *
     * @throws IOException if the wrapped stream fails to close
     */
    @Override
    public void close() throws IOException {
        in.close();
    }

    /**
     * Marks the current position in the wrapped stream and remembers the
     * filter state, so that {@link #reset()} replays exactly the same bytes.
     *
     * @param readlimit passed through to the wrapped stream; the filter never
     *     consumes more wrapped bytes than it returns, so the limit stays valid
     */
    @Override
    public synchronized void mark(int readlimit) {
        in.mark(readlimit);
        markedLookAhead = lookAhead;
        markedJustReadAmpersand = justReadAmpersand;
    }

    /**
     * Reports whether the wrapped stream supports {@code mark}/{@code reset}.
     *
     * @return the wrapped stream's {@code markSupported()} result
     */
    @Override
    public boolean markSupported() {
        return in.markSupported();
    }

    /**
     * Fills {@code b} with filtered bytes.
     *
     * @param b destination buffer
     * @return number of bytes stored, or {@code -1} at end of stream
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to {@code len} filtered bytes into {@code b}.
     *
     * @param b destination buffer
     * @param off index of the first byte to store
     * @param len maximum number of bytes to read
     * @return number of bytes stored, or {@code -1} at end of stream
     * @throws IOException if the wrapped stream fails on the first byte
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        // Must not delegate to in.read(b, off, len): that would bypass the
        // filter. The inherited implementation pulls each byte via read().
        return super.read(b, off, len);
    }

    /**
     * Rewinds the wrapped stream to the last mark and restores the filter
     * state captured at that point.
     *
     * @throws IOException if the wrapped stream cannot be reset
     */
    @Override
    public synchronized void reset() throws IOException {
        in.reset();
        // Only reached when the wrapped stream rewound successfully.
        lookAhead = markedLookAhead;
        justReadAmpersand = markedJustReadAmpersand;
    }

    /**
     * Skips up to {@code n} bytes of the filtered stream.
     *
     * @param n number of filtered bytes to skip
     * @return number of bytes actually skipped
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public long skip(long n) throws IOException {
        // The inherited implementation skips by reading, which keeps the
        // filter state in step with the bytes discarded.
        return super.skip(n);
    }
}

以上是 使用JAXB解组XML时不对转义字符取消转义 的全部内容, 来源链接: utcz.com/qa/423453.html

回到顶部