use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class DocUtils method isTMX.
/**
 * Checks whether the given file is a TMX document.
 * <p>
 * The whole file is read into memory and parsed; the XPath {@code /tmx} is then evaluated against the
 * parsed document.
 *
 * @param fileName
 *            path of the file to check
 * @return a {@link VTDUtils} bound to the parsed document when {@code /tmx} matches; {@code null} when
 *         the file cannot be read, when {@code /tmx} does not match, or when binding / XPath
 *         evaluation fails (the failure is logged)
 * @throws FileNotFoundException
 *             declared in the signature, but a missing file is caught together with every other I/O
 *             error, logged, and reported as a {@code null} result
 * @throws EncodingException
 *             if thrown while the document is parsed
 * @throws ParseException
 *             if thrown while the document is parsed
 */
public static VTDUtils isTMX(String fileName) throws FileNotFoundException, EncodingException, ParseException {
	VTDGen vg = new VTDGen();
	FileInputStream fis = null;
	try {
		File f = new File(fileName);
		long length = f.length();
		if (length > Integer.MAX_VALUE) {
			// A single byte[] cannot hold the document, and the narrowing cast below would
			// otherwise overflow silently.
			throw new IOException("File is too large to be loaded into memory: " + fileName);
		}
		fis = new FileInputStream(f);
		byte[] b = new byte[(int) length];
		int offset = 0;
		int numRead;
		// read() may deliver fewer bytes than requested, so keep going until the buffer is
		// full or the end of the stream is reached.
		while (offset < b.length && (numRead = fis.read(b, offset, b.length - offset)) >= 0) {
			offset += numRead;
		}
		vg.setDoc(b);
		vg.parse(true);
	} catch (IOException e) {
		LOGGER.error(Messages.getString("document.DocUtils.logger1"), e);
		// Nothing was parsed, so there is no document to navigate; report the file as
		// "not a TMX" instead of continuing with an unparsed generator.
		return null;
	} finally {
		if (fis != null) {
			try {
				fis.close();
			} catch (Exception e) {
				// Best effort: the data has already been read (or the failure already
				// handled), so a close failure is deliberately ignored.
			}
		}
	}
	VTDNav vn = vg.getNav();
	AutoPilot ap = new AutoPilot(vn);
	String rootPath = "/tmx";
	VTDUtils vu = new VTDUtils();
	try {
		vu.bind(vn);
		ap.selectXPath(rootPath);
		if (ap.evalXPath() == -1) {
			// No /tmx node: not a TMX document.
			return null;
		}
	} catch (NavException e) {
		LOGGER.error(Messages.getString("document.DocUtils.logger2"), e);
		return null;
	} catch (XPathEvalException e) {
		LOGGER.error(Messages.getString("document.DocUtils.logger2"), e);
		return null;
	} catch (XPathParseException e) {
		LOGGER.error(Messages.getString("document.DocUtils.logger2"), e);
		return null;
	} finally {
		vg.clear();
	}
	return vu;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class TmxReader method validateTmxAndParseHeader.
/**
 * Validates the TMX structure of the parsed document and positions {@code vu} on the body.
 * <p>
 * The checks run in order: {@code /tmx} must match, {@code /tmx/header} must match, the header must
 * carry a {@code srclang} attribute (its trimmed value is stored on {@code header}), and
 * {@code vu.pilot("/tmx/body")} must succeed. Afterwards {@code totalTu} is set from
 * {@code vu.getChildElementsCount()}. {@code vg.clear()} is always called before returning.
 *
 * @param vg
 *            generator whose navigator is inspected
 * @throws TmxReadException
 *             if any of the checks fails, or if a {@code VTDException} is raised while navigating
 */
private void validateTmxAndParseHeader(VTDGen vg) throws TmxReadException {
	final VTDNav nav = vg.getNav();
	final AutoPilot pilot = new AutoPilot(nav);
	vu = new VTDUtils();
	try {
		vu.bind(nav);

		// Step 1: the root element.
		pilot.selectXPath("/tmx");
		boolean valid = pilot.evalXPath() != -1;

		// Step 2: the header element.
		if (valid) {
			pilot.resetXPath();
			pilot.selectXPath("/tmx/header");
			valid = pilot.evalXPath() != -1;
		}

		// Step 3: the header's srclang attribute.
		int srclangIndex = -1;
		if (valid) {
			srclangIndex = vu.getVTDNav().getAttrVal("srclang");
			valid = srclangIndex != -1;
		}

		// Step 4: record srclang, then move on to the body.
		if (valid) {
			header.setSrclang(vu.getVTDNav().toString(srclangIndex).trim());
			valid = vu.pilot("/tmx/body") != -1;
		}

		if (!valid) {
			throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError"));
		}

		// Total number of translation units.
		this.totalTu = vu.getChildElementsCount();
	} catch (VTDException e) {
		logger.error("", e);
		throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError") + e.getMessage());
	} finally {
		vg.clear();
	}
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class Docx2Xliff method idealizeGTag.
/**
 * Simplifies the markup of an XLIFF file by moving certain {@code g} tags out of the source text.
 * <p>
 * A trans-unit is processed only when its {@code source} has exactly one {@code g} child and that tag
 * wraps the whole segment. For such a unit the {@code g} element is replaced by its content in the
 * XLIFF, and the opening tag is recorded for the tag file. The modified XLIFF is written back to
 * {@code xliffPath}.
 * <p>
 * The recorded tags are written to {@code interTagPath} (per the original note: a file named
 * interTag.xml in the first-level directory of the skeleton, next to the word folder), but only when
 * at least one tag was extracted and the file does not exist yet. Its structure is roughly:
 *
 * <pre>
 * &lt;docxTags&gt;
 *   &lt;tag tuId="0"&gt;this is a tag&lt;/tag&gt;
 * &lt;/docxTags&gt;
 * </pre>
 *
 * Note: interTag.xml is not an internal docx file; it stores some of the {@code g} tags produced while
 * converting the docx file (sources that contain a single pair of {@code g} tags wrapping the whole
 * segment).
 *
 * @param xliffPath
 *            path of the XLIFF file to rewrite in place
 * @param interTagPath
 *            path of the tag file to create
 * @throws Exception
 *             if the XLIFF file cannot be parsed, or if navigation, modification or file output
 *             fails
 */
private static void idealizeGTag(String xliffPath, String interTagPath) throws Exception {
	final String constantGHeader = "<g";
	final String constantGEnd = "</g>";
	VTDGen vg = new VTDGen();
	if (!vg.parseFile(xliffPath, true)) {
		throw new Exception("Failed to parse XLIFF file: " + xliffPath);
	}
	VTDNav vn = vg.getNav();
	String xpath = "/xliff/file/body/descendant::trans-unit[source/text()!='' or source/*]";
	AutoPilot ap = new AutoPilot(vn);
	AutoPilot childAP = new AutoPilot(vn);
	VTDUtils vu = new VTDUtils(vn);
	XMLModifier xm = new XMLModifier(vn);
	ap.selectXPath(xpath);
	StringBuilder tagContentSB = new StringBuilder();
	while (ap.evalXPath() != -1) {
		int index = vn.getAttrVal("id");
		String id = index != -1 ? vn.toString(index) : null;
		if (id == null) {
			// The cursor has not been pushed yet, so there is nothing to pop here.
			continue;
		}
		// Save the trans-unit position; the child queries below move the cursor.
		vn.push();
		childAP.selectXPath("./source");
		if (childAP.evalXPath() != -1) {
			String srcText = vu.getElementContent();
			childAP.selectXPath("count(./g)");
			// Clean up only when there is exactly one g child and it wraps the whole segment:
			// the content starts with "<g" and the first "</g>" is the very end of the content.
			if (childAP.evalXPathToNumber() == 1 && srcText.indexOf(constantGHeader) == 0
					&& srcText.indexOf(constantGEnd) == (srcText.length() - constantGEnd.length())) {
				childAP.selectXPath("./g");
				if (childAP.evalXPath() != -1) {
					String header = vu.getElementHead();
					String content = vu.getElementContent();
					// Replace the g element by its own content.
					xm.remove();
					xm.insertAfterElement(content);
					// Remember the removed g tag for interTag.xml.
					tagContentSB.append("\t<tag tuId=\"").append(id).append("\">").append(header)
							.append("</g>").append("</tag>\n");
				}
			}
		}
		vn.pop();
	}
	xm.output(xliffPath);
	if (tagContentSB.length() > 0) {
		// Create interTag.xml; an already existing file is left untouched.
		File file = new File(interTagPath);
		if (!file.exists()) {
			FileOutputStream output = new FileOutputStream(interTagPath);
			try {
				output.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".getBytes("UTF-8"));
				output.write("<docxTags>\n".getBytes("UTF-8"));
				output.write(tagContentSB.toString().getBytes("UTF-8"));
				output.write("</docxTags>".getBytes("UTF-8"));
			} finally {
				// Release the file handle even when one of the writes fails.
				output.close();
			}
		}
	}
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XliffInputer method loadXliff.
/**
 * Parses the hsxliff file referenced by {@code xliffFile} and initialises the navigation helpers
 * ({@code vn}, {@code ap}, {@code childAP}, {@code vu}). Both auto-pilots get the {@code hs}
 * namespace prefix declared.
 *
 * @throws Exception
 *             if the file cannot be parsed
 */
private void loadXliff() throws Exception {
	final String hsNamespace = "http://www.heartsome.net.cn/2008/XLFExtension";
	VTDGen generator = new VTDGen();
	if (!generator.parseFile(xliffFile, true)) {
		throw new Exception(MessageFormat.format(Messages.getString("docxConvert.msg2"), xliffFile));
	}
	vn = generator.getNav();
	ap = new AutoPilot(vn);
	childAP = new AutoPilot(vn);
	ap.declareXPathNameSpace("hs", hsNamespace);
	childAP.declareXPathNameSpace("hs", hsNamespace);
	vu = new VTDUtils(vn);
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XLFHandler method getFullAndPureText.
/**
 * Gets the full text and the pure text of a node under a trans-unit (typically its source or target).
 * robert 2011-12-14
 *
 * @param xlfPath
 *            path of the XLIFF file; used as the key into {@code vnMap}
 * @param nodeXpath
 *            XPath of the node whose text is wanted
 * @return a map that is never {@code null}. When the node exists and has non-empty content it holds
 *         two entries: {@code fullText} (the element content) and {@code pureText} (the result of
 *         {@code getTUPureText}). Otherwise, or when an error occurs (it is logged), the map is
 *         empty.
 */
public Map<String, String> getFullAndPureText(String xlfPath, String nodeXpath) {
	Map<String, String> textMap = new HashMap<String, String>();
	VTDNav vn = vnMap.get(xlfPath);
	// Validate before the first use, so that a missing navigator is reported with the intended
	// message rather than as a NullPointerException.
	Assert.isNotNull(vn, Messages.getString("file.XLFHandler.msg4") + xlfPath);
	AutoPilot ap = new AutoPilot(vn);
	vn.push();
	try {
		VTDUtils vUtils = new VTDUtils(vn);
		ap.selectXPath(nodeXpath);
		if (ap.evalXPath() != -1) {
			String content = vUtils.getElementContent();
			if (content != null && !"".equals(content)) {
				textMap.put("fullText", content);
				textMap.put("pureText", getTUPureText(vn));
			}
		}
	} catch (Exception e) {
		LOGGER.error("", e);
	} finally {
		// Always restore the cursor position saved by push().
		vn.pop();
	}
	return textMap;
}
Aggregations