Use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
From class XLFHandler, method getDocumentInfo.
/**
 * Collects the document properties of every {@code file} node of a loaded XLIFF document.
 * <p>
 * One map is produced per {@code /xliff/file} node. Each map contains:
 * <ul>
 * <li>the file attributes keyed by {@code DocumentPropertiesKeys.ORIGINAL}, {@code DATA_TYPE},
 * {@code SOURCE_LANGUAGE} and {@code TARGET_LANGUAGE} (empty string when the attribute is absent);</li>
 * <li>{@code DocumentPropertiesKeys.SKL}: the {@code href} of {@code header/skl/external-file},
 * or {@code Constant.SKL_INTERNAL_FILE} when only an {@code internal-file} child exists, otherwise "";</li>
 * <li>{@code DocumentPropertiesKeys.ENCODING}: text of the {@code encoding} property in the
 * {@code encoding} prop-group, or "";</li>
 * <li>{@code PROJECT_REF}, {@code JOB_REF}, {@code JOB_DATE}, {@code JOB_OWNER} and {@code CLIENT}:
 * text of the matching property in the {@code project} prop-group, or "". A missing or empty
 * {@code JOB_DATE} is replaced by the current date formatted with {@code sdf}.</li>
 * </ul>
 * If an XPath or navigation error occurs it is logged and the maps collected so far are returned.
 * NOTE(review): the cursor of the shared VTDNav is left wherever the iteration over the file
 * nodes ended, exactly as before this change.
 *
 * @param fileName
 *            key under which the document's {@code VTDNav} is registered in {@code vnMap}
 * @return the property maps of all file nodes (one {@code HashMap} per file node)
 */
public List<HashMap<String, String>> getDocumentInfo(String fileName) {
    List<HashMap<String, String>> fileList = new ArrayList<HashMap<String, String>>();
    VTDNav vn = vnMap.get(fileName);
    // Same guard as the other public accessors of this class: fail with a readable message
    // instead of an anonymous error from the AutoPilot constructor.
    Assert.isNotNull(vn, Messages.getString("file.XLFHandler.msg4") + fileName);

    String[] fileAttrNames = { DocumentPropertiesKeys.ORIGINAL, DocumentPropertiesKeys.DATA_TYPE,
            DocumentPropertiesKeys.SOURCE_LANGUAGE, DocumentPropertiesKeys.TARGET_LANGUAGE };
    String[] propTypes = { DocumentPropertiesKeys.PROJECT_REF, DocumentPropertiesKeys.JOB_REF,
            DocumentPropertiesKeys.JOB_DATE, DocumentPropertiesKeys.JOB_OWNER, DocumentPropertiesKeys.CLIENT };
    try {
        AutoPilot apFile = new AutoPilot(vn);
        apFile.selectXPath("/xliff/file");

        // A single helper AutoPilot is re-targeted with selectXPath for every relative lookup.
        AutoPilot ap = new AutoPilot(vn);
        ap.declareXPathNameSpace(hsNSPrefix, hsR7NSUrl);
        VTDUtils vu = new VTDUtils(vn);

        while (apFile.evalXPath() != -1) {
            HashMap<String, String> fileAttrs = new HashMap<String, String>();
            for (String attrName : fileAttrNames) {
                fileAttrs.put(attrName, vu.getCurrentElementAttribut(attrName, ""));
            }
            fileAttrs.put(DocumentPropertiesKeys.SKL, readSkeletonReference(vn, vu, ap));
            fileAttrs.put(DocumentPropertiesKeys.ENCODING, readRelativeText(vn, ap,
                    "./header/hs:prop-group[@name='encoding']/hs:prop[@prop-type='encoding']"));
            for (String propType : propTypes) {
                String value = readRelativeText(vn, ap,
                        "./header/hs:prop-group[@name=\"project\"]/hs:prop[@prop-type=\"" + propType + "\"]");
                if ("".equals(value) && DocumentPropertiesKeys.JOB_DATE.equals(propType)) {
                    value = sdf.format(new Date());
                }
                fileAttrs.put(propType, value);
            }
            fileList.add(fileAttrs);
        }
    } catch (XPathParseException e) {
        LOGGER.error("", e);
    } catch (XPathEvalException e) {
        LOGGER.error("", e);
    } catch (NavException e) {
        LOGGER.error("", e);
    }
    return fileList;
}

/**
 * Resolves the skeleton reference of the file node the cursor is currently on. The cursor
 * position is restored before returning, also when an exception is thrown.
 *
 * @return the {@code href} of {@code header/skl/external-file}, {@code Constant.SKL_INTERNAL_FILE}
 *         when only {@code internal-file} is present, otherwise ""
 */
private String readSkeletonReference(VTDNav vn, VTDUtils vu, AutoPilot ap)
        throws XPathParseException, XPathEvalException, NavException {
    vn.push();
    try {
        ap.selectXPath("./header/skl");
        if (ap.evalXPath() == -1) {
            return "";
        }
        // From here on the cursor is on the skl element, so the paths below are relative to it.
        ap.selectXPath("./external-file");
        if (ap.evalXPath() != -1) {
            return vu.getCurrentElementAttribut("href", "");
        }
        ap.selectXPath("./internal-file");
        if (ap.evalXPath() != -1) {
            return Constant.SKL_INTERNAL_FILE;
        }
        return "";
    } finally {
        vn.pop();
    }
}

/**
 * Evaluates {@code xpath} relative to the current cursor position and returns the text of the
 * first matching element. The cursor position is restored before returning.
 *
 * @return the element text, or "" when nothing matches or the matched element has no text
 */
private String readRelativeText(VTDNav vn, AutoPilot ap, String xpath)
        throws XPathParseException, XPathEvalException, NavException {
    vn.push();
    try {
        ap.selectXPath(xpath);
        if (ap.evalXPath() == -1) {
            return "";
        }
        // getText() yields -1 for an element without text; toString(-1) would fail with an
        // unchecked exception that the caller does not catch.
        int textIdx = vn.getText();
        return textIdx == -1 ? "" : vn.toString(textIdx);
    } finally {
        vn.pop();
    }
}
Use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
From class XLFHandler, method getTagLocation.
/**
 * <div style='color:red'>This method has been copied into class ReverseConversionValidateWithLibrary3 of the
 * convert.ui plug-in, so keep both copies in sync when changing it. --robert 2012-11-29</div>
 * Determines, for every {@code g}, {@code mrk} and {@code sub} tag below the current node, where its
 * header (start tag) and tail (end tag) are located in the given text. Mainly used for segment
 * splitting when the split point lies inside a g or mrk tag. robert 2012-11-15
 * <p>
 * Each returned map holds the keys {@code tagHeader}, {@code tagTail}, {@code headerIdx} and
 * {@code tailIdx}; the indices are stored as decimal strings. {@code headerIdx} is -1 when the
 * header cannot be found in {@code srcContent}. The cursor of {@code vn} is restored before
 * returning. If an exception occurs it is logged and the entries collected so far are returned.
 *
 * @param vn
 *            navigator positioned on the node whose descendants are scanned
 * @param srcContent
 *            text in which the tag headers are searched
 * @return the tag locations in document order
 */
private List<Map<String, String>> getTagLocation(VTDNav vn, String srcContent) {
    List<Map<String, String>> tagLoctionList = new LinkedList<Map<String, String>>();
    vn.push();
    try {
        AutoPilot ap = new AutoPilot(vn);
        VTDUtils vu = new VTDUtils(vn);
        ap.selectXPath("./descendant::node()");
        // Offset in srcContent from which the next header is searched.
        int searchFrom = 0;
        while (ap.evalXPath() != -1) {
            // NOTE(review): node() presumably also visits text nodes; a text node whose whole
            // text is "g", "mrk" or "sub" would pass this name test - confirm against VTDUtils.
            String tagName = vn.toString(vn.getCurrentIndex());
            if (!("g".equals(tagName) || "mrk".equals(tagName) || "sub".equals(tagName))) {
                continue;
            }
            String tagHeader = vu.getElementHead();
            String fragment = vu.getElementFragment();
            String content = vu.getElementContent();
            if (content == null) {
                // Other callers in this class null-check getElementContent(); treat a missing
                // content as empty instead of aborting the whole scan with an NPE.
                content = "";
            }

            // The fragment is expected to be header + content + tail, so the tail is what follows
            // the first two parts. Removing header and content with String.replace deletes every
            // occurrence and corrupts the tail (e.g. <g id="1">g</g> gave "</>").
            String headAndContent = tagHeader + content;
            String tagTail;
            if (fragment.startsWith(headAndContent)) {
                tagTail = fragment.substring(headAndContent.length());
            } else {
                // Unexpected fragment layout: fall back to the previous computation.
                tagTail = fragment.replace(tagHeader, "").replace(content, "");
            }

            int headerIdx = srcContent.indexOf(tagHeader, searchFrom);
            int tailIdx = headerIdx + tagHeader.length() + content.length();
            if (headerIdx != -1) {
                // Continue behind this header; restarting at headerIdx made a following tag with
                // an identical header (e.g. <sub> without attributes) resolve to the same offset.
                searchFrom = headerIdx + tagHeader.length();
            }

            Map<String, String> tagLocationMap = new HashMap<String, String>();
            tagLocationMap.put("tagHeader", tagHeader);
            tagLocationMap.put("tagTail", tagTail);
            tagLocationMap.put("headerIdx", "" + headerIdx);
            tagLocationMap.put("tailIdx", "" + tailIdx);
            tagLoctionList.add(tagLocationMap);
        }
    } catch (Exception e) {
        LOGGER.error("", e);
    } finally {
        // Always restore the cursor saved by the push above.
        vn.pop();
    }
    return tagLoctionList;
}
Use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
From class XLFHandler, method getSplitTuData.
/**
 * Used when splitting an XLIFF file: collects the content of the relevant trans-unit nodes, including
 * other nodes such as group. The processing works on the first-level child nodes of body. robert 2012-05-11
 * Note: the start position is located through the file node and the first-level child of body; startRowId
 * is then used to decide which nodes to take, until endRowId or the end of the file node is reached.
 * <p>
 * The fragments are concatenated in document order. If an exception occurs it is logged and whatever
 * has been accumulated so far is returned.
 * @param xlfPath
 *            key of the document in vnMap; also passed to RowIdUtil.getRowId to build row ids
 * @param fileIdx
 *            index of the file node whose content is collected (1-based)
 * @param startBodyChildIdx
 *            start position (index among the first-level children of body, 1-based)
 * @param startRowId
 *            start rowId, or a marker (e.g. notFirst, meaning: start at the first position of the file node)
 * @param endRowId
 *            rowId of the end position
 * @param isFirstFileNode
 *            whether this is the first file node of the newly generated split file
 * @param isLastOfFile
 *            whether this is the end of the XLIFF file being split; in that case nodes lying beyond the
 *            split point have to be handled as well
 * @return the concatenated XML fragments of the collected nodes
 */
public String getSplitTuData(String xlfPath, int fileIdx, int startBodyChildIdx, String startRowId, String endRowId, boolean isFirstFileNode, boolean isLastOfFile) {
StringBuffer dataSB = new StringBuffer();
// Pairs of { placeholder rowId (may be ""), fragment text } of trans-units to cut out of a container node
List<String[]> removeTuList;
boolean isEnd = false;
VTDNav vn = vnMap.get(xlfPath);
Assert.isNotNull(vn, Messages.getString("file.XLFHandler.msg4") + xlfPath);
AutoPilot ap = new AutoPilot(vn);
AutoPilot checkAp = new AutoPilot(vn);
// Start at the tu node located by this tuXpath and stop at the tu node located by endRowId, or at the end of a file node
String tuXpath = "/xliff/file[" + fileIdx + "]/body/node()[" + startBodyChildIdx + "]";
try {
ap.selectXPath(tuXpath);
if (ap.evalXPath() != -1) {
VTDUtils vu = new VTDUtils(vn);
// The start node's data is part of the result too; the case that it contains tu children must also be handled
String firstNodeFrag = vu.getElementFragment();
// Check whether the current node is a tu node. If so, compare it with startRowId; if not, descend into its children and collect the relevant nodes
int index = vn.getCurrentIndex();
// Whether processing is finished; similar in effect to isEnd, but covers a different situation.
boolean isTheLast = false;
if (index != -1) {
if ("trans-unit".equals(vn.toString(index))) {
if (isFirstFileNode) {
// The start node is a trans-unit other than the requested start row: drop it from the output
if (!RowIdUtil.getRowId(vn, xlfPath).equals(startRowId)) {
firstNodeFrag = "";
}
}
// For two adjacent split points: if the start rowId equals the end rowId, stop after this node
if (endRowId.equals(RowIdUtil.getRowId(vn, xlfPath))) {
isTheLast = true;
}
} else {
removeTuList = new LinkedList<String[]>();
// Start looping over the tu nodes
vn.push();
checkAp.resetXPath();
checkAp.selectXPath("descendant::trans-unit");
boolean isStart = false;
while (checkAp.evalXPath() != -1) {
// Only when this is the first file node of the split file is its start rowId non-empty.
String curRowId = RowIdUtil.getRowId(vn, xlfPath);
if (isFirstFileNode) {
if (!isStart && startRowId.equals(curRowId)) {
isStart = true;
}
// Trans-units before the start row are removed without leaving a placeholder (empty rowId)
if (!isStart) {
removeTuList.add(new String[] { "", vu.getElementFragment() });
}
}
// All TU nodes outside the range (before the start or after the end) must be removed. Mind the order of the start/end checks: the end node itself must stay included
if (isEnd) {
// The rowId changes when the file name changes, so the stored form is built from original and tuID
String original = RowIdUtil.getOriginalByRowId(curRowId);
String tuId = RowIdUtil.getTUIdByRowId(curRowId);
removeTuList.add(new String[] { RowIdUtil.getRowId("{0}", original, tuId), vu.getElementFragment() });
}
// Checked after the removal above so that the end trans-unit itself is kept
if (!isEnd && endRowId.equals(RowIdUtil.getRowId(vn, xlfPath))) {
isEnd = true;
}
}
if (removeTuList.size() >= 1) {
for (String[] tuRowIdAndFrag : removeTuList) {
String tuPlaceHolder = "";
if (!"".equals(tuRowIdAndFrag[0])) {
tuPlaceHolder = "<hs:TuPlaceHolder rowId=\"" + tuRowIdAndFrag[0] + "\" />";
}
// NOTE(review): String.replace substitutes every occurrence of the fragment text, not only this node
firstNodeFrag = firstNodeFrag.replace(tuRowIdAndFrag[1], tuPlaceHolder);
}
}
vn.pop();
}
}
dataSB.append(firstNodeFrag);
// The end row was reached inside (or at) the start node: nothing more to collect
if (isTheLast || isEnd) {
return dataSB.toString();
}
// Start looping over the following first-level children of body
String followNodeXpath = "following-sibling::node()";
ap.resetXPath();
ap.selectXPath(followNodeXpath);
while (ap.evalXPath() != -1) {
String curNodeFrag = vu.getElementFragment();
index = -1;
index = vn.getCurrentIndex();
if (index != -1) {
String nodeName = vn.toString(vn.getCurrentIndex());
// If the name equals trans-unit, this node is a tu node
if ("trans-unit".equals(nodeName)) {
String rowId = RowIdUtil.getRowId(vn, xlfPath);
if (endRowId.equals(rowId)) {
// If this is the last split point, keep looping to pick up the remaining non-TU nodes lying beyond the split-point rowId
if (!isLastOfFile) {
dataSB.append(curNodeFrag);
break;
}
}
} else {
// Otherwise loop over its descendants to see whether the end row is among them
isEnd = false;
removeTuList = new LinkedList<String[]>();
vn.push();
checkAp.selectXPath("descendant::trans-unit");
while (checkAp.evalXPath() != -1) {
String curRowId = RowIdUtil.getRowId(vn, xlfPath);
if (isEnd) {
String original = RowIdUtil.getOriginalByRowId(curRowId);
String tuId = RowIdUtil.getTUIdByRowId(curRowId);
removeTuList.add(new String[] { RowIdUtil.getRowId("{0}", original, tuId), vu.getElementFragment() });
} else {
if (endRowId.equals(curRowId)) {
isEnd = true;
}
}
}
checkAp.resetXPath();
vn.pop();
if (isEnd) {
// The end point has been reached: cut out the tu nodes that must be removed
for (String[] tuRowIdAndFrag : removeTuList) {
String tuPlaceHolder = "<hs:TuPlaceHolder rowId=\"" + tuRowIdAndFrag[0] + "\" />";
curNodeFrag = curNodeFrag.replace(tuRowIdAndFrag[1], tuPlaceHolder);
}
if (!isLastOfFile) {
dataSB.append(curNodeFrag);
break;
}
}
}
}
// Reached for every sibling that did not end the loop via break above
dataSB.append(curNodeFrag);
}
}
} catch (Exception e) {
LOGGER.error("", e);
e.printStackTrace();
}
return dataSB.toString();
}
Use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
From class XLFHandler, method validateMultiFileNodes.
/**
 * Checks that every {@code file} node has a {@code document} property group containing a non-empty
 * {@code original} property (used when the source file of the XLIFF being converted is an
 * OpenOffice or MS Office 2007 document).
 *
 * @param fileName
 *            key under which the document's {@code VTDNav} is registered in {@code vnMap}
 * @return {@code true} if every {@code /xliff/file} node carries a non-empty {@code original}
 *         property (also when the document has no file node at all); {@code false} if one is
 *         missing or empty, or if an XPath/navigation error occurred (the error is logged)
 */
public boolean validateMultiFileNodes(String fileName) {
    VTDNav vn = vnMap.get(fileName);
    Assert.isNotNull(vn, Messages.getString("file.XLFHandler.msg4") + fileName);
    try {
        AutoPilot subAp = new AutoPilot(vn);
        subAp.declareXPathNameSpace(hsNSPrefix, hsR7NSUrl);
        subAp.selectXPath("./header/hs:prop-group[@name=\"document\"]/hs:prop[@prop-type=\"original\"]");
        VTDUtils vu = new VTDUtils(vn);
        AutoPilot ap = new AutoPilot(vn);
        ap.selectXPath("/xliff/file");
        while (ap.evalXPath() != -1) {
            vn.push();
            try {
                subAp.resetXPath();
                if (subAp.evalXPath() == -1) {
                    return false;
                }
                String documentOriginal = vu.getElementContent();
                if (documentOriginal == null || documentOriginal.equals("")) {
                    return false;
                }
            } finally {
                // Balance the push above on every path. The early returns used to skip the pop
                // and left a stale entry on the context stack of the shared VTDNav.
                vn.pop();
            }
        }
        return true;
    } catch (XPathParseException e) {
        LOGGER.error("", e);
    } catch (XPathEvalException e) {
        LOGGER.error("", e);
    } catch (NavException e) {
        LOGGER.error("", e);
    }
    return false;
}
Use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
From class XLFHandler, method getNodeFrag.
/**
 * Returns the complete XML fragment of a node: its start tag, its child nodes and its text.
 *
 * @param xlfPath
 *            key under which the document's {@code VTDNav} is registered in {@code vnMap}
 * @param nodeXPath
 *            XPath of the wanted node; the first match is used
 * @return the fragment of the first matching node, or "" when nothing matches or an
 *         XPath/navigation error occurred (the error is logged). robert 2011-10-21
 */
public String getNodeFrag(String xlfPath, String nodeXPath) {
    final VTDNav nav = vnMap.get(xlfPath);
    Assert.isNotNull(nav, Messages.getString("file.XLFHandler.msg4") + xlfPath);
    try {
        AutoPilot pilot = new AutoPilot(nav);
        pilot.selectXPath(nodeXPath);
        VTDUtils utils = new VTDUtils(nav);
        boolean matched = pilot.evalXPath() != -1;
        // Only the first match is of interest; no match yields the empty string.
        return matched ? utils.getElementFragment() : "";
    } catch (XPathParseException parseError) {
        LOGGER.error("", parseError);
        parseError.printStackTrace();
    } catch (XPathEvalException evalError) {
        LOGGER.error("", evalError);
        evalError.printStackTrace();
    } catch (NavException navError) {
        LOGGER.error("", navError);
        navError.printStackTrace();
    }
    // Reached only after a logged failure.
    return "";
}
Aggregations