use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XLFHandler method getSrcContent.
/**
 * Returns the content of the {@code <source>} element of the translation unit identified by the given row id.
 * <p>
 * The value is read through {@code VTDUtils.getValue} using the XPath {@code <tu-xpath>/source/text()}, where
 * {@code <tu-xpath>} is derived from the row id by {@code RowIdUtil.parseRowIdToXPath}.
 *
 * @param rowId
 *            unique identifier of the row
 * @return the source content, or {@code null} when a {@link NavException} is raised while reading it
 */
public String getSrcContent(String rowId) {
	String tuXPath = RowIdUtil.parseRowIdToXPath(rowId);
	VTDNav vn = getVTDNavByRowId(rowId);
	try {
		VTDUtils vu = new VTDUtils(vn);
		return vu.getValue(tuXPath + "/source/text()");
	} catch (NavException e) {
		// The logger records the stack trace; an additional printStackTrace() would only duplicate it on stderr.
		LOGGER.error("Failed to read the source content for rowId: " + rowId, e);
	}
	return null;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XLFHandler method getAutoQAFilteredTUText.
/**
 * Collects the data of one translation unit for the automatic QA check.
 * <p>
 * According to the original author's note (robert, 2011-02-14), the filter excludes context matches, exact
 * matches and locked segments, as configured in the preferences; the actual decision is delegated to
 * {@code filterTheTU}. The returned bean carries the source content, source pure text, source language, target
 * content, target pure text and target language.
 *
 * @param rowId
 *            unique identifier of the row whose translation unit is read
 * @param filterMap
 *            filter settings, passed unchanged to {@code filterTheTU}
 * @return
 *         <ul>
 *         <li>{@code null} when the unit has no {@code <source>} element or its content is null/empty;</li>
 *         <li>a bean with {@code setPassFilter(false)} applied when {@code filterTheTU} rejects the unit;</li>
 *         <li>a bean holding only the source data when the {@code <target>} element is missing or its content
 *         is null/empty;</li>
 *         <li>otherwise a bean holding both source and target data. If the translation unit cannot be located,
 *         or an exception is raised (it is logged), the bean is returned as filled so far.</li>
 *         </ul>
 */
public QATUDataBean getAutoQAFilteredTUText(String rowId, Map<String, Boolean> filterMap) {
	QATUDataBean tuDataBean = new QATUDataBean();
	VTDNav sharedNav = getVTDNavByRowId(rowId);
	// Check the navigator BEFORE dereferencing it; previously duplicateNav() was invoked first, so a missing
	// navigator produced a bare NullPointerException and this assertion could never report its message.
	Assert.isNotNull(sharedNav, Messages.getString("file.XLFHandler.msg4") + RowIdUtil.getFileNameByRowId(rowId));
	// Work on a duplicate so that the cursor movements below do not affect the navigator obtained above.
	VTDNav vn = sharedNav.duplicateNav();
	String tuXpath = RowIdUtil.parseRowIdToXPath(rowId);
	try {
		AutoPilot ap = new AutoPilot(vn);
		AutoPilot childAp = new AutoPilot(vn);
		AutoPilot langAp = new AutoPilot(vn);
		VTDUtils vUtils = new VTDUtils(vn);
		ap.selectXPath(tuXpath);
		if (ap.evalXPath() != -1) {
			// Filter first: a unit that must not be checked yields a bean flagged as not passing the filter.
			if (!filterTheTU(vn, filterMap)) {
				tuDataBean.setPassFilter(false);
				return tuDataBean;
			}
			// Remember the trans-unit position; it is restored before the target is processed.
			vn.push();
			String srcLang = "";
			// Before extracting the source pure text, make sure the source exists and is not empty;
			// otherwise null is returned.
			childAp.selectXPath("./source");
			if (childAp.evalXPath() != -1) {
				// The standard allows a single source element, hence "if" rather than "while".
				String srcContent = vUtils.getElementContent();
				// A null or empty source yields null.
				if (srcContent == null || "".equals(srcContent)) {
					return null;
				} else {
					tuDataBean.setSrcPureText(getTUPureText(vn));
					tuDataBean.setSrcContent(srcContent);
				}
				// Read the source language.
				int langIdx;
				if ((langIdx = vn.getAttrVal("xml:lang")) != -1) {
					srcLang = vn.toString(langIdx);
				} else {
					// The source element declares no language: fall back to the enclosing file element.
					langAp.selectXPath("ancestor::file");
					if (langAp.evalXPath() != -1) {
						if ((langIdx = vn.getAttrVal("source-language")) != -1) {
							srcLang = vn.toString(langIdx);
						}
						langAp.resetXPath();
					}
				}
				tuDataBean.setSrcLang(srcLang);
			} else {
				return null;
			}
			childAp.resetXPath();
			vn.pop();
			// Now read the target. A missing target element, or a null/empty target content, returns the
			// bean with the source data only.
			childAp.selectXPath("./target");
			if (childAp.evalXPath() != -1) {
				// The standard allows a single target element, hence "if" rather than "while".
				String tgtContent = vUtils.getElementContent();
				// A null or empty target ends the collection here.
				if (tgtContent == null || "".equals(tgtContent)) {
					return tuDataBean;
				} else {
					tuDataBean.setTgtContent(tgtContent);
					tuDataBean.setTgtPureText(getTUPureText(vn));
				}
				// Read the target language.
				String tgtLang = "";
				int langIdx;
				if ((langIdx = vn.getAttrVal("xml:lang")) != -1) {
					tgtLang = vn.toString(langIdx);
				} else {
					// The target element declares no language: fall back to the enclosing file element.
					langAp.selectXPath("ancestor::file");
					if (langAp.evalXPath() != -1) {
						if ((langIdx = vn.getAttrVal("target-language")) != -1) {
							tgtLang = vn.toString(langIdx);
						}
					}
				}
				tuDataBean.setTgtLang(tgtLang);
			} else {
				return tuDataBean;
			}
		}
	} catch (Exception e) {
		// The logger records the stack trace; printStackTrace() would only duplicate it on stderr.
		LOGGER.error("Failed to collect the QA data for rowId: " + rowId, e);
	}
	return tuDataBean;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XLFHandler method mergeSegment.
/**
 * Merges two segments of the same file into the first one.
 * <p>
 * Note: {@code rowId1} and {@code rowId2} are not necessarily adjacent; several segments with an empty source
 * may lie between them.
 * <p>
 * The source and target of the second unit are appended to those of the first unit, separated by a
 * {@code <ph id="hs-merge<tuid1>~<tuid2>" splitMergeIndex="..."/>} placeholder. The {@code alt-trans} children
 * of the first unit are removed, the notes of the second unit are appended to it, and the
 * {@code hs:send-to-tm="no"} / {@code hs:needs-review="yes"} flags of the second unit are carried over when
 * the first unit does not already have them. The target state of the first unit is set to {@code new} and
 * its {@code approved} attribute is removed. The second unit is replaced by an empty {@code trans-unit} with
 * the same id. Finally the file is saved and reparsed, and {@code rowId2} is removed from the row list.
 *
 * @param rowId1
 *            row id of the first segment
 * @param rowId2
 *            row id of the second segment
 * @return the concatenated XML fragments of both trans-units as they were before the merge, or an empty
 *         string when the two rows do not belong to the same file or when the merge fails with one of the
 *         handled exceptions (which is logged)
 */
public String mergeSegment(String rowId1, String rowId2) {
	String fileName1 = RowIdUtil.getFileNameByRowId(rowId1);
	String fileName2 = RowIdUtil.getFileNameByRowId(rowId2);
	if (fileName1 == null || fileName2 == null || !fileName1.equals(fileName2)) {
		return "";
	}
	VTDNav vn = vnMap.get(fileName1);
	String tuXPath1 = RowIdUtil.parseRowIdToXPath(rowId1);
	String tuXPath2 = RowIdUtil.parseRowIdToXPath(rowId2);
	String tuid1 = RowIdUtil.getTUIdByRowId(rowId1);
	String tuid2 = RowIdUtil.getTUIdByRowId(rowId2);
	// The first unit is already marked "do not send to TM".
	boolean isNotSendToTM = false;
	// The first unit is already marked "needs review".
	boolean isNeeds_review = false;
	// Whether the "do not send to TM" attribute has to be added to the first unit.
	boolean isAddNotSendToTm = false;
	// Whether the "needs review" attribute has to be added to the first unit.
	boolean isAddNeeds_review = false;
	StringBuilder nodeSB = new StringBuilder();
	AutoPilot ap = new AutoPilot(vn);
	try {
		VTDUtils vu = new VTDUtils(vn);
		String oldElementFragment = "";
		ap.selectXPath(tuXPath1);
		String xmlSpace = null;
		if (ap.evalXPath() != -1) {
			xmlSpace = vu.getCurrentElementAttribut("xml:space", "preserve");
			oldElementFragment += vu.getElementFragment();
			// Read the flags of the first unit: hs:send-to-tm="no" and hs:needs-review="yes".
			isNotSendToTM = "no".equals(vu.getCurrentElementAttribut("hs:send-to-tm", ""));
			isNeeds_review = "yes".equals(vu.getCurrentElementAttribut("hs:needs-review", ""));
		}
		ap.selectXPath(tuXPath2);
		if (ap.evalXPath() != -1) {
			oldElementFragment += vu.getElementFragment();
			if (!isNotSendToTM) {
				isAddNotSendToTm = "no".equals(vu.getCurrentElementAttribut("hs:send-to-tm", ""));
			}
			if (!isNeeds_review) {
				isAddNeeds_review = "yes".equals(vu.getCurrentElementAttribut("hs:needs-review", ""));
			}
			// Collect the notes of the second unit.
			ap.selectXPath("./note");
			while (ap.evalXPath() != -1) {
				nodeSB.append(vu.getElementFragment());
			}
		}
		XMLModifier xm = new XMLModifier(vn);
		String sourceContent2 = "";
		String targetContent2 = "";
		ap.selectXPath(tuXPath2 + "/source");
		String srcLang = null;
		if (ap.evalXPath() != -1) {
			srcLang = vu.getCurrentElementAttribut("xml:lang", null);
			sourceContent2 = vu.getElementContent();
			sourceContent2 = sourceContent2 == null ? "" : sourceContent2;
		}
		ap.selectXPath(tuXPath2 + "/target");
		if (ap.evalXPath() != -1) {
			targetContent2 = vu.getElementContent();
			targetContent2 = targetContent2 == null ? "" : targetContent2;
		}
		// The same value is written into the source and target placeholders of this merge.
		String curTime = "" + System.nanoTime();
		ap.selectXPath(tuXPath1 + "/source");
		if (ap.evalXPath() != -1) {
			String sourceContent1 = vu.getElementContent();
			sourceContent1 = sourceContent1 == null ? "" : sourceContent1;
			String newValue = sourceContent1 + "<ph id=\"hs-merge" + tuid1 + "~" + tuid2 + "\" splitMergeIndex=\""
					+ curTime + "\"/>" + sourceContent2;
			vu.update(ap, xm, tuXPath1 + "/source/text()", newValue);
		}
		ap.selectXPath(tuXPath1 + "/target");
		if (ap.evalXPath() != -1) {
			String targetContent1 = vu.getElementContent();
			targetContent1 = targetContent1 == null ? "" : targetContent1;
			String newValue = targetContent1 + "<ph id=\"hs-merge" + tuid1 + "~" + tuid2 + "\" splitMergeIndex=\""
					+ curTime + "\"/>" + targetContent2;
			vu.update(ap, xm, tuXPath1 + "/target/text()", newValue);
		} else {
			String newValue = "<ph id=\"hs-merge" + tuid1 + "~" + tuid2 + "\" splitMergeIndex=\"" + curTime
					+ "\"/>" + targetContent2;
			vu.insert(ap, xm, tuXPath1 + "/target/text()", newValue);
		}
		// Remove the alt-trans nodes of the first unit.
		ap.selectXPath(tuXPath1 + "/alt-trans");
		while (ap.evalXPath() != -1) {
			xm.remove();
		}
		ap.selectXPath(tuXPath2);
		if (ap.evalXPath() != -1) {
			// Bug #1054: the trans-unit node of the second segment must not be lost when merging, so it is
			// replaced by an empty unit instead of being removed.
			// xml:space / xml:lang are emitted only when a value is available; concatenating a null reference
			// would write the literal text "null" into the file.
			String spaceAttr = xmlSpace == null ? "" : " xml:space=\"" + xmlSpace + "\"";
			String langAttr = srcLang == null ? "" : " xml:lang=\"" + srcLang + "\"";
			String transUnit = "<trans-unit id=\"" + tuid2 + "\"" + spaceAttr + "><target/>" + "<source"
					+ langAttr + "/></trans-unit>";
			vu.update(ap, xm, tuXPath2, transUnit);
		}
		// Bring the union of the attributes and the notes into the first unit.
		ap.selectXPath(tuXPath1);
		if (ap.evalXPath() != -1) {
			// The surrounding blanks are required when inserting attributes.
			String insertAttri = isAddNotSendToTm ? " hs:send-to-tm=\"no\" " : "";
			insertAttri += isAddNeeds_review ? " hs:needs-review=\"yes\" " : "";
			if (insertAttri.length() > 0) {
				xm.insertAttribute(insertAttri);
			}
			// Append the notes collected from the second unit.
			if (nodeSB.length() > 0) {
				xm.insertBeforeTail(nodeSB.toString().getBytes("UTF-8"));
			}
			vn.push();
			// Reset the state after the merge: the merged target is kept as a draft ("new").
			ap.selectXPath("./target");
			if (ap.evalXPath() != -1) {
				int index = vn.getAttrVal("state");
				if (index != -1) {
					xm.updateToken(index, "new");
				} else {
					xm.insertAttribute(" state=\"new\" ");
				}
			}
			vn.pop();
			// Drop the approved flag of the first unit.
			int index = vn.getAttrVal("approved");
			if (index != -1) {
				// index is what getAttrVal returned for the value; index - 1 is passed to removeAttribute
				// (presumably the attribute-name token - confirm against the VTD-XML API).
				xm.removeAttribute(index - 1);
			}
		}
		saveAndReparse(xm, fileName1);
		// Remove the lower row from the row list and adjust the unit count of the file.
		rowIds.remove(rowId2);
		tuSizeMap.put(fileName1, tuSizeMap.get(fileName1) - 1);
		return oldElementFragment;
	} catch (XPathParseException e) {
		LOGGER.error("Failed to merge segments " + rowId1 + " and " + rowId2, e);
	} catch (XPathEvalException e) {
		LOGGER.error("Failed to merge segments " + rowId1 + " and " + rowId2, e);
	} catch (NavException e) {
		LOGGER.error("Failed to merge segments " + rowId1 + " and " + rowId2, e);
	} catch (ModifyException e) {
		LOGGER.error("Failed to merge segments " + rowId1 + " and " + rowId2, e);
	} catch (UnsupportedEncodingException e) {
		LOGGER.error("Failed to merge segments " + rowId1 + " and " + rowId2, e);
	}
	return "";
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class PreMachineTranslation method executeTranslation.
/**
 * Executes the machine pre-translation according to the parameters this object was built with.
 * <p>
 * For every XLIFF file in {@code xlfFiles} a counter is created and added to {@code transCounters}, each
 * {@code /xliff/file} element that has (or inherits from a previous file element) both a source and a target
 * language is handed to {@code keepCurrentMatchs}, and the modified document is written back to the same
 * path. Handled exceptions are logged and end the processing; the counters collected so far are returned.
 *
 * @param monitor
 *            progress monitor; a {@link NullProgressMonitor} is used when {@code null}
 * @return the list of counters, one per processed file; returned unchanged when no matcher is available
 * @throws InterruptedException
 *             when the monitor reports cancellation before a file is processed
 */
public List<PreMachineTranslationCounter> executeTranslation(IProgressMonitor monitor) throws InterruptedException {
	if (monitor == null) {
		monitor = new NullProgressMonitor();
	}
	simpleMatchers = getMacthers();
	if (null == simpleMatchers || simpleMatchers.isEmpty()) {
		return this.transCounters;
	}
	monitor.beginTask("", this.xlfFiles.size());
	monitor.setTaskName(Messages.getString("pretranslation.PreTranslation.task1"));
	try {
		for (String xlfPath : xlfFiles) {
			// monitor is guaranteed to be non-null at this point.
			if (monitor.isCanceled()) {
				throw new InterruptedException();
			}
			currentCounter = new PreMachineTranslationCounter(xlfPath);
			this.transCounters.add(currentCounter);
			VTDNav vn = xlfHandler.getVnMap().get(xlfPath);
			VTDUtils vu = new VTDUtils(vn);
			AutoPilot ap = new AutoPilot(vu.getVTDNav());
			int tuNumber = xlfHandler.getNodeCount(xlfPath, "/xliff/file//descendant::trans-unit[(source/text()!='' or source/*)]");
			currentCounter.setTuNumber(tuNumber);
			ap.selectXPath("/xliff/file");
			String srcLang = "";
			String tgtLang = "";
			XMLModifier xm = new XMLModifier(vn);
			IProgressMonitor monitor2 = new SubProgressMonitor(monitor, 1);
			monitor2.beginTask(Messages.getString("pretranslation.PreTranslation.task2"), tuNumber);
			try {
				while (ap.evalXPath() != -1) {
					// Loop over the file nodes.
					String _srcLang = vu.getCurrentElementAttribut("source-language", "");
					String _tgtLang = vu.getCurrentElementAttribut("target-language", "");
					// A file node without a language attribute keeps the language of the previous one.
					if (!_srcLang.equals("")) {
						srcLang = _srcLang;
					}
					if (!_tgtLang.equals("")) {
						tgtLang = _tgtLang;
					}
					if (srcLang.equals("") || tgtLang.equals("")) {
						continue;
					}
					// NOTE(review): the raw attribute values (_srcLang/_tgtLang) are passed here, not the
					// fallback values (srcLang/tgtLang) checked above - confirm this is intended.
					keepCurrentMatchs(vu, _srcLang, _tgtLang, xm, monitor2);
				}
			} finally {
				// Close the sub task even when the processing of this file is aborted.
				monitor2.done();
			}
			// Write the modified document back to the file. Closing the buffered stream also closes the
			// underlying file stream; the finally block releases the file handle when the output fails.
			BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(xlfPath));
			try {
				xm.output(bos);
			} finally {
				bos.close();
			}
		}
	} catch (XPathParseException e) {
		logger.error("Machine pre-translation failed", e);
	} catch (NavException e) {
		logger.error("Machine pre-translation failed", e);
	} catch (ModifyException e) {
		logger.error("Machine pre-translation failed", e);
	} catch (XPathEvalException e) {
		logger.error("Machine pre-translation failed", e);
	} catch (UnsupportedEncodingException e) {
		logger.error("Machine pre-translation failed", e);
	} catch (FileNotFoundException e) {
		logger.error("Machine pre-translation failed", e);
	} catch (TranscodeException e) {
		logger.error("Machine pre-translation failed", e);
	} catch (IOException e) {
		logger.error("Machine pre-translation failed", e);
	} finally {
		// Always close the main task, including on cancellation.
		monitor.done();
	}
	return this.transCounters;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class VTDUtilsTest method setUp.
/**
 * Prepares the shared fixtures once for the whole test class: parses {@code testFile} and, when parsing
 * succeeds, stores the resulting navigator in {@code vn} and a {@code VTDUtils} bound to it in {@code vu}.
 * When parsing fails both fields are left untouched.
 *
 * @throws Exception
 *             propagated from the parser or from the {@code VTDUtils} constructor
 */
@BeforeClass
public static void setUp() throws Exception {
	final VTDGen generator = new VTDGen();
	final boolean parsed = generator.parseFile(testFile, true);
	if (!parsed) {
		return;
	}
	vn = generator.getNav();
	vu = new VTDUtils(vn);
}
Aggregations