use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class QAXmlHandler method getNonTransElementsRegex.
/**
 * Collects only the regular expressions of the non-translatable elements.
 * <p>
 * Exceptions raised while reading are logged; whatever was collected up to that point is
 * still returned.
 *
 * @param filePath
 *            key used to look up the parsed document in {@code vnMap}
 * @return the content of every node matched by {@code /nonTrans/element/regular}, in the order
 *         the XPath evaluation yields them
 */
public List<String> getNonTransElementsRegex(String filePath) {
    VTDNav nav = vnMap.get(filePath);
    AutoPilot pilot = new AutoPilot(nav);
    validNull(nav, pilot, filePath);

    List<String> patterns = new ArrayList<String>();
    try {
        VTDUtils utils = new VTDUtils(nav);
        pilot.selectXPath("/nonTrans/element/regular");
        // evalXPath() returns -1 once there are no more matching nodes.
        for (int hit = pilot.evalXPath(); hit != -1; hit = pilot.evalXPath()) {
            patterns.add(utils.getElementContent());
        }
    } catch (Exception e) {
        e.printStackTrace();
        logger.error(Messages.getString("qa.QAXmlHandler.logger19"), e);
    }
    return patterns;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class QAXmlHandler method getFilteredTUTextForMultiParaConsis.
/**
 * Segment-consistency check for files opened merged: collects, per row id, the source and
 * target text of every trans-unit that passes the filter. Per the original notes, the filter
 * excludes context matches, exact matches and locked segments, as configured in the
 * preferences.
 * <p>
 * A row is skipped when its trans-unit cannot be located, when it has no {@code source} or
 * {@code target} child, when either content is {@code null} or empty, or when
 * {@code filterTheTU} rejects it. Exceptions are logged and the entries gathered so far are
 * returned.
 *
 * @param rowIdList
 *            row ids to inspect; the file name and the trans-unit XPath are derived from each id
 * @param filterMap
 *            filter conditions handed to {@code filterTheTU}
 * @param checkSameSource
 *            whether the "same source, different target" check is enabled
 * @param checkSameTarget
 *            whether the "same target, different source" check is enabled
 * @param srcIgnoreTag
 *            ignore-tag flag of the same-source check; forced to {@code false} when that check is off
 * @param tarIgnoreTag
 *            ignore-tag flag of the same-target check; forced to {@code false} when that check is off
 * @return map from row id to the bean holding trimmed source/target content, the pure text of
 *         both (only when an ignore-tag flag is active) and the 1-based position of the row id
 *         in {@code rowIdList}
 */
public Map<String, ParaConsisDataBean> getFilteredTUTextForMultiParaConsis(List<String> rowIdList, Map<String, Boolean> filterMap, boolean checkSameSource, boolean checkSameTarget, boolean srcIgnoreTag, boolean tarIgnoreTag) {
    Map<String, ParaConsisDataBean> filteredTuTextMap = new HashMap<String, ParaConsisDataBean>();
    try {
        // There are two checks (same source / same target). The ignore-tag flag of a check
        // that is switched off must not have any effect.
        if (!checkSameSource) {
            srcIgnoreTag = false;
        }
        if (!checkSameTarget) {
            tarIgnoreTag = false;
        }
        // If either check ignores tags, the pure text has to be extracted as well.
        boolean needPureText = srcIgnoreTag || tarIgnoreTag;

        for (String rowId : rowIdList) {
            String xlfPath = RowIdUtil.getFileNameByRowId(rowId);
            VTDNav vn = vnMap.get(xlfPath);
            // Checked before the AutoPilots are built so that this descriptive message is the
            // one reported (the AutoPilot constructor itself rejects a null navigator).
            Assert.isNotNull(vn, Messages.getString("qa.QAXmlHandler.msg1") + xlfPath);
            AutoPilot ap = new AutoPilot(vn);
            AutoPilot childAp = new AutoPilot(vn);
            VTDUtils vUtils = new VTDUtils(vn);

            ap.selectXPath(RowIdUtil.parseRowIdToXPath(rowId));
            if (ap.evalXPath() == -1) {
                continue;
            }
            ParaConsisDataBean dataBean = new ParaConsisDataBean();

            // Source text. The navigator is shared through vnMap, so every push() must be
            // matched by a pop() even when the row is skipped; hence the finally block.
            vn.push();
            try {
                childAp.selectXPath("./source");
                // The standard allows a single source element, so "if" rather than "while".
                if (childAp.evalXPath() == -1) {
                    continue;
                }
                String srcContent = vUtils.getElementContent();
                if (srcContent == null || "".equals(srcContent)) {
                    continue;
                }
                dataBean.setSrcContent(srcContent.trim());
                if (needPureText) {
                    dataBean.setSrcPureText(getTUPureText(vn).trim());
                }
                childAp.resetXPath();
            } finally {
                vn.pop();
            }

            // Apply the filter on the trans-unit itself; rejected units are skipped.
            if (!filterTheTU(vn, filterMap)) {
                continue;
            }

            // Target text, same rules as for the source.
            vn.push();
            try {
                childAp.selectXPath("./target");
                // The standard allows a single target element, so "if" rather than "while".
                if (childAp.evalXPath() == -1) {
                    continue;
                }
                String tgtContent = vUtils.getElementContent();
                if (tgtContent == null || "".equals(tgtContent)) {
                    continue;
                }
                dataBean.setTgtContent(tgtContent.trim());
                if (needPureText) {
                    dataBean.setTgtPureText(getTUPureText(vn).trim());
                }
            } finally {
                vn.pop();
            }

            dataBean.setLineNumber(rowIdList.indexOf(rowId) + 1);
            filteredTuTextMap.put(rowId, dataBean);
        }
    } catch (Exception e) {
        logger.error(Messages.getString("qa.QAXmlHandler.logger13"), e);
    }
    return filteredTuTextMap;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class QAXmlHandler method getTuProp.
/**
 * Reads an attribute value of a translation unit (taken over from XLFHandler).
 *
 * @param rowId
 *            unique identifier of the row; used to obtain the navigator and the trans-unit XPath
 * @param propName
 *            attribute name
 * @return the value found at {@code <trans-unit xpath>/@<propName>}, or {@code null} when a
 *         {@code NavException} was raised (the exception is logged)
 */
public String getTuProp(String rowId, String propName) {
    VTDNav nav = getVTDNavByRowId(rowId);
    String attrXPath = RowIdUtil.parseRowIdToXPath(rowId) + "/@" + propName;

    String value = null;
    try {
        value = new VTDUtils(nav).getValue(attrXPath);
    } catch (NavException e) {
        e.printStackTrace();
        logger.error(Messages.getString("qa.QAXmlHandler.logger7"), e);
    }
    return value;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class TSFileHandler method analysisTranslatorProgress.
/**
 * Analyses the translation progress and returns the report content directly as an XML string.
 * <p>
 * Every {@code //trans-unit} counts as translated when the pure text of its {@code target}
 * child is neither {@code null} nor blank. Words are counted on the source text with
 * {@code wordCount}. When there are no segments (or no words) the corresponding percentage is
 * reported as 0% instead of failing with a division by zero.
 * <p>
 * NOTE(review): {@code filepath} is written into the {@code filepath} attribute unescaped;
 * confirm callers never pass characters that need XML escaping.
 *
 * @param vn
 *            navigator over the document to analyse
 * @param filepath
 *            path recorded in the report's {@code filepath} attribute
 * @param eqvFactor
 *            not used by this method
 * @return the {@code <status-infos>} fragment with segment and word statistics
 * @throws NavException
 *             on navigation failure
 * @throws XPathParseException
 *             if the XPath cannot be parsed
 * @throws XPathEvalException
 *             if the XPath cannot be evaluated
 */
protected String analysisTranslatorProgress(VTDNav vn, final String filepath, float eqvFactor) throws NavException, XPathParseException, XPathEvalException {
    VTDUtils vu = new VTDUtils(vn);
    StringBuilder sb = new StringBuilder();
    sb.append("\t<status-infos purpose=\"");
    sb.append(Messages.getString("file.TSFileHandler.analysisTranslatorProgress"));
    sb.append("\" filepath=\"").append(filepath).append("\">\n");

    int translatedSegs = 0;
    int untranslatedSegs = 0;
    int translatedWords = 0;
    int untranslatedWords = 0;

    AutoPilot ap = new AutoPilot(vn);
    ap.selectXPath("//trans-unit");
    while (ap.evalXPath() != -1) {
        String tgtText = vu.getChildPureText("target");
        // A null or blank target means "not translated".
        boolean isTranslated = tgtText != null && !tgtText.trim().equals("");

        vn.push();
        String srcText = vu.getChildPureText("source");
        String lang = vu.getElementAttribute("source", "xml:lang");
        int words = wordCount(srcText, lang);
        if (isTranslated) {
            translatedSegs++;
            translatedWords += words;
        } else {
            untranslatedSegs++;
            untranslatedWords += words;
        }
        vn.pop();
    }

    int totalSegs = translatedSegs + untranslatedSegs;
    int totalWords = translatedWords + untranslatedWords;
    // Guard against an empty document / zero word count: integer division by zero would throw.
    int segPercent = totalSegs == 0 ? 0 : translatedSegs * 100 / totalSegs;
    int wordPercent = totalWords == 0 ? 0 : translatedWords * 100 / totalWords;

    appendStatusInfo(sb, "file.TSFileHandler.translatedSegs", "segment", String.valueOf(translatedSegs));
    appendStatusInfo(sb, "file.TSFileHandler.untranslatedSegs", "segment", String.valueOf(untranslatedSegs));
    appendStatusInfo(sb, "file.TSFileHandler.percent1", "segment", segPercent + "%");
    appendStatusInfo(sb, "file.TSFileHandler.total1", "segment", String.valueOf(totalSegs));
    appendStatusInfo(sb, "file.TSFileHandler.translatedWords", "word", String.valueOf(translatedWords));
    appendStatusInfo(sb, "file.TSFileHandler.untranslatedWords", "word", String.valueOf(untranslatedWords));
    appendStatusInfo(sb, "file.TSFileHandler.percent2", "word", wordPercent + "%");
    appendStatusInfo(sb, "file.TSFileHandler.total2", "word", String.valueOf(totalWords));
    sb.append("\t</status-infos>\n");
    return sb.toString();
}

/** Appends one {@code <status-info>} line whose type label is looked up under {@code typeKey}. */
private static void appendStatusInfo(StringBuilder sb, String typeKey, String unit, String value) {
    sb.append("\t\t<status-info type=\"");
    sb.append(Messages.getString(typeKey));
    sb.append("\" statisticunits=\"").append(unit).append("\">");
    sb.append(value);
    sb.append("</status-info>\n");
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class TSFileHandler method openFile.
/**
 * Opens an XLIFF file: parses it, writes a working copy to a temporary file, and caches its
 * translation units until the shared cache limit {@code TU_CACHE_SIZE} is reached. Units
 * beyond the limit are only counted, not parsed.
 * <p>
 * On success the handler's per-file maps ({@code tmpFileMap}, {@code filesChangeStatus},
 * {@code transunits}, {@code tuIndexs}, {@code actualTuCount}, {@code accessHistory}) are
 * updated and {@code fileAttrs} is replaced.
 *
 * @param file
 *            the file to open
 * @param tuCount
 *            number of translation units already cached before this call
 * @return the error result when the file is missing, cannot be parsed or cannot be processed;
 *         otherwise the success result extended with {@code "CurCachedTuCount"} (units cached
 *         from this file) and {@code "TotalCachedTuCount"} (that number plus {@code tuCount})
 */
@Override
public Map<String, Object> openFile(File file, int tuCount) {
    long start = System.currentTimeMillis();

    // Make sure the file exists.
    if (file == null || !file.exists()) {
        String errorMsg = Messages.getString("file.TSFileHandler.logger5");
        logger.error(errorMsg);
        return getErrorResult(errorMsg, null);
    }

    // Is there still room in the cache?
    boolean canCache = tuCount < TU_CACHE_SIZE;
    // Translation units of this file that were parsed and cached.
    int parsedTuCount = 0;
    // Translation units of this file that were only counted.
    int noParseTuCount = 0;
    String filename = file.getAbsolutePath();
    int fileIndex = 1;

    // Parse the file and build the index.
    VTDGen vgRead = new VTDGen();
    if (!vgRead.parseFile(filename, true)) {
        String errorMsg = MessageFormat.format(Messages.getString("file.TSFileHandler.logger13"), filename);
        logger.error(errorMsg);
        return getErrorResult(errorMsg, null);
    }

    VTDNav vnRead = vgRead.getNav();
    VTDUtils vu = null;
    try {
        vu = new VTDUtils(vnRead);
        // Write the document to a temporary working file.
        File tmpFile = createTmpFile();
        XMLModifier xm = new XMLModifier(vnRead);
        FileOutputStream fos = new FileOutputStream(tmpFile);
        try {
            xm.output(fos);
        } finally {
            // Close even when output() fails so the file handle is not leaked.
            fos.close();
        }
        tmpFileMap.put(filename, tmpFile.getAbsolutePath());
        filesChangeStatus.put(filename, false);
    } catch (ModifyException e) {
        String errorMsg = MessageFormat.format(Messages.getString("file.TSFileHandler.logger6"), filename);
        logger.error(errorMsg, e);
        return getErrorResult(errorMsg, e);
    } catch (TranscodeException e) {
        String errorMsg = MessageFormat.format(Messages.getString("file.TSFileHandler.logger7"), filename);
        logger.error(errorMsg, e);
        return getErrorResult(errorMsg, e);
    } catch (IOException e) {
        String errorMsg = MessageFormat.format(Messages.getString("file.TSFileHandler.logger8"), filename);
        logger.error(errorMsg, e);
        return getErrorResult(errorMsg, e);
    } catch (NavException e) {
        String errorMsg = Messages.getString("file.TSFileHandler.logger9");
        logger.error(errorMsg, e);
        return getErrorResult(errorMsg, e);
    }

    // Cache of translation units for this file.
    Vector<TransUnitBean> tusCache = new Vector<TransUnitBean>();
    // Cache of translation unit index keys for this file.
    Vector<String> tuIndexCache = new Vector<String>();
    // Attributes of each <file> node, keyed by its 1-based index.
    fileAttrs = new Hashtable<Integer, Hashtable<String, String>>();
    AutoPilot apFile = new AutoPilot(vnRead);
    String fileNode = "/xliff/file";
    try {
        apFile.selectXPath(fileNode);
        while (apFile.evalXPath() != -1) {
            fileAttrs.put(fileIndex, vu.getCurrentElementAttributs());
            AutoPilot apTU = new AutoPilot(vnRead);
            apTU.selectXPath("body//trans-unit");
            vnRead.push();
            while (apTU.evalXPath() != -1) {
                // While the cache has room, parse and cache the unit; otherwise only count it.
                if (canCache) {
                    // All attributes of the translation unit.
                    vnRead.push();
                    Hashtable<String, String> tuProps = vu.getCurrentElementAttributs();
                    vnRead.pop();
                    String tuid = tuProps.get("id");

                    // Source: full content (inline markup included), pure text and attributes.
                    // The pure text and attributes are read before pop(), i.e. from wherever
                    // getElementContent(xpath) left the cursor (presumably the source node).
                    vnRead.push();
                    String srcContent = vu.getElementContent("./source");
                    String srcText = vu.getElementPureText();
                    Hashtable<String, String> srcProps = vu.getCurrentElementAttributs();
                    vnRead.pop();

                    // Target: same three reads as for the source.
                    vnRead.push();
                    String tgtContent = vu.getElementContent("./target");
                    String tgtText = vu.getElementPureText();
                    Hashtable<String, String> tgtProps = vu.getCurrentElementAttributs();
                    vnRead.pop();

                    // All alt-trans match nodes.
                    vnRead.push();
                    Vector<AltTransBean> matches = getAltTrans(vu);
                    vnRead.pop();

                    // Build the translation unit bean holding the node information.
                    TransUnitBean tub = new TransUnitBean(tuid, srcContent, srcText);
                    tub.setTuProps(tuProps);
                    tub.setSrcProps(srcProps);
                    tub.setTgtContent(tgtContent);
                    tub.setTgtText(tgtText);
                    tub.setTgtProps(tgtProps);
                    tub.setMatches(matches);
                    vnRead.push();
                    tub.setNotes(getNotes(vu));
                    vnRead.pop();
                    vnRead.push();
                    tub.setPropgroups(getPrpoGroups(vu));
                    vnRead.pop();

                    tusCache.add(tub);
                    tuIndexCache.add(filename + ";" + fileIndex + ";" + tuid);

                    // Count the parsed unit and stop caching once the limit is hit.
                    parsedTuCount++;
                    if (tuCount + parsedTuCount == TU_CACHE_SIZE) {
                        canCache = false;
                    }
                } else {
                    // Count the unit without parsing it.
                    noParseTuCount++;
                }
            }
            vnRead.pop();
            // Index of the next <file> node.
            fileIndex++;
        }
        transunits.put(filename, tusCache);
        tuIndexs.put(filename, tuIndexCache);
        actualTuCount.put(filename, parsedTuCount + noParseTuCount);
        accessHistory.put(filename, "");
    } catch (XPathEvalException e) {
        String errorMsg = Messages.getString("file.TSFileHandler.logger10");
        logger.error(errorMsg, e);
        return getErrorResult(errorMsg, e);
    } catch (NavException e) {
        String errorMsg = Messages.getString("file.TSFileHandler.logger11");
        logger.error(errorMsg, e);
        return getErrorResult(errorMsg, e);
    } catch (XPathParseException e) {
        String errorMsg = Messages.getString("file.TSFileHandler.logger12");
        logger.error(errorMsg, e);
        return getErrorResult(errorMsg, e);
    }

    long end = System.currentTimeMillis();
    // Report the elapsed time (each value is the total elapsed time in that unit).
    long resultMS = end - start;
    long resultS = resultMS / 1000;
    long resultM = resultMS / (1000 * 60);
    System.gc();
    logger.info(Messages.getString("file.TSFileHandler.logger14"), new Object[] { resultM, resultS, resultMS });

    Map<String, Object> result = getSuccessResult();
    result.put("CurCachedTuCount", Integer.valueOf(parsedTuCount));
    result.put("TotalCachedTuCount", Integer.valueOf(parsedTuCount + tuCount));
    return result;
}
Aggregations