use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XliffInputer method restoreGTag.
/**
 * Re-applies the {@code <g>} markup recorded in the intermediate tag file to the
 * {@code source} and {@code target} elements of the matching trans-units in the XLIFF
 * file, writes the XLIFF file back, and finally deletes the intermediate tag file.
 * Returns immediately when the intermediate tag file does not exist.
 *
 * @param interTagPath path of the intermediate tag file (interTag.xml)
 * @throws Exception if either file cannot be parsed, or if XPath evaluation or the XML
 *             modification fails
 */
private void restoreGTag(String interTagPath) throws Exception {
    File interTagFile = new File(interTagPath);
    if (!interTagFile.exists()) {
        return;
    }

    // First parse interTag.xml and collect the recorded markup per trans-unit id.
    VTDGen vg = new VTDGen();
    if (!vg.parseFile(interTagPath, true)) {
        throw new Exception("Cannot parse intermediate tag file: " + interTagPath);
    }
    VTDNav vn = vg.getNav();
    AutoPilot ap = new AutoPilot(vn);
    VTDUtils vu = new VTDUtils(vn);
    ap.selectXPath("/docxTags/tag");
    Map<String, String> tagMap = new HashMap<String, String>();
    while (ap.evalXPath() != -1) {
        int index = vn.getAttrVal("tuId");
        if (index == -1) {
            continue;
        }
        String tuId = vn.toString(index);
        if (tuId == null) {
            continue;
        }
        // Drop every closing tag; a single "</g>" is appended again when the markup is restored.
        String content = vu.getElementContent().replace("</g>", "");
        // Only entries whose content begins with a <g ...> tag are usable.
        if (!content.startsWith("<g")) {
            continue;
        }
        tagMap.put(tuId, content);
    }

    // Then transfer the collected markup into the xliff file.
    vg = new VTDGen();
    if (!vg.parseFile(xliffFile, true)) {
        throw new Exception("Cannot parse XLIFF file: " + xliffFile);
    }
    vn = vg.getNav();
    vu.bind(vn);
    ap.bind(vn);
    XMLModifier xm = new XMLModifier(vn);
    for (Entry<String, String> entry : tagMap.entrySet()) {
        String thisTuId = entry.getKey();
        String tagContent = entry.getValue();
        // The docx converter never produces multiple file nodes, so one lookup per id suffices.
        // Handle the source text first, then the target text.
        restoreGTagInElement(ap, vu, xm, thisTuId, "source", tagContent);
        restoreGTagInElement(ap, vu, xm, thisTuId, "target", tagContent);
    }
    xm.output(xliffFile);

    // Delete interTag.xml now that its content has been merged.
    interTagFile.delete();
}

/**
 * Replaces the first {@code elementName} child of the trans-unit with the given id by a
 * copy whose content is prefixed with {@code tagContent} and suffixed with {@code </g>}.
 * Does nothing when no such element is found.
 */
private void restoreGTagInElement(AutoPilot ap, VTDUtils vu, XMLModifier xm, String tuId, String elementName,
        String tagContent) throws Exception {
    ap.selectXPath("/xliff/file/body//trans-unit[@id='" + tuId + "']/" + elementName);
    if (ap.evalXPath() == -1) {
        return;
    }
    String header = vu.getElementHead();
    String oldContent = vu.getElementContent();
    // Remove the old element and insert the rebuilt fragment at the same place.
    xm.remove();
    StringBuilder newFrag = new StringBuilder();
    newFrag.append(header);
    newFrag.append(tagContent);
    newFrag.append(oldContent);
    newFrag.append("</g></").append(elementName).append(">");
    xm.insertAfterElement(newFrag.toString());
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class Xliff2Ttx method main.
/**
 * Scratch driver: walks over every {@code <ph>...</ph>} fragment of a hard-coded sample
 * string, prints the fragment, parses it with VTD-XML and replaces it in the sample with
 * a {@code <ut>} element. The rewritten sample itself is not printed (those statements
 * are commented out). A fragment that fails to parse has its stack trace printed and is
 * left unchanged.
 *
 * @param args not used
 */
public static void main(String[] args) {
// String content = "<g id='2'>this is a test</g>asdf as sad ";
// content = content.substring(0, content.lastIndexOf("</g>")) + content.substring(content.lastIndexOf("</g>") + 4);
// System.out.println(content);
// String content = "<g id='2'>this is a test</g>asdf as sad</g> ";
// int index = content.indexOf("</g>");
// content = "<cf" + content.substring(content.indexOf("<g") + 2, content.indexOf(">")) + ">";
// System.out.println(content);
int index = -1;
// String content = "<g id='2' size=\"11\" complexscriptssize=\"11\" complexscriptsbold=\"on\" bold=\"on\" superscript=\"on\"><ph><symbol font="Symbol" character="F0E2"/></ph></g>";
// String content = "<g id='2' size=\"11\" complexscriptssize=\"11\" complexscriptsbold=\"on\" bold=\"on\" superscript=\"on\"><ph><field/></ph></g>";
// String content = "<g id='2' size=\"11\" complexscriptssize=\"11\" complexscriptsbold=\"on\" bold=\"on\" superscript=\"on\"><ph></cf></ph></g><ph><field/></ph>";
String content = "<g id='2' size=\"11\" complexscriptssize=\"11\" complexscriptsbold=\"on\" bold=\"on\" superscript=\"on\"><ph></cf></ph></g>这后面是个cf标记了哦。<ph type='cf'></cf></ph>";
index = content.indexOf("<ph");
while (index != -1) {
// Cut out the fragment from "<ph" up to and including the next "</ph>" (5 characters).
String phFrag = content.substring(index, content.indexOf("</ph>", index) + 5);
System.out.println(phFrag);
try {
VTDGen vg = new VTDGen();
vg.setDoc(phFrag.getBytes());
vg.parse(true);
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
VTDUtils vu = new VTDUtils(vn);
ap.selectXPath("/ph");
// Stays empty when the ph has a type attribute other than "cf"; the fragment is then
// replaced by an empty string below.
String replaceText = "";
String phContent = "";
int attrIdx = -1;
if (ap.evalXPath() != -1) {
phContent = vu.getElementContent();
// A ph that carries a type attribute is usually a cf marker.
if ((attrIdx = vn.getAttrVal("type")) != -1) {
if ("cf".equals(vn.toString(attrIdx))) {
String utType = "";
// Decide whether this cf opens or closes, e.g. <cf size="11" complexscriptssize="11">
if (phContent.indexOf("<cf") != -1) {
utType = "start";
} else {
utType = "end";
}
replaceText = "<ut Type=\"" + utType + "\" RightEdge=\"angle\" DisplayText=\"cf\">" + phContent + "</ut>";
}
} else {
// No type attribute: some other marker, e.g. <symbol font="Symbol" character="F0E2"/>
String tagName = "";
int startIdx = -1;
int endIdx = -1;
// NOTE(review): the +4 / +5 offsets below suggest the marker is expected in escaped
// form ("&lt;" is four characters), while the literals here show a raw "<" — confirm
// against VTDUtils.getElementContent and the original source before changing them.
// For a start marker such as <symbol font="Symbol" character="F0E2"/>
if ((startIdx = phContent.trim().indexOf("<")) != -1) {
// For an end marker such as </null?>
if ("/".equals(phContent.trim().substring(startIdx + 4, startIdx + 5))) {
tagName = phContent.trim().substring(startIdx + 5, phContent.trim().indexOf(">"));
} else {
if ((endIdx = phContent.trim().indexOf(" ")) != -1) {
tagName = phContent.trim().substring(startIdx + 4, endIdx);
} else {
// For content without a space, such as <ph><field/></ph>
tagName = phContent.trim().substring(startIdx + 4, phContent.trim().indexOf("/"));
}
}
// System.out.println("tagName = '" + tagName + "'");
}
// System.out.println( "tagName = " + tagName);
replaceText = "<ut DisplayText=\"" + tagName + "\">" + phContent + "</ut>";
}
// String.replace substitutes every occurrence of this fragment, not only the current one.
content = content.replace(phFrag, replaceText);
// System.out.println(content);
}
} catch (Exception e) {
e.printStackTrace();
}
// Continue searching just past the position of the fragment handled in this pass.
index = content.indexOf("<ph", index + 1);
}
// System.out.println(content);
// Second experiment: turn a self-closing <g .../> tag into a <cf ...> start marker wrapped
// in <ut>. The result is only assigned; the statement that printed it is commented out.
String tagStr = "<g id='1' size='12'/>";
String newTagStr = "<cf" + tagStr.substring(tagStr.indexOf("<g") + 2, tagStr.indexOf("/>")) + ">";
newTagStr = "<ut Type=\"start\" RightEdge=\"angle\" DisplayText=\"cf\">" + TextUtil.cleanSpecialString(newTagStr) + "</ut>";
// System.out.println("newTagStr = " + newTagStr);
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XliffUtil method getLangPairOfFile.
/**
 * Reads the language pair declared on the {@code file} element whose {@code original}
 * attribute equals the given path.
 *
 * @param xlfFile
 *            XLIFF file to inspect
 * @param filePath
 *            value of the {@code original} attribute that identifies the {@code file} element
 * @return String[] with the source language at index 0 and the target language at index 1;
 *         an entry that was not read because an exception occurred stays {@code null}
 */
public static String[] getLangPairOfFile(String xlfFile, String filePath) {
    VTDUtils utils = getVU(xlfFile);
    String fileXPath = new StringBuilder("/xliff/file[@original=\"").append(filePath).append("\"]").toString();
    String[] attributeNames = { "source-language", "target-language" };
    String[] result = new String[attributeNames.length];
    try {
        // Read source first, then target; a failure stops the remaining lookups.
        for (int i = 0; i < attributeNames.length; i++) {
            result[i] = utils.getElementAttribute(fileXPath, attributeNames[i]);
        }
    } catch (XPathParseException e) {
        e.printStackTrace();
    } catch (XPathEvalException e) {
        e.printStackTrace();
    } catch (NavException e) {
        e.printStackTrace();
    }
    return result;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class XliffUtil method tuCountOfLangPair.
/**
 * Sums the trans-unit counts of every {@code file} element whose language pair matches
 * the requested one.
 *
 * @param xlfFile
 *            XLIFF file to inspect
 * @param srcLang
 *            source language to match
 * @param tgtLang
 *            target language to match
 * @param includingNull
 *            passed through unchanged to {@code tuCountOfFile}
 * @return int number of trans-unit nodes for the given language pair, judged by the source
 *         and target language of each file node
 */
public static int tuCountOfLangPair(String xlfFile, String srcLang, String tgtLang, boolean includingNull) {
    int total = 0;
    for (String original : getOriginalFiles(getVU(xlfFile))) {
        String[] pair = getLangPairOfFile(xlfFile, original);
        // Skip file nodes whose source or target language differs from the requested pair.
        if (!srcLang.equals(pair[0]) || !tgtLang.equals(pair[1])) {
            continue;
        }
        total += tuCountOfFile(xlfFile, original, includingNull);
    }
    return total;
}
use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.
the class ReverseConversionValidateWithLibrary3 method reBuildXlf.
/**
 * <div style='color:red;'>Replaces the reBuildXlf method above.</div>
 * Creates a temporary file from the hsxliff file that is about to be reverse-converted and
 * post-processes it, e.g. by undoing segment splits and merges. robert 2012-11-28
 * <p>
 * The original file is first copied to {@code tmpXLFFile}; the original is then parsed and,
 * once all split/merge records have been undone, the result is written to
 * {@code tmpXLFFile}. When parsing fails the error is logged and the plain copy is left in
 * place. Any other failure is logged as well and not propagated.
 *
 * @param xliffPath
 *            path of the hsxliff file to read
 * @param tmpXLFFile
 *            temporary file that receives the rebuilt content
 */
private void reBuildXlf(String xliffPath, File tmpXLFFile) {
    // First restore every merged segment to its original form.
    try {
        ResourceUtils.copyFile(new File(xliffPath), tmpXLFFile);
        VTDGen vg = new VTDGen();
        if (!vg.parseFile(xliffPath, true)) {
            LOGGER.error(MessageFormat.format("{0} parse error!", xliffPath));
            return;
        }
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        AutoPilot childAP = new AutoPilot(vn);
        ap.declareXPathNameSpace("hs", "http://www.heartsome.net.cn/2008/XLFExtension");
        childAP.declareXPathNameSpace("hs", "http://www.heartsome.net.cn/2008/XLFExtension");
        VTDUtils vu = new VTDUtils(vn);
        XMLModifier xm = new XMLModifier(vn);

        // Collect all split and merge records first, then undo them one by one from the highest
        // sequence number down to the lowest. A merge record is a marker such as
        // <ph id="hs-merge0~1" splitMergeIndex="0">.
        NavigableMap<Long, SegMergeInfoBean> infoMap = new TreeMap<Long, SegMergeInfoBean>();
        ap.selectXPath("/xliff/file/body/descendant::node()" + "[(name()='group' and @ts='hs-split') or (name()='ph' and contains(@id, 'hs-merge'))]");
        while (ap.evalXPath() != -1) {
            String nodeName = vn.toString(vn.getCurrentIndex());
            // Records without a splitMergeIndex attribute are all stored under the key -1.
            long index = -1;
            int idx = vn.getAttrVal("splitMergeIndex");
            if (idx != -1) {
                index = Long.parseLong(vn.toString(idx));
            }
            if ("ph".equals(nodeName)) {
                // A ph node carries merge information: its id holds the two merged ids as "first~second".
                String phFrag = vu.getElementFragment();
                String phID = vn.toString(vn.getAttrVal("id"));
                String[] tuIds = phID.replace("hs-merge", "").split("~");
                String mergeFirstId = tuIds[0].trim();
                String mergeSecondId = tuIds[1].trim();
                infoMap.put(index, new SegMergeInfoBean(true, phFrag, phID, mergeFirstId, mergeSecondId));
            } else {
                infoMap.put(index, new SegMergeInfoBean(false));
            }
        }

        for (Entry<Long, SegMergeInfoBean> entry : infoMap.descendingMap().entrySet()) {
            Long index = entry.getKey();
            SegMergeInfoBean bean = entry.getValue();
            if (bean.isMerge()) {
                resetMerge(ap, vn, vu, xm, index, bean);
            } else {
                resetSplit(ap, childAP, vn, vu, xm, index);
            }
            // Apply the pending modifications and rebind every helper to the re-parsed document
            // before the next record is processed.
            vn = xm.outputAndReparse();
            xm.bind(vn);
            ap.bind(vn);
            childAP.bind(vn);
            vu.bind(vn);
        }
        xm.output(tmpXLFFile.getAbsolutePath());
    } catch (Exception e) {
        // Report through the class logger, like the parse failure above, instead of stderr.
        LOGGER.error(MessageFormat.format("{0} rebuild error!", xliffPath), e);
    }
}
Aggregations