use of net.heartsome.cat.converter.word2007.common.SectionSegBean in project translationstudio8 by heartsome.
the class PartOperate method getLinkText.
// <w:hyperlink r:id="rId5" w:history="1">
// <w:r w:rsidRPr="007267A9">
// <w:rPr>
// <w:rStyle w:val="a5" />
// <w:noProof />
// </w:rPr>
// <w:t>www.baidu.</w:t>
// </w:r>
// <w:r w:rsidRPr="0077163A">
// <w:rPr>
// <w:rStyle w:val="a5" />
// <w:noProof />
// <w:color w:val="FF0000" />
// </w:rPr>
// <w:t>com</w:t>
// </w:r>
// </w:hyperlink>
/**
 * Builds the "special text" of the w:hyperlink node the cursor is currently on.<br/>
 * Note: the XML comment above shows a sample w:hyperlink. A whole w:hyperlink is handled as one g tag, so
 * its special text is everything that has to be extracted from it (all of its child runs), not just the
 * visible "www.baidu.com".<br/>
 * When the hyperlink has exactly one w:r child, nothing is built and {@code null} is returned. Otherwise the
 * child nodes are joined into a string made of sub and ph tags.
 *
 * @return the sub/ph markup for the hyperlink, or {@code null} when the hyperlink has exactly one w:r child
 *         or when no markup was produced
 * @throws Exception if one of the navigation or XPath helpers called here fails
 */
private String getLinkText() throws Exception {
    // ArrayList: the list is walked by index below, which would be linear per access on a LinkedList.
    List<SectionSegBean> segList = new ArrayList<SectionSegBean>();

    vn.push();
    try {
        if (getNodeCount("./w:r") == 1) {
            // The finally block balances the push above; returning without a pop would leave a stale
            // context on the navigator stack and make the caller's next pop() restore the wrong node.
            return null;
        }
        extendAP.selectXPath("./node()");
        while (extendAP.evalXPath() != -1) {
            String rStyle = getRStyle();
            String extendNodesStr = getExtendNodes();
            // A child with exactly one w:t is treated as a text run; anything else becomes a ph tag.
            if (getNodeCount("./w:t") == 1) {
                String text = getText();
                // extendNodes is a custom attribute for nodes other than style and text, e.g. <w:tab/>.
                String extendNodesAttr = (extendNodesStr == null || "".equals(extendNodesStr)) ? ""
                        : " extendNodes='" + xlfOutput.cleanString(extendNodesStr) + "'";
                // rPr keeps the style of the w:r run.
                String rprAttr = (rStyle == null || "".equals(rStyle)) ? ""
                        : " rPr='" + xlfOutput.cleanString(rStyle) + "'";
                segList.add(new SectionSegBean(null, text, rprAttr, extendNodesAttr, null));
            } else {
                String phTagStr = vu.getElementFragment();
                segList.add(new SectionSegBean(null, null, null, null, xlfOutput.cleanString(phTagStr)));
            }
        }
    } finally {
        vn.pop();
    }

    StringBuilder specialTextSB = new StringBuilder();
    for (int i = 0; i < segList.size(); i++) {
        SectionSegBean bean = segList.get(i);
        String style = bean.getStyle() == null ? "" : bean.getStyle();
        String extendNodes = bean.getExtendNodesStr() == null ? "" : bean.getExtendNodesStr();
        String phTagStr = bean.getPhTagStr();

        if (phTagStr == null && "".equals(style) && "".equals(extendNodes)) {
            // Plain text without style or extra nodes is emitted as is.
            specialTextSB.append(bean.getText());
        } else if (phTagStr == null) {
            specialTextSB.append("<sub id='" + xlfOutput.useTagId() + "'" + style + extendNodes + ">");
            specialTextSB.append(bean.getText());
            // Following beans with the same style and the same extend nodes are folded into this sub tag.
            while (i + 1 < segList.size()) {
                SectionSegBean next = segList.get(i + 1);
                String nextStyle = next.getStyle() == null ? "" : next.getStyle();
                String nextExtendNodes = next.getExtendNodesStr() == null ? "" : next.getExtendNodesStr();
                if (!nextStyle.equals(style) || !nextExtendNodes.equals(extendNodes)) {
                    break;
                }
                specialTextSB.append(next.getText());
                i++;
            }
            specialTextSB.append("</sub>");
        } else if (!"".equals(phTagStr.trim())) {
            specialTextSB.append("<ph id='" + xlfOutput.useTagId() + "'>");
            specialTextSB.append(phTagStr);
            // Consecutive ph beans are folded into a single ph tag.
            while (i + 1 < segList.size()) {
                SectionSegBean next = segList.get(i + 1);
                if (next.getPhTagStr() == null) {
                    break;
                }
                specialTextSB.append(next.getPhTagStr());
                i++;
            }
            specialTextSB.append("</ph>");
        }
    }

    String curLinkText = specialTextSB.toString();
    return curLinkText.length() == 0 ? null : curLinkText;
}
use of net.heartsome.cat.converter.word2007.common.SectionSegBean in project translationstudio8 by heartsome.
the class XliffInputer method anaysisTgtOrSrcNode.
/**
 * Analyzes a source or target node and appends its content to {@code textSB}.
 * <p>
 * The children of the node the cursor is on are collected into a map ordered by token index, then written
 * out either as plain text or as rebuilt w:r markup.
 *
 * @param textSB buffer that receives the result
 * @param vn navigator positioned on the node to analyze; its position is restored before the output is built
 * @param isText {@code true} to append only the text of each collected piece, {@code false} to append
 *        ph fragments verbatim and wrap text in w:r / w:t elements
 * @throws Exception if navigation or XPath evaluation fails
 */
private void anaysisTgtOrSrcNode(StringBuffer textSB, VTDNav vn, boolean isText) throws Exception {
    vn.push();
    AutoPilot otherAP = new AutoPilot(vn);
    otherAP.selectXPath("./text()|node()");
    Map<Integer, SectionSegBean> targetMap = new TreeMap<Integer, SectionSegBean>();
    while (otherAP.evalXPath() != -1) {
        int index = vn.getCurrentIndex();
        switch (vn.getTokenType(index)) {
        case 0:
            // Element child (token type 0): delegate to the tag analysis.
            ananysisTag(vn, targetMap);
            break;
        case 5:
            // Text child (token type 5): keep the raw text.
            targetMap.put(index, new SectionSegBean(null, vn.toRawString(index), null, null, null));
            break;
        default:
            break;
        }
    }
    vn.pop();

    for (SectionSegBean bean : targetMap.values()) {
        if (isText) {
            if (bean.getText() != null) {
                textSB.append(bean.getText());
            }
            continue;
        }

        // From here on the w:r nodes are rebuilt.
        String ctype = bean.getCtype() == null ? "" : bean.getCtype();
        String style = bean.getStyle() == null ? "" : bean.getStyle();
        String extendNodes = bean.getExtendNodesStr() == null ? "" : bean.getExtendNodesStr();
        if (bean.getPhTagStr() != null) {
            textSB.append(bean.getPhTagStr());
            continue;
        }

        String closingTag = null;
        if (!"".equals(ctype)) {
            // ctype holds the opening tag of the wrapper, e.g. <w:hyperlink r:id="rId8" w:history="1">;
            // its element name ends at the first space, or at '>' when there is no space.
            int spaceIdx = ctype.indexOf(" ");
            int endIdx = spaceIdx == -1 ? ctype.indexOf(">") : spaceIdx;
            String nodeName = ctype.substring(ctype.indexOf("<") + 1, endIdx);
            textSB.append(ctype);
            closingTag = "</" + nodeName + ">";
        }
        textSB.append("<w:r>").append(style).append(extendNodes);
        textSB.append("<w:t xml:space=\"preserve\">").append(bean.getText()).append("</w:t></w:r>");
        if (closingTag != null) {
            textSB.append(closingTag);
        }
    }
}
use of net.heartsome.cat.converter.word2007.common.SectionSegBean in project translationstudio8 by heartsome.
the class PartOperate method analysisNodeP.
/**
* Analyzes the current w:p node: extracts the content to translate and replaces it with placeholders.
* <p>
* Text is collected from child nodes named w:r or w:hyperlink that have no w:anchor attribute and no nested
* w:p; the second pass additionally accepts w:fldSimple in its node-name check.
* @throws Exception if one of the navigation, segmentation or output helpers called here fails
*/
protected void analysisNodeP() throws Exception {
// Presumably the number of translatable text nodes in this paragraph; getTextCount() is not visible
// here - TODO confirm. Nothing is done when the count is 0.
int textCount = getTextCount();
int index = -1;
// Pieces of the segment currently being assembled, ordered by token index.
Map<Integer, SectionSegBean> sectionSegMap = new TreeMap<Integer, SectionSegBean>();
// Placeholders
StringBuffer placeHolderSB = new StringBuffer();
// Start processing the text of the paragraph.
if (textCount == 1) {
// Only one w:r//w:t in the paragraph: read its content directly. Children that contain a nested
// w:p are excluded by the XPath predicate.
childAP.selectXPath("./node()[(name()='w:r' or name()='w:hyperlink') and not(@w:anchor) and not(descendant::node()[name()='w:p'])]/descendant::w:t/text()");
vn.push();
if (childAP.evalXPath() != -1) {
index = vn.getCurrentIndex();
String segment = vn.toRawString(index);
// Nothing to translate when deleteBlank() leaves an empty string.
if ("".equals(deleteBlank(segment))) {
vn.pop();
return;
}
String[] segs = segmenter.segment(segment);
for (String seg : segs) {
// Create a trans-unit node for every segment and collect its placeholder.
placeHolderSB.append(xlfOutput.addTransUnit(seg));
}
// The text token is replaced by the concatenated placeholders in a single update.
xm.updateToken(index, placeHolderSB.toString());
}
vn.pop();
} else if (textCount > 1) {
// Paragraphs without w:t nodes are not processed (textCount would be 0). Children that contain a
// nested w:p are excluded by the XPath predicate.
vn.push();
// First pass: collect all text to translate in this paragraph, then segment it.
List<StringBuffer> segList = new ArrayList<StringBuffer>();
StringBuffer segSB = new StringBuffer();
childAP.selectXPath("./node()[(name()='w:r' or name()='w:hyperlink') and not(@w:anchor) and not(descendant::node()[name()='w:p'])]");
while (childAP.evalXPath() != -1) {
// An element whose content contains "<w:br" closes the current chunk and starts a new one.
if (vu.getElementContent().indexOf("<w:br") != -1) {
segList.add(segSB);
segSB = new StringBuffer();
}
String curText = getText();
curText = (curText == null ? "" : curText);
segSB.append(curText);
}
segList.add(segSB);
// Leave early when the collected text is blank.
StringBuffer checkBlankSB = new StringBuffer();
for (StringBuffer curSB : segList) {
checkBlankSB.append(curSB);
}
if (deleteBlank(checkBlankSB.toString()).length() <= 0) {
vn.pop();
return;
}
vn.pop();
// Split every chunk into segments.
List<String> segArrayList = new ArrayList<String>();
for (StringBuffer curSB : segList) {
String[] segArray = segmenter.segment(curSB.toString());
for (int i = 0; i < segArray.length; i++) {
segArrayList.add(segArray[i]);
}
}
String[] segArray = segArrayList.toArray(new String[] {});
// Second pass: walk every child node (./node()) and match its text against the segments.
vn.push();
childAP.selectXPath("./node()");
// Set once a segment has been started.
boolean segStart = false;
// Set once the current segment is finished.
boolean segOver = false;
int segIdx = 0;
// Part of the current segment that still has to be matched.
// NOTE(review): throws ArrayIndexOutOfBoundsException if the segmenter returned no segments - confirm
// that cannot happen for non-blank text.
String seg = segArray[segIdx];
while (childAP.evalXPath() != -1) {
index = vn.getCurrentIndex();
// isLink and linkText are not declared in this method; they are reset for every child node.
isLink = false;
linkText = null;
String nodeName = vu.getCurrentElementName();
if ("w:r".equals(nodeName) || "w:hyperlink".equals(nodeName) || "w:fldSimple".equals(nodeName)) {
if ("w:hyperlink".equals(nodeName)) {
isLink = true;
linkText = getLinkText();
}
if ("w:r".equals(nodeName)) {
// Check whether the run contains a soft line break (w:br).
vn.push();
boolean hasBr = false;
extendAP.selectXPath("./w:br");
if (extendAP.evalXPath() != -1) {
hasBr = true;
}
vn.pop();
if (hasBr && (sectionSegMap.size() > 0)) {
// A soft line break ends the segment: the pieces collected so far become one trans-unit
// whose placeholder is inserted before this run.
segOver = true;
segStart = false;
String placeHoderStr = xlfOutput.addTransUnit(createSourceStr(sectionSegMap));
xm.insertBeforeElement(placeHoderStr);
// The unmatched remainder of the segment is written back into the array.
segArray[segIdx] = seg;
// NOTE(review): the rest of the loop body is skipped, so the text of this run itself is not
// matched against the segments - confirm this is intended.
continue;
}
}
String text = getText();
if (text != null) {
segStart = true;
segOver = false;
String ctypeAttrStr = "";
String rStyle = null;
String extendNodesStr = null;
if (isLink) {
// For links the element head is stored in the ctype attribute.
ctypeAttrStr = " ctype='" + xlfOutput.cleanString(vu.getElementHead()) + "'";
vn.push();
extendAP.selectXPath("./w:r");
// When linkText is not null, rStyle and extendNodesStr do not have to be read.
if (linkText == null && extendAP.evalXPath() != -1) {
rStyle = getRStyle();
extendNodesStr = getExtendNodes();
}
vn.pop();
} else {
rStyle = getRStyle();
extendNodesStr = getExtendNodes();
}
// extendNodes is a custom attribute for nodes other than style and text, e.g. <w:tab/>.
String extendNodesAttr = ("".equals(extendNodesStr) || extendNodesStr == null) ? "" : " extendNodes='" + xlfOutput.cleanString(extendNodesStr) + "'";
// rPr keeps the style of the w:r run.
String rprAttr = ("".equals(rStyle) || rStyle == null) ? "" : " rPr='" + xlfOutput.cleanString(rStyle) + "'";
// When the text of this node is a complete segment of its own, it is replaced by a placeholder.
if (text.equals(segArray[segIdx])) {
String placeHoderStr = "";
if (isLink && linkText != null) {
placeHoderStr = xlfOutput.addTransUnit(linkText);
} else {
placeHoderStr = xlfOutput.addTransUnit(seg);
}
updateTextToPlaceHoder(placeHoderStr);
if (segIdx + 1 < segArray.length) {
seg = segArray[++segIdx];
}
segOver = true;
segStart = false;
continue;
}
// Links must not be split across segments; modifySeg() returns the adjusted segment array
// (element 0) and the adjusted current segment (element 1).
if (isLink) {
List<Object> resultList = modifySeg(segArray, segIdx, text, seg);
segArray = (String[]) resultList.get(0);
seg = (String) resultList.get(1);
}
// Match the text of this node against the remaining part of the current segment.
if (text.equals(seg)) {
// Case 1: the text completes the current segment.
// NOTE(review): newSectionSegBean(...) is not defined in the visible source, while other call
// sites use "new SectionSegBean(...)" - confirm this is not a lost space.
if ("".equals(rprAttr) && "".equals(extendNodesAttr)) {
// Plain text only.
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, null, null, null));
} else {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, rprAttr, extendNodesAttr, null));
}
if (segIdx + 1 < segArray.length) {
seg = segArray[++segIdx];
}
segOver = true;
segStart = false;
xm.remove();
String placeHoderStr = xlfOutput.addTransUnit(createSourceStr(sectionSegMap));
xm.insertAfterElement(placeHoderStr);
} else if (text.length() < seg.length() && seg.indexOf(text) == 0) {
// Case 2: the text is shorter than the segment and is a prefix of it; the node is collected
// and removed, and the segment is shortened by the matched prefix.
if ("".equals(rprAttr) && "".equals(extendNodesAttr)) {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, null, null, null));
} else {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, rprAttr, extendNodesAttr, null));
}
xm.remove();
// Original note: when replacing text containing "(", it had to be written as \\( or [(] or an
// error occurred. Presumably a leftover from a regex-based replace; substring() is used now.
seg = seg.substring(text.length());
} else if (text.length() > seg.length() && text.indexOf(seg) == 0) {
// Case 3: the text is longer than the segment and starts with it, so this node spans several
// segments.
if ("".equals(rprAttr) && "".equals(extendNodesAttr)) {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, seg, null, null, null));
} else {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, seg, rprAttr, extendNodesAttr, null));
}
text = text.substring(seg.length());
// Several placeholders may have to be inserted here. They are gathered and inserted in one go
// because the XML modifier does not allow modifying the same location more than once.
StringBuffer replaceHolderSB = new StringBuffer();
replaceHolderSB.append(xlfOutput.addTransUnit(createSourceStr(sectionSegMap)));
xm.remove();
if (segIdx + 1 < segArray.length) {
seg = segArray[++segIdx];
}
// Process the rest of the text with the same three cases.
// NOTE(review): if none of the three conditions below holds (for example when seg could not be
// advanced past the last segment), text is never shortened and this loop does not end.
while (text.length() != 0) {
if (text.equals(seg)) {
if ("".equals(rprAttr) && "".equals(extendNodesAttr)) {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, null, null, null));
} else {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, rprAttr, extendNodesAttr, null));
}
text = "";
segOver = true;
segStart = false;
replaceHolderSB.append(xlfOutput.addTransUnit(createSourceStr(sectionSegMap)));
if (segIdx + 1 < segArray.length) {
seg = segArray[++segIdx];
}
} else if (text.length() < seg.length() && seg.indexOf(text) == 0) {
if ("".equals(rprAttr) && "".equals(extendNodesAttr)) {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, null, null, null));
} else {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, text, rprAttr, extendNodesAttr, null));
}
seg = seg.substring(text.length());
text = "";
} else if (text.length() > seg.length() && text.indexOf(seg) == 0) {
if ("".equals(rprAttr) && "".equals(extendNodesAttr)) {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, seg, null, null, null));
} else {
sectionSegMap.put(index, newSectionSegBean(ctypeAttrStr, seg, rprAttr, extendNodesAttr, null));
}
text = text.substring(seg.length());
replaceHolderSB.append(xlfOutput.addTransUnit(createSourceStr(sectionSegMap)));
if (segIdx + 1 < segArray.length) {
seg = segArray[++segIdx];
}
}
}
xm.insertAfterElement(replaceHolderSB.toString());
}
} else if (segStart && !segOver) {
// A w:r / w:hyperlink / w:fldSimple without text inside an open segment is kept as a ph piece
// and removed from the document.
sectionSegMap.put(index, new SectionSegBean(null, null, null, null, xlfOutput.cleanString(vu.getElementFragment())));
xm.remove();
}
} else if (segStart && !segOver) {
// Any other node inside an open segment is kept as a ph piece and removed from the document.
sectionSegMap.put(index, new SectionSegBean(null, null, null, null, xlfOutput.cleanString(vu.getElementFragment())));
xm.remove();
}
}
vn.pop();
}
}
use of net.heartsome.cat.converter.word2007.common.SectionSegBean in project translationstudio8 by heartsome.
the class PartOperate method createSourceStr.
/**
 * Builds the source text for a trans-unit node from the collected segment pieces.
 * <p>
 * Pieces are processed in the iteration order of the map. Plain text is emitted as is, styled text is
 * wrapped in a g tag, and placeholder fragments are wrapped in a ph tag. The map is cleared before
 * returning, so the caller can reuse it for the next segment.
 *
 * @param sectionSegMap pieces of the current segment; emptied by this method
 * @return the assembled source text
 */
private String createSourceStr(Map<Integer, SectionSegBean> sectionSegMap) {
    // ArrayList: the pieces are accessed by index, which is linear per access on a LinkedList.
    List<SectionSegBean> segList = new ArrayList<SectionSegBean>(sectionSegMap.values());
    StringBuilder srcTextSB = new StringBuilder();

    for (int i = 0; i < segList.size(); i++) {
        SectionSegBean bean = segList.get(i);
        String ctype = bean.getCtype() == null ? "" : bean.getCtype();
        String style = bean.getStyle() == null ? "" : bean.getStyle();
        String extendNodes = bean.getExtendNodesStr() == null ? "" : bean.getExtendNodesStr();
        String phTagStr = bean.getPhTagStr();

        if (phTagStr == null && "".equals(ctype) && "".equals(style) && "".equals(extendNodes)) {
            srcTextSB.append(bean.getText());
        } else if (phTagStr == null) {
            srcTextSB.append("<g id='" + xlfOutput.useTagId() + "'" + ctype + style + extendNodes + ">");
            srcTextSB.append(bean.getText());
            // Following pieces with the same style and extend nodes are folded into this g tag, but only
            // when neither piece has a ctype, because a ctype usually holds a <w:hyperlink ...> head.
            while (i + 1 < segList.size()) {
                SectionSegBean next = segList.get(i + 1);
                String nextCtype = next.getCtype() == null ? "" : next.getCtype();
                String nextStyle = next.getStyle() == null ? "" : next.getStyle();
                String nextExtendNodes = next.getExtendNodesStr() == null ? "" : next.getExtendNodesStr();
                boolean mergeable = nextStyle.equals(style) && nextExtendNodes.equals(extendNodes)
                        && "".equals(ctype) && "".equals(nextCtype);
                if (!mergeable) {
                    break;
                }
                srcTextSB.append(next.getText());
                i++;
            }
            srcTextSB.append("</g>");
        } else if (!"".equals(phTagStr.trim())) {
            srcTextSB.append("<ph id='" + xlfOutput.useTagId() + "'>");
            srcTextSB.append(phTagStr);
            // Consecutive placeholder pieces are folded into a single ph tag.
            while (i + 1 < segList.size()) {
                SectionSegBean next = segList.get(i + 1);
                if (next.getPhTagStr() == null) {
                    break;
                }
                srcTextSB.append(next.getPhTagStr());
                i++;
            }
            srcTextSB.append("</ph>");
        }
    }

    sectionSegMap.clear();
    return srcTextSB.toString();
}
use of net.heartsome.cat.converter.word2007.common.SectionSegBean in project translationstudio8 by heartsome.
the class XliffInputer method ananysisTag.
/**
 * Analyzes the tag the cursor is on and records its content in {@code targetMap}, keyed by token index.
 * <p>
 * g and sub tags contribute their rPr / extendNodes attributes (and ctype for g) to the beans created for
 * their text children. A g tag that has sub descendants is stored as one bean whose text is the
 * concatenation of its children. ph tags, and any other tag, are stored as placeholder fragments.
 *
 * @param vn navigator positioned on the tag to analyze; its position is restored before returning
 * @param targetMap receives the analyzed pieces
 * @throws Exception if navigation or XPath evaluation fails
 */
private void ananysisTag(VTDNav vn, Map<Integer, SectionSegBean> targetMap) throws Exception {
    vn.push();
    AutoPilot tagAP = new AutoPilot(vn);
    int index = vn.getCurrentIndex();
    String tagName = vn.toString(index);
    boolean isGTag = "g".equals(tagName);

    if (isGTag || "sub".equals(tagName)) {
        int rPrIdx = vn.getAttrVal("rPr");
        String style = rPrIdx == -1 ? "" : vn.toString(rPrIdx);
        int extendIdx = vn.getAttrVal("extendNodes");
        String extendNodes = extendIdx == -1 ? "" : vn.toString(extendIdx);

        // Only g tags carry a ctype and may need their sub descendants collected; sub tags use null.
        String ctype = null;
        boolean hasSub = false;
        if (isGTag) {
            int ctypeIdx = vn.getAttrVal("ctype");
            ctype = ctypeIdx == -1 ? "" : vn.toString(ctypeIdx);
            // First check whether the g tag contains sub nodes.
            tagAP.selectXPath("count(./descendant::sub)");
            hasSub = ((int) tagAP.evalXPathToNumber()) > 0;
        }
        tagAP.selectXPath("./node()|text()");

        int gIdx = -1;
        Map<Integer, String> gTextMap = null;
        if (hasSub) {
            gIdx = vn.getCurrentIndex();
            gTextMap = new TreeMap<Integer, String>();
        }

        while (tagAP.evalXPath() != -1) {
            int childIdx = vn.getCurrentIndex();
            int tokenType = vn.getTokenType(childIdx);
            if (tokenType == 5) {
                // Text child
                if (hasSub) {
                    gTextMap.put(childIdx, resetCleanStr(vn.toRawString(childIdx)));
                } else {
                    targetMap.put(childIdx, new SectionSegBean(ctype, vn.toRawString(childIdx), style, extendNodes, null));
                }
                continue;
            }
            if (tokenType != 0) {
                continue;
            }
            // Element child
            String nodeName = vn.toString(childIdx);
            if ("ph".equals(nodeName)) {
                String phContent = resetCleanStr(vu.getElementContent());
                if (hasSub) {
                    gTextMap.put(childIdx, phContent);
                } else {
                    targetMap.put(childIdx, new SectionSegBean(null, null, null, null, phContent));
                }
            } else if ("g".equals(nodeName)) {
                ananysisTag(vn, targetMap);
            } else if (hasSub && "sub".equals(nodeName)) {
                ananysisSubTag(vn, gTextMap, targetMap);
            }
        }

        if (hasSub) {
            // The collected children are joined in token order and stored under the index of the g tag.
            StringBuilder gTextSB = new StringBuilder();
            for (String piece : gTextMap.values()) {
                gTextSB.append(piece);
            }
            targetMap.put(gIdx, new SectionSegBean(ctype, gTextSB.toString(), style, extendNodes, null));
        }
    } else if ("ph".equals(tagName)) {
        targetMap.put(index, new SectionSegBean(null, null, null, null, resetCleanStr(vu.getElementContent())));
    } else {
        // Every other node is handled as a plain string.
        targetMap.put(index, new SectionSegBean(null, null, null, null, resetCleanStr(vu.getElementFragment())));
    }
    vn.pop();
}
Aggregations