use of net.heartsome.xml.Element in project translationstudio8 by heartsome.
the class StringSegmenter method segment.
/**
 * Splits a string into segments by applying every configured rule in order.
 * <p>
 * A rule whose {@code break} attribute is "yes" (the default) splits each
 * current piece via {@code split}; any other rule joins neighbouring pieces
 * back together when the left piece matches the rule's {@code beforebreak}
 * expression and the right piece matches its {@code afterbreak} expression.
 * Blank input, or an empty rule list, yields the input unchanged as the only
 * element.
 *
 * @param string
 *            the text to segment
 * @return the segments, each passed through {@code cleanup} and the whole
 *         array passed through {@code analysisBlank}
 */
public String[] segment(String string) {
    boolean nothingToSegment = string.trim().equals("") || rules.size() == 0; //$NON-NLS-1$
    if (nothingToSegment) {
        return new String[] { string };
    }

    Vector<String> pieces = new Vector<String>();
    // tags is reset before prepareString runs, as in the original ordering
    tags = new Hashtable<String, String>();
    pieces.add(prepareString(string));

    // apply the rules one after another, each on the output of the previous one
    final int ruleCount = rules.size();
    for (int ruleIndex = 0; ruleIndex < ruleCount; ruleIndex++) {
        Element rule = rules.get(ruleIndex);
        boolean breaks = rule.getAttributeValue("break", "yes").equalsIgnoreCase("yes"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
        Element before = rule.getChild("beforebreak"); //$NON-NLS-1$
        Element after = rule.getChild("afterbreak"); //$NON-NLS-1$
        String beforexp = (before != null) ? before.getText() : ""; //$NON-NLS-1$
        String afterxp = (after != null) ? after.getText() : ""; //$NON-NLS-1$

        Vector<String> rebuilt = new Vector<String>();
        if (!breaks) {
            // Exception rule (e.g. abbreviations): glue adjacent pieces back together
            String pending = pieces.get(0);
            for (int j = 1; j < pieces.size(); j++) {
                String candidate = pieces.get(j);
                boolean glue = endsWith(pending, beforexp) && startsWith(candidate, afterxp);
                if (glue) {
                    pending = pending + candidate;
                } else {
                    rebuilt.add(pending);
                    pending = candidate;
                }
            }
            rebuilt.add(pending);
        } else {
            // Break rule: every piece may be split into several parts
            for (String piece : pieces) {
                for (String part : split(piece, beforexp, afterxp)) {
                    rebuilt.add(part);
                }
            }
        }
        pieces = rebuilt;
    }

    String[] result = new String[pieces.size()];
    int slot = 0;
    for (String piece : pieces) {
        result[slot++] = cleanup(piece);
    }
    return analysisBlank(result);
}
use of net.heartsome.xml.Element in project translationstudio8 by heartsome.
the class ReverseConversionValidateWithLibrary3 method getSkeleton.
/**
* 获取骨架文件
* @return 骨架文件路径
* @throws IOException
* 在读取骨架文件失败时抛出 IO 异常 ;
*/
private String getSkeleton(String xlfPath) throws IOException {
//$NON-NLS-1$
String result = "";
//$NON-NLS-1$
Element file = root.getChild("file");
Element header = null;
String encoding = "";
if (file != null) {
//$NON-NLS-1$
header = file.getChild("header");
if (header != null) {
// 添加源文件编码的读取
//$NON-NLS-1$
List<Element> propGroups = header.getChildren("hs:prop-group");
for (int i = 0; i < propGroups.size(); i++) {
Element prop = propGroups.get(i);
if (prop.getAttributeValue("name").equals("encoding")) {
//$NON-NLS-1$ //$NON-NLS-2$
encoding = prop.getText().trim();
break;
}
}
if (encoding.equals("utf-8")) {
//$NON-NLS-1$
//$NON-NLS-1$
encoding = "UTF-8";
}
//$NON-NLS-1$
Element mskl = header.getChild("skl");
if (mskl != null) {
//$NON-NLS-1$
Element external = mskl.getChild("external-file");
IFile xlfIfile = ConverterUtil.localPath2IFile(xlfPath);
if (external != null) {
//$NON-NLS-1$
result = external.getAttributeValue("href");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll("&", "&");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll("<", "<");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll(">", ">");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll("'", "\'");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll(""", "\"");
result = xlfIfile.getProject().getLocation().toOSString() + result;
} else {
//$NON-NLS-1$
Element internal = mskl.getChild("internal-file");
if (internal != null) {
//$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
File tmp = File.createTempFile("internal", ".skl", new File(xlfIfile.getProject().getWorkspace().getRoot().getLocation().toOSString()));
tmp.deleteOnExit();
FileOutputStream out = new FileOutputStream(tmp);
List<Node> content = internal.getContent();
for (int i = 0; i < content.size(); i++) {
Node n = content.get(i);
if (n.getNodeType() == Node.TEXT_NODE) {
out.write(n.getNodeValue().getBytes(encoding));
} else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
// fixed bub 515 by john.
String cdataString = n.getNodeValue();
if (cdataString.endsWith("]]")) {
//$NON-NLS-1$
//$NON-NLS-1$
cdataString += ">";
}
out.write(cdataString.getBytes());
}
}
out.close();
return tmp.getAbsolutePath();
}
return result;
}
external = null;
mskl = null;
} else {
return result;
}
} else {
return result;
}
} else {
return result;
}
if (encoding != null) {
if (encoding.equals("")) {
//$NON-NLS-1$
//$NON-NLS-1$
List<Element> groups = header.getChildren("hs:prop-group");
for (int i = 0; i < groups.size(); i++) {
Element group = groups.get(i);
//$NON-NLS-1$
List<Element> props = group.getChildren("hs:prop");
for (int k = 0; k < props.size(); k++) {
Element prop = props.get(k);
if (prop.getAttributeValue("prop-type", "").equals("encoding")) {
//$NON-NLS-1$
encoding = prop.getText();
}
}
}
}
}
header = null;
file = null;
return result;
}
use of net.heartsome.xml.Element in project translationstudio8 by heartsome.
the class ReverseConversionValidateWithLibrary3 method readXliff.
/**
 * Parses the given XLIFF file and caches the document, its root element, and
 * the {@code datatype} and {@code target-language} attributes of its
 * {@code file} element in the corresponding fields.
 * <p>
 * The {@code file} element is dereferenced without a null check.
 *
 * @param xliff
 *            path of the XLIFF file
 * @throws SAXException
 *             declared for the parse step
 * @throws IOException
 *             declared for the parse step
 * @throws ParserConfigurationException
 *             declared for the parse step
 */
private void readXliff(String xliff) throws SAXException, IOException, ParserConfigurationException {
    builder = new SAXBuilder();
    Catalogue entityResolver = new Catalogue(ConverterContext.catalogue);
    builder.setEntityResolver(entityResolver);

    doc = builder.build(xliff);
    root = doc.getRootElement();

    Element fileElement = root.getChild("file"); //$NON-NLS-1$
    dataType = fileElement.getAttributeValue("datatype"); //$NON-NLS-1$

    // the fallback target language comes from the message bundle
    String fallbackLanguage = Messages.getString("model.ReverseConversionValidateWithLibrary3.msg4"); //$NON-NLS-1$
    targetLanguage = fileElement.getAttributeValue("target-language", fallbackLanguage); //$NON-NLS-1$
}
use of net.heartsome.xml.Element in project translationstudio8 by heartsome.
the class ReverseConversionValidateWithLibrary3 method reBuildXlf.
/**
 * Builds a copy of the XLIFF file: rewrites the segments of the in-memory
 * document and writes the document to {@code tmpXLFFile}.
 * <p>
 * Every segment except the last is processed in order. Its source and target
 * content is scanned from the front (see
 * {@link #takeNodesBeforeMergeBoundary(List)}): the scanned nodes stay in the
 * segment, and any nodes left unscanned are prepended to the source/target of
 * the following segment. A missing {@code target} element is created right
 * after {@code source}. When nodes are carried over from a segment that is
 * not approved, the following segment is marked {@code approved="no"}.
 * <p>
 * NOTE(review): this relies on {@code Element.getContent()} returning a list
 * that is not emptied by {@code removeAllChildren()} — confirm.
 *
 * @param tmpXLFFile
 *            the file the rebuilt document is written to
 * @throws IOException
 *             if the output file cannot be opened, written or closed
 */
private void reBuildXlf(File tmpXLFFile) throws IOException {
    long startTime = 0;
    if (LOGGER.isInfoEnabled()) {
        startTime = System.currentTimeMillis();
        LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger14"), startTime);
    }
    // The last segment has no successor to receive carried-over nodes, so it is skipped.
    for (int i = 0, size = segments.size() - 1; i < size; i++) {
        Element e = segments.get(i);
        Element src = e.getChild("source"); //$NON-NLS-1$
        Element tgt = e.getChild("target"); //$NON-NLS-1$
        boolean isApproved = e.getAttributeValue("approved", "no").equalsIgnoreCase("yes"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        // After this call srcList holds only the nodes that move to the next segment.
        List<Node> srcList = src.getContent();
        Vector<Node> keptSrc = takeNodesBeforeMergeBoundary(srcList);
        src.removeAllChildren();
        src.setContent(keptSrc);

        if (tgt == null) {
            // the language attribute name for this target comes from the message bundle
            tgt = insertNewTarget(e, Messages.getString("model.ReverseConversionValidateWithLibrary3.msg5")); //$NON-NLS-1$
        }
        List<Node> tgtList = tgt.getContent();
        Vector<Node> keptTgt = takeNodesBeforeMergeBoundary(tgtList);
        tgt.removeAllChildren();
        tgt.setContent(keptTgt);

        Element nextEl = segments.get(i + 1);
        if (!isApproved && srcList.size() > 0) {
            // content from an unapproved segment is carried over
            nextEl.setAttribute("approved", "no"); //$NON-NLS-1$ //$NON-NLS-2$
        }
        Element nextSrc = nextEl.getChild("source"); //$NON-NLS-1$
        Element nextTgt = nextEl.getChild("target"); //$NON-NLS-1$
        if (nextTgt == null) {
            nextTgt = insertNewTarget(nextEl, "xml:lang"); //$NON-NLS-1$
        }
        List<Node> nextSrcContent = nextSrc.getContent();
        List<Node> nextTgtContent = nextTgt.getContent();

        // carried-over nodes first, then the next segment's own content
        nextSrc.removeAllChildren();
        Vector<Node> newNextSrcContent = new Vector<Node>();
        newNextSrcContent.addAll(srcList);
        newNextSrcContent.addAll(nextSrcContent);
        nextSrc.setContent(newNextSrcContent);

        nextTgt.removeAllChildren();
        Vector<Node> newNextTgtContent = new Vector<Node>();
        newNextTgtContent.addAll(tgtList);
        newNextTgtContent.addAll(nextTgtContent);
        nextTgt.setContent(newNextTgtContent);
    }
    long endTime = 0;
    if (LOGGER.isInfoEnabled()) {
        endTime = System.currentTimeMillis();
        LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger15"), endTime);
        LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger16"), (endTime - startTime));
    }

    XMLOutputter outputter = new XMLOutputter();
    outputter.preserveSpace(true);
    FileOutputStream out = new FileOutputStream(tmpXLFFile);
    try {
        if (LOGGER.isInfoEnabled()) {
            startTime = System.currentTimeMillis();
            LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger17"), startTime);
        }
        outputter.output(doc, out);
        if (LOGGER.isInfoEnabled()) {
            endTime = System.currentTimeMillis();
            LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger18"), endTime);
            LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger19"), (endTime - startTime));
        }
    } finally {
        // close the stream even when writing the document fails
        out.close();
    }
}

/**
 * Removes nodes from the front of {@code nodes} and collects them, stopping at
 * the first {@code ph} element whose merge id joins two different ids.
 * <p>
 * A {@code ph} element whose {@code id} starts with "hs-merge" is always
 * dropped. The rest of its id is split on "~" and each of the two parts is cut
 * at its first "-"; if the parts differ, scanning stops and the nodes not yet
 * visited remain in {@code nodes}. Every other node is moved to the result.
 * NOTE(review): presumably the two parts identify the units joined by an
 * earlier merge — confirm. An id without "~" raises
 * {@code ArrayIndexOutOfBoundsException}, as before.
 *
 * @param nodes
 *            content list to consume from the front; modified in place
 * @return the nodes scanned before the stop point, merge placeholders excluded
 */
private Vector<Node> takeNodesBeforeMergeBoundary(List<Node> nodes) {
    Vector<Node> kept = new Vector<Node>();
    while (!nodes.isEmpty()) {
        Node o = nodes.remove(0);
        boolean isPlaceholder = o.getNodeType() == Node.ELEMENT_NODE && o.getNodeName().equals("ph"); //$NON-NLS-1$
        if (!isPlaceholder) {
            kept.add(o);
            continue;
        }
        String id = new Element(o).getAttributeValue("id", ""); //$NON-NLS-1$ //$NON-NLS-2$
        if (!id.startsWith("hs-merge")) { //$NON-NLS-1$
            kept.add(o);
            continue;
        }
        // 8 == "hs-merge".length()
        String[] pairId = id.substring(8).split("~"); //$NON-NLS-1$
        int idIndex = pairId[0].indexOf("-"); //$NON-NLS-1$
        if (idIndex != -1) {
            pairId[0] = pairId[0].substring(0, idIndex);
        }
        idIndex = pairId[1].indexOf("-"); //$NON-NLS-1$
        if (idIndex != -1) {
            pairId[1] = pairId[1].substring(0, idIndex);
        }
        if (!pairId[0].equals(pairId[1])) {
            break;
        }
    }
    return kept;
}

/**
 * Creates a {@code target} element with {@code state="new"} and the current
 * target language, and places it directly after each {@code source} child of
 * {@code unit}.
 *
 * @param unit
 *            the segment element that receives the new target
 * @param langAttributeName
 *            name of the attribute that stores the target language
 * @return the newly created target element
 */
private Element insertNewTarget(Element unit, String langAttributeName) {
    Element target = new Element("target", doc); //$NON-NLS-1$
    target.setAttribute(langAttributeName, targetLanguage);
    target.setAttribute("state", "new"); //$NON-NLS-1$ //$NON-NLS-2$
    List<Element> content = unit.getChildren();
    Vector<Element> newContent = new Vector<Element>();
    for (int m = 0; m < content.size(); m++) {
        Element child = content.get(m);
        newContent.add(child);
        if (child.getName().equals("source")) { //$NON-NLS-1$
            newContent.add(target);
        }
    }
    unit.setContent(newContent);
    return target;
}
use of net.heartsome.xml.Element in project translationstudio8 by heartsome.
the class ReverseConversionValidateWithLibrary3 method getSkeleton.
/**
* 获取骨架文件
* @return 骨架文件路径
* @throws IOException
* 在读取骨架文件失败时抛出 IO 异常 ;
*/
private String getSkeleton() throws IOException {
//$NON-NLS-1$
String result = "";
//$NON-NLS-1$
Element file = root.getChild("file");
Element header = null;
String encoding = "";
if (file != null) {
//$NON-NLS-1$
header = file.getChild("header");
if (header != null) {
// 添加源文件编码的读取
//$NON-NLS-1$
List<Element> propGroups = header.getChildren("hs:prop-group");
for (int i = 0; i < propGroups.size(); i++) {
Element prop = propGroups.get(i);
if (prop.getAttributeValue("name").equals("encoding")) {
//$NON-NLS-1$ //$NON-NLS-2$
encoding = prop.getText().trim();
break;
}
}
if (encoding.equals("utf-8")) {
//$NON-NLS-1$
//$NON-NLS-1$
encoding = "UTF-8";
}
//$NON-NLS-1$
Element mskl = header.getChild("skl");
if (mskl != null) {
//$NON-NLS-1$
Element external = mskl.getChild("external-file");
if (external != null) {
//$NON-NLS-1$
result = external.getAttributeValue("href");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll("&", "&");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll("<", "<");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll(">", ">");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll("'", "\'");
//$NON-NLS-1$ //$NON-NLS-2$
result = result.replaceAll(""", "\"");
} else {
//$NON-NLS-1$
Element internal = mskl.getChild("internal-file");
if (internal != null) {
//$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
File tmp = File.createTempFile("internal", ".skl", new File("skl"));
tmp.deleteOnExit();
FileOutputStream out = new FileOutputStream(tmp);
List<Node> content = internal.getContent();
for (int i = 0; i < content.size(); i++) {
Node n = content.get(i);
if (n.getNodeType() == Node.TEXT_NODE) {
out.write(n.getNodeValue().getBytes(encoding));
} else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
// fixed bub 515 by john.
String cdataString = n.getNodeValue();
if (cdataString.endsWith("]]")) {
//$NON-NLS-1$
//$NON-NLS-1$
cdataString += ">";
}
out.write(cdataString.getBytes(encoding));
}
}
out.close();
return tmp.getAbsolutePath();
}
return result;
}
external = null;
mskl = null;
} else {
return result;
}
} else {
return result;
}
} else {
return result;
}
if (encoding != null) {
if (encoding.equals("")) {
//$NON-NLS-1$
//$NON-NLS-1$
List<Element> groups = header.getChildren("hs:prop-group");
for (int i = 0; i < groups.size(); i++) {
Element group = groups.get(i);
//$NON-NLS-1$
List<Element> props = group.getChildren("hs:prop");
for (int k = 0; k < props.size(); k++) {
Element prop = props.get(k);
if (prop.getAttributeValue("prop-type", "").equals("encoding")) {
//$NON-NLS-1$
encoding = prop.getText();
}
}
}
}
}
header = null;
file = null;
return result;
}
Aggregations