Search in sources :

Example 6 with Element

use of net.heartsome.xml.Element in project translationstudio8 by heartsome.

the class StringSegmenter method segment.

/**
 * Splits a string into segments by applying every configured rule, in order.
 * <p>
 * Input that is empty after trimming, or any input when no rules are configured, is returned
 * unchanged as a single-element array. Otherwise the text is passed through
 * {@code prepareString} and each rule is applied to the running list of pieces:
 * <ul>
 * <li>a rule whose {@code break} attribute equals "yes" ignoring case ("yes" is also the fallback
 * value passed to the attribute lookup) re-splits every piece with {@code split};</li>
 * <li>any other rule glues a piece to its predecessor when {@code endsWith} accepts the
 * predecessor for the rule's {@code beforebreak} text and {@code startsWith} accepts the piece
 * for the rule's {@code afterbreak} text.</li>
 * </ul>
 * Every final piece goes through {@code cleanup} and the resulting array through
 * {@code analysisBlank}.
 *
 * @param string
 *            the text to segment
 * @return the segments
 */
public String[] segment(String string) {
    if (string.trim().equals("") || rules.size() == 0) { //$NON-NLS-1$
        return new String[] { string };
    }

    // The tag table is reset before prepareString runs, exactly as in the original ordering.
    tags = new Hashtable<String, String>();
    Vector<String> pieces = new Vector<String>();
    pieces.add(prepareString(string));

    final int ruleCount = rules.size();
    for (int ruleIndex = 0; ruleIndex < ruleCount; ruleIndex++) {
        Element rule = rules.get(ruleIndex);
        boolean isBreakRule = rule.getAttributeValue("break", "yes").equalsIgnoreCase("yes"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
        Element beforeElement = rule.getChild("beforebreak"); //$NON-NLS-1$
        Element afterElement = rule.getChild("afterbreak"); //$NON-NLS-1$
        // A missing child element contributes an empty expression.
        String beforePattern = beforeElement == null ? "" : beforeElement.getText(); //$NON-NLS-1$
        String afterPattern = afterElement == null ? "" : afterElement.getText(); //$NON-NLS-1$

        Vector<String> rebuilt = new Vector<String>();
        if (!isBreakRule) {
            // Exception rule (e.g. abbreviations): merge neighbours that match both sides.
            String pending = pieces.get(0);
            for (int pos = 1; pos < pieces.size(); pos++) {
                String candidate = pieces.get(pos);
                boolean glue = endsWith(pending, beforePattern) && startsWith(candidate, afterPattern);
                if (glue) {
                    pending = pending + candidate;
                } else {
                    rebuilt.add(pending);
                    pending = candidate;
                }
            }
            rebuilt.add(pending);
        } else {
            // Break rule: every existing piece may be cut into several parts.
            for (String piece : pieces) {
                for (String part : split(piece, beforePattern, afterPattern)) {
                    rebuilt.add(part);
                }
            }
        }
        pieces = rebuilt;
    }

    String[] result = new String[pieces.size()];
    int slot = 0;
    for (String piece : pieces) {
        result[slot++] = cleanup(piece);
    }
    return analysisBlank(result);
}
Also used : Element(net.heartsome.xml.Element) Vector(java.util.Vector)

Example 7 with Element

use of net.heartsome.xml.Element in project translationstudio8 by heartsome.

the class ReverseConversionValidateWithLibrary3 method getSkeleton.

/**
 * Resolves the skeleton file described by the {@code file/header/skl} element of the loaded
 * XLIFF document.
 * <p>
 * For an {@code external-file} skeleton, the {@code href} value has the five predefined XML
 * entities unescaped and is appended to the OS location of the project that owns
 * {@code xlfPath}. For an {@code internal-file} skeleton, the embedded text and CDATA content is
 * written to a temporary {@code .skl} file (deleted on JVM exit) created in the workspace root
 * directory, and that file's absolute path is returned.
 *
 * @param xlfPath
 *            local path of the XLIFF file; used to look up the owning project and workspace
 * @return the skeleton file path, or an empty string when the document has no {@code file},
 *         {@code header} or {@code skl} element, or the {@code skl} element has neither an
 *         {@code external-file} nor an {@code internal-file} child
 * @throws IOException
 *             if the temporary skeleton file cannot be created or written, including when the
 *             recorded encoding name is not supported
 */
private String getSkeleton(String xlfPath) throws IOException {
    String result = ""; //$NON-NLS-1$
    Element file = root.getChild("file"); //$NON-NLS-1$
    if (file == null) {
        return result;
    }
    Element header = file.getChild("header"); //$NON-NLS-1$
    if (header == null) {
        return result;
    }

    // Read the source-file encoding recorded in the header.
    String encoding = ""; //$NON-NLS-1$
    List<Element> propGroups = header.getChildren("hs:prop-group"); //$NON-NLS-1$
    for (int i = 0; i < propGroups.size(); i++) {
        Element prop = propGroups.get(i);
        // Constant-first comparison: safe even if the lookup yields null for a missing attribute.
        if ("encoding".equals(prop.getAttributeValue("name"))) { //$NON-NLS-1$ //$NON-NLS-2$
            encoding = prop.getText().trim();
            break;
        }
    }
    if (encoding.equals("utf-8")) { //$NON-NLS-1$
        encoding = "UTF-8"; //$NON-NLS-1$
    }

    Element mskl = header.getChild("skl"); //$NON-NLS-1$
    if (mskl == null) {
        return result;
    }
    Element external = mskl.getChild("external-file"); //$NON-NLS-1$
    IFile xlfIfile = ConverterUtil.localPath2IFile(xlfPath);

    if (external == null) {
        Element internal = mskl.getChild("internal-file"); //$NON-NLS-1$
        if (internal == null) {
            return result;
        }
        File tmp = File.createTempFile("internal", ".skl", //$NON-NLS-1$ //$NON-NLS-2$
                new File(xlfIfile.getProject().getWorkspace().getRoot().getLocation().toOSString()));
        tmp.deleteOnExit();
        FileOutputStream out = new FileOutputStream(tmp);
        try {
            List<Node> content = internal.getContent();
            for (int i = 0; i < content.size(); i++) {
                Node n = content.get(i);
                if (n.getNodeType() == Node.TEXT_NODE) {
                    out.write(n.getNodeValue().getBytes(encoding));
                } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
                    // Bug 515: a CDATA value ending in "]]" gets ">" appended.
                    // NOTE(review): presumably restores a "]]>" that was split when the
                    // skeleton was embedded; confirm against the writer side.
                    String cdataString = n.getNodeValue();
                    if (cdataString.endsWith("]]")) { //$NON-NLS-1$
                        cdataString += ">"; //$NON-NLS-1$
                    }
                    // Same charset as text nodes; the platform default would corrupt
                    // non-ASCII content on machines whose default differs.
                    out.write(cdataString.getBytes(encoding));
                }
            }
        } finally {
            // Release the file handle even when a write fails.
            out.close();
        }
        return tmp.getAbsolutePath();
    }

    result = external.getAttributeValue("href"); //$NON-NLS-1$
    result = result.replaceAll("&amp;", "&"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&lt;", "<"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&gt;", ">"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&apos;", "\'"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&quot;", "\""); //$NON-NLS-1$ //$NON-NLS-2$
    result = xlfIfile.getProject().getLocation().toOSString() + result;

    // NOTE(review): this fallback lookup (hs:prop with prop-type="encoding") runs only on the
    // external-file path and its result is never read afterwards. It looks like it was meant
    // to run before the internal-file write; kept in place to avoid changing behaviour.
    // Confirm, then either move it ahead of the write or delete it.
    if (encoding.equals("")) { //$NON-NLS-1$
        List<Element> groups = header.getChildren("hs:prop-group"); //$NON-NLS-1$
        for (int i = 0; i < groups.size(); i++) {
            Element group = groups.get(i);
            List<Element> props = group.getChildren("hs:prop"); //$NON-NLS-1$
            for (int k = 0; k < props.size(); k++) {
                Element prop = props.get(k);
                if (prop.getAttributeValue("prop-type", "").equals("encoding")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
                    encoding = prop.getText();
                }
            }
        }
    }
    return result;
}
Also used : IFile(org.eclipse.core.resources.IFile) Element(net.heartsome.xml.Element) FileOutputStream(java.io.FileOutputStream) Node(org.w3c.dom.Node) IFile(org.eclipse.core.resources.IFile) File(java.io.File)

Example 8 with Element

use of net.heartsome.xml.Element in project translationstudio8 by heartsome.

the class ReverseConversionValidateWithLibrary3 method readXliff.

/**
 * Parses the given XLIFF file and caches the pieces the rest of the class works on: the builder,
 * the document, its root element, and the {@code datatype} and {@code target-language}
 * attributes of the root's {@code file} child.
 * <p>
 * When {@code target-language} is absent, the value of message key
 * {@code model.ReverseConversionValidateWithLibrary3.msg4} is passed as the fallback.
 *
 * @param xliff
 *            path of the XLIFF file
 * @throws SAXException
 *             propagated from the XML builder
 * @throws IOException
 *             propagated from the XML builder, or thrown when the document has no
 *             {@code file} element
 * @throws ParserConfigurationException
 *             propagated from the XML builder
 */
private void readXliff(String xliff) throws SAXException, IOException, ParserConfigurationException {
    builder = new SAXBuilder();
    builder.setEntityResolver(new Catalogue(ConverterContext.catalogue));
    doc = builder.build(xliff);
    root = doc.getRootElement();
    Element file = root.getChild("file"); //$NON-NLS-1$
    if (file == null) {
        // Fail with a descriptive, declared exception instead of a bare NullPointerException.
        throw new IOException("No <file> element found in XLIFF document: " + xliff); //$NON-NLS-1$
    }
    dataType = file.getAttributeValue("datatype"); //$NON-NLS-1$
    targetLanguage = file.getAttributeValue("target-language", //$NON-NLS-1$
            Messages.getString("model.ReverseConversionValidateWithLibrary3.msg4")); //$NON-NLS-1$
}
Also used : Catalogue(net.heartsome.xml.Catalogue) SAXBuilder(net.heartsome.xml.SAXBuilder) Element(net.heartsome.xml.Element)

Example 9 with Element

use of net.heartsome.xml.Element in project translationstudio8 by heartsome.

the class ReverseConversionValidateWithLibrary3 method reBuildXlf.

/**
 * Builds a copy of the XLIFF file: rewrites the in-memory document segment by segment and then
 * serializes it to {@code tmpXLFFile}.
 * <p>
 * Every segment except the last is processed together with its successor. In the segment's
 * {@code source} and {@code target}, {@code ph} elements whose {@code id} starts with
 * "hs-merge" are removed. The remainder of such an id is split at "~" and each half is cut at
 * its first "-"; when the two halves differ, scanning stops and all nodes after that marker are
 * carried over and prepended to the next segment's {@code source} / {@code target}. A missing
 * {@code target} element is created (state "new") right after {@code source}. If the current
 * segment is not approved and source content was carried over, the next segment is marked
 * {@code approved="no"}.
 * <p>
 * NOTE(review): the carry-over relies on {@code getContent()} returning a list that is detached
 * from the element, because the list is still read after {@code removeAllChildren()} — confirm.
 *
 * @param tmpXLFFile
 *            the file the rewritten document is written to
 * @throws IOException
 *             if the output file cannot be opened or written
 */
private void reBuildXlf(File tmpXLFFile) throws IOException {
    long startTime = 0;
    if (LOGGER.isInfoEnabled()) {
        startTime = System.currentTimeMillis();
        LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger14"), startTime);
    }
    // Stops one short of the end: each iteration also touches segments.get(i + 1).
    for (int i = 0, size = segments.size() - 1; i < size; i++) {
        Element e = segments.get(i);
        Element src = e.getChild("source"); //$NON-NLS-1$
        Element tgt = e.getChild("target"); //$NON-NLS-1$
        boolean isApproved = e.getAttributeValue("approved", "no").equalsIgnoreCase("yes"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$

        // --- source: move kept nodes into tmp; whatever remains in srcList is carried over ---
        List<Node> srcList = src.getContent();
        Vector<Node> tmp = new Vector<Node>();
        for (int j = 0, jSize = srcList.size(); j < jSize; j++) {
            Node o = srcList.get(j);
            if (o.getNodeType() == Node.ELEMENT_NODE && o.getNodeName().equals("ph")) { //$NON-NLS-1$
                Element el = new Element(o);
                if (el.getAttributeValue("id", "").startsWith("hs-merge")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
                    // Strip the 8-character "hs-merge" prefix; the rest is "<idA>~<idB>".
                    String tmpMergeId = el.getAttributeValue("id", "").substring(8); //$NON-NLS-1$ //$NON-NLS-2$
                    // NOTE(review): pairId[1] is read without a length check, so an id lacking
                    // "~" would raise ArrayIndexOutOfBoundsException.
                    String[] pairId = tmpMergeId.split("~"); //$NON-NLS-1$
                    // The marker itself is dropped: removed from the list, never added to tmp.
                    srcList.remove(j);
                    j--;
                    jSize--;
                    int idIndex = pairId[0].indexOf("-"); //$NON-NLS-1$
                    if (idIndex != -1) {
                        pairId[0] = pairId[0].substring(0, idIndex);
                    }
                    idIndex = pairId[1].indexOf("-"); //$NON-NLS-1$
                    if (idIndex != -1) {
                        pairId[1] = pairId[1].substring(0, idIndex);
                    }
                    if (!pairId[0].equals(pairId[1])) {
                        // Ids differ: stop here, leaving the remaining nodes in srcList.
                        pairId = null;
                        break;
                    }
                    pairId = null;
                } else {
                    srcList.remove(j);
                    j--;
                    jSize--;
                    tmp.add(o);
                }
            } else {
                srcList.remove(j);
                j--;
                jSize--;
                tmp.add(o);
            }
        }
        src.removeAllChildren();
        src.setContent(tmp);
        tmp = null;

        if (tgt == null) {
            // No target yet: create one and insert it directly after <source>.
            tgt = new Element("target", doc); //$NON-NLS-1$
            // NOTE(review): the attribute NAME comes from the message bundle here, whereas the
            // equivalent code for the next segment below uses the literal "xml:lang". Presumably
            // msg5 resolves to "xml:lang" — verify; a localized bundle would break the XLIFF.
            tgt.setAttribute(Messages.getString("model.ReverseConversionValidateWithLibrary3.msg5"), targetLanguage);
            tgt.setAttribute("state", "new"); //$NON-NLS-1$ //$NON-NLS-2$
            List<Element> content = e.getChildren();
            Vector<Element> newContent = new Vector<Element>();
            for (int m = 0; m < content.size(); m++) {
                Element tmpEl = content.get(m);
                newContent.add(tmpEl);
                if (tmpEl.getName().equals("source")) { //$NON-NLS-1$
                    newContent.add(tgt);
                }
                tmpEl = null;
            }
            e.setContent(newContent);
            newContent = null;
            content = null;
        }

        // --- target: same treatment as the source above ---
        List<Node> tgtList = tgt.getContent();
        tmp = new Vector<Node>();
        for (int j = 0, jSize = tgtList.size(); j < jSize; j++) {
            Node o = tgtList.get(j);
            if (o.getNodeType() == Node.ELEMENT_NODE && o.getNodeName().equals("ph")) { //$NON-NLS-1$
                Element el = new Element(o);
                if (el.getAttributeValue("id", "").startsWith("hs-merge")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
                    String tmpMergeId = el.getAttributeValue("id", "").substring(8); //$NON-NLS-1$ //$NON-NLS-2$
                    String[] pairId = tmpMergeId.split("~"); //$NON-NLS-1$
                    tgtList.remove(j);
                    j--;
                    jSize--;
                    int idIndex = pairId[0].indexOf("-"); //$NON-NLS-1$
                    if (idIndex != -1) {
                        pairId[0] = pairId[0].substring(0, idIndex);
                    }
                    idIndex = pairId[1].indexOf("-"); //$NON-NLS-1$
                    if (idIndex != -1) {
                        pairId[1] = pairId[1].substring(0, idIndex);
                    }
                    if (!pairId[0].equals(pairId[1])) {
                        pairId = null;
                        break;
                    }
                    pairId = null;
                } else {
                    tgtList.remove(j);
                    j--;
                    jSize--;
                    tmp.add(o);
                }
                el = null;
            } else {
                tgtList.remove(j);
                j--;
                jSize--;
                tmp.add(o);
            }
        }
        tgt.removeAllChildren();
        tgt.setContent(tmp);
        tmp = null;

        // --- next segment: receives the carried-over source/target nodes at its front ---
        Element nextEl = segments.get(i + 1);
        if (!isApproved && srcList.size() > 0) {
            nextEl.setAttribute("approved", "no"); //$NON-NLS-1$ //$NON-NLS-2$
        }
        Element nextSrc = nextEl.getChild("source"); //$NON-NLS-1$
        Element nextTgt = nextEl.getChild("target"); //$NON-NLS-1$
        if (nextTgt == null) {
            nextTgt = new Element("target", doc); //$NON-NLS-1$
            nextTgt.setAttribute("xml:lang", targetLanguage); //$NON-NLS-1$
            nextTgt.setAttribute("state", "new"); //$NON-NLS-1$ //$NON-NLS-2$
            List<Element> content = nextEl.getChildren();
            Vector<Element> newContent = new Vector<Element>();
            for (int m = 0; m < content.size(); m++) {
                Element tmpEl = content.get(m);
                newContent.add(tmpEl);
                if (tmpEl.getName().equals("source")) { //$NON-NLS-1$
                    newContent.add(nextTgt);
                }
                tmpEl = null;
            }
            nextEl.setContent(newContent);
            newContent = null;
            content = null;
        }
        List<Node> nextSrcContent = nextSrc.getContent();
        List<Node> nextTgtContent = nextTgt.getContent();
        nextSrc.removeAllChildren();
        Vector<Node> newNextSrcContent = new Vector<Node>();
        newNextSrcContent.addAll(srcList);
        for (int j = 0, jSize = nextSrcContent.size(); j < jSize; j++) {
            newNextSrcContent.add(nextSrcContent.get(j));
        }
        nextSrc.setContent(newNextSrcContent);
        newNextSrcContent = null;
        nextTgt.removeAllChildren();
        Vector<Node> newNextTgtContent = new Vector<Node>();
        newNextTgtContent.addAll(tgtList);
        for (int j = 0, jSize = nextTgtContent.size(); j < jSize; j++) {
            newNextTgtContent.add(nextTgtContent.get(j));
        }
        nextTgt.setContent(newNextTgtContent);
        newNextTgtContent = null;
    }
    long endTime = 0;
    if (LOGGER.isInfoEnabled()) {
        endTime = System.currentTimeMillis();
        LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger15"), endTime);
        LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger16"), (endTime - startTime));
    }
    XMLOutputter outputter = new XMLOutputter();
    outputter.preserveSpace(true);
    FileOutputStream out = new FileOutputStream(tmpXLFFile);
    try {
        if (LOGGER.isInfoEnabled()) {
            startTime = System.currentTimeMillis();
            LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger17"), startTime);
        }
        outputter.output(doc, out);
        if (LOGGER.isInfoEnabled()) {
            endTime = System.currentTimeMillis();
            LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger18"), endTime);
            LOGGER.info(Messages.getString("model.ReverseConversionValidateWithLibrary3.logger19"), (endTime - startTime));
        }
    } finally {
        // Close the stream even when serialization fails, so the file handle is not leaked.
        out.close();
    }
    outputter = null;
}
Also used : XMLOutputter(net.heartsome.xml.XMLOutputter) Element(net.heartsome.xml.Element) Node(org.w3c.dom.Node) FileOutputStream(java.io.FileOutputStream) Vector(java.util.Vector)

Example 10 with Element

use of net.heartsome.xml.Element in project translationstudio8 by heartsome.

the class ReverseConversionValidateWithLibrary3 method getSkeleton.

/**
 * Resolves the skeleton file described by the {@code file/header/skl} element of the loaded
 * XLIFF document.
 * <p>
 * For an {@code external-file} skeleton, the {@code href} value is returned with the five
 * predefined XML entities unescaped. For an {@code internal-file} skeleton, the embedded text
 * and CDATA content is written, using the encoding recorded in the header, to a temporary
 * {@code .skl} file (deleted on JVM exit) created in the relative directory {@code skl}, and
 * that file's absolute path is returned.
 *
 * @return the skeleton file path, or an empty string when the document has no {@code file},
 *         {@code header} or {@code skl} element, or the {@code skl} element has neither an
 *         {@code external-file} nor an {@code internal-file} child
 * @throws IOException
 *             if the temporary skeleton file cannot be created or written, including when the
 *             recorded encoding name is not supported
 */
private String getSkeleton() throws IOException {
    String result = ""; //$NON-NLS-1$
    Element file = root.getChild("file"); //$NON-NLS-1$
    if (file == null) {
        return result;
    }
    Element header = file.getChild("header"); //$NON-NLS-1$
    if (header == null) {
        return result;
    }

    // Read the source-file encoding recorded in the header.
    String encoding = ""; //$NON-NLS-1$
    List<Element> propGroups = header.getChildren("hs:prop-group"); //$NON-NLS-1$
    for (int i = 0; i < propGroups.size(); i++) {
        Element prop = propGroups.get(i);
        // Constant-first comparison: safe even if the lookup yields null for a missing attribute.
        if ("encoding".equals(prop.getAttributeValue("name"))) { //$NON-NLS-1$ //$NON-NLS-2$
            encoding = prop.getText().trim();
            break;
        }
    }
    if (encoding.equals("utf-8")) { //$NON-NLS-1$
        encoding = "UTF-8"; //$NON-NLS-1$
    }

    Element mskl = header.getChild("skl"); //$NON-NLS-1$
    if (mskl == null) {
        return result;
    }
    Element external = mskl.getChild("external-file"); //$NON-NLS-1$

    if (external == null) {
        Element internal = mskl.getChild("internal-file"); //$NON-NLS-1$
        if (internal == null) {
            return result;
        }
        File tmp = File.createTempFile("internal", ".skl", new File("skl")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
        tmp.deleteOnExit();
        FileOutputStream out = new FileOutputStream(tmp);
        try {
            List<Node> content = internal.getContent();
            for (int i = 0; i < content.size(); i++) {
                Node n = content.get(i);
                if (n.getNodeType() == Node.TEXT_NODE) {
                    out.write(n.getNodeValue().getBytes(encoding));
                } else if (n.getNodeType() == Node.CDATA_SECTION_NODE) {
                    // Bug 515: a CDATA value ending in "]]" gets ">" appended.
                    // NOTE(review): presumably restores a "]]>" that was split when the
                    // skeleton was embedded; confirm against the writer side.
                    String cdataString = n.getNodeValue();
                    if (cdataString.endsWith("]]")) { //$NON-NLS-1$
                        cdataString += ">"; //$NON-NLS-1$
                    }
                    out.write(cdataString.getBytes(encoding));
                }
            }
        } finally {
            // Release the file handle even when a write fails.
            out.close();
        }
        return tmp.getAbsolutePath();
    }

    result = external.getAttributeValue("href"); //$NON-NLS-1$
    result = result.replaceAll("&amp;", "&"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&lt;", "<"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&gt;", ">"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&apos;", "\'"); //$NON-NLS-1$ //$NON-NLS-2$
    result = result.replaceAll("&quot;", "\""); //$NON-NLS-1$ //$NON-NLS-2$

    // NOTE(review): this fallback lookup (hs:prop with prop-type="encoding") runs only on the
    // external-file path and its result is never read afterwards. It looks like it was meant
    // to run before the internal-file write; kept in place to avoid changing behaviour.
    // Confirm, then either move it ahead of the write or delete it.
    if (encoding.equals("")) { //$NON-NLS-1$
        List<Element> groups = header.getChildren("hs:prop-group"); //$NON-NLS-1$
        for (int i = 0; i < groups.size(); i++) {
            Element group = groups.get(i);
            List<Element> props = group.getChildren("hs:prop"); //$NON-NLS-1$
            for (int k = 0; k < props.size(); k++) {
                Element prop = props.get(k);
                if (prop.getAttributeValue("prop-type", "").equals("encoding")) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
                    encoding = prop.getText();
                }
            }
        }
    }
    return result;
}
Also used : Element(net.heartsome.xml.Element) FileOutputStream(java.io.FileOutputStream) Node(org.w3c.dom.Node) File(java.io.File)

Aggregations

Element (net.heartsome.xml.Element)18 File (java.io.File)8 Catalogue (net.heartsome.xml.Catalogue)7 SAXBuilder (net.heartsome.xml.SAXBuilder)7 Document (net.heartsome.xml.Document)5 Node (org.w3c.dom.Node)5 FileOutputStream (java.io.FileOutputStream)4 Vector (java.util.Vector)3 IOException (java.io.IOException)2 ParserConfigurationException (javax.xml.parsers.ParserConfigurationException)2 XMLOutputter (net.heartsome.xml.XMLOutputter)2 TreeItem (org.eclipse.swt.widgets.TreeItem)2 SAXException (org.xml.sax.SAXException)2 UnsupportedEncodingException (java.io.UnsupportedEncodingException)1 URL (java.net.URL)1 IFile (org.eclipse.core.resources.IFile)1