Search in sources :

Example 76 with VTDUtils

use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.

the class DocUtils method isTMX.

/**
 * Checks whether the given file is a usable TMX document: the file is read into memory, parsed,
 * and the XPath {@code /tmx} must match a node.
 *
 * @param fileName
 *            path of the file to inspect
 * @return a {@link VTDUtils} bound to the parsed document when {@code /tmx} matches; {@code null} when the file
 *         cannot be read, when {@code /tmx} does not match, or when navigation / XPath evaluation fails
 * @throws FileNotFoundException
 *             declared in the signature; in this implementation a missing file is caught together with other
 *             I/O errors, logged, and reported as {@code null}
 * @throws EncodingException
 *             propagated from parsing the document
 * @throws ParseException
 *             propagated from parsing the document
 */
public static VTDUtils isTMX(String fileName) throws FileNotFoundException, EncodingException, ParseException {
    // Upper bound on the bytes requested per read() call; the value is arbitrary,
    // any other (not too big) value would work as well.
    final int chunkSize = 1048576;
    VTDGen vg = new VTDGen();
    FileInputStream fis = null;
    try {
        File f = new File(fileName);
        fis = new FileInputStream(f);
        byte[] b = new byte[(int) f.length()];
        int offset = 0;
        // Fill the buffer chunk by chunk; stop early if the stream ends before the expected length.
        while (offset < b.length) {
            int numRead = fis.read(b, offset, Math.min(chunkSize, b.length - offset));
            if (numRead < 0) {
                break;
            }
            offset += numRead;
        }
        vg.setDoc(b);
        vg.parse(true);
    } catch (IOException e) {
        LOGGER.error(Messages.getString("document.DocUtils.logger1"), e);
        // Nothing was parsed, so there is no document to navigate: report the file as unusable
        // instead of continuing with an unparsed VTDGen.
        return null;
    } finally {
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException ignored) {
                // Best effort: the content has already been read (or the read already failed and was logged).
            }
        }
    }
    VTDNav vn = vg.getNav();
    AutoPilot ap = new AutoPilot(vn);
    String rootPath = "/tmx";
    VTDUtils vu = new VTDUtils();
    try {
        vu.bind(vn);
        ap.selectXPath(rootPath);
        if (ap.evalXPath() == -1) {
            // No /tmx node: not a TMX document.
            return null;
        }
    } catch (NavException e) {
        LOGGER.error(Messages.getString("document.DocUtils.logger2"), e);
        return null;
    } catch (XPathEvalException e) {
        LOGGER.error(Messages.getString("document.DocUtils.logger2"), e);
        return null;
    } catch (XPathParseException e) {
        LOGGER.error(Messages.getString("document.DocUtils.logger2"), e);
        return null;
    } finally {
        vg.clear();
    }
    return vu;
}
Also used : NavException(com.ximpleware.NavException) XPathEvalException(com.ximpleware.XPathEvalException) VTDGen(com.ximpleware.VTDGen) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) EOFException(com.ximpleware.EOFException) XPathParseException(com.ximpleware.XPathParseException) NavException(com.ximpleware.NavException) IOException(java.io.IOException) EncodingException(com.ximpleware.EncodingException) FileNotFoundException(java.io.FileNotFoundException) ParseException(com.ximpleware.ParseException) XPathEvalException(com.ximpleware.XPathEvalException) EntityException(com.ximpleware.EntityException) XPathParseException(com.ximpleware.XPathParseException) VTDUtils(net.heartsome.xml.vtdimpl.VTDUtils) AutoPilot(com.ximpleware.AutoPilot) File(java.io.File) VTDNav(com.ximpleware.VTDNav)

Example 77 with VTDUtils

use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.

the class TmxReader method validateTmxAndParseHeader.

/**
 * Validates the TMX structure, reads the header's {@code srclang} attribute and pilots {@code vu} to
 * {@code /tmx/body}.
 * <p>
 * The checks run in this order: {@code /tmx} must match, {@code /tmx/header} must match, the node reached by the
 * header query must carry a {@code srclang} attribute, and {@code vu.pilot("/tmx/body")} must not return
 * {@code -1}. On success {@code header}'s source language is set to the trimmed attribute value and
 * {@code totalTu} is set to {@code vu.getChildElementsCount()}.
 *
 * @param vg
 *            the parser holding the document; it is cleared before this method returns, on every path
 * @throws TmxReadException
 *             when any of the structural checks fails, or when a {@code VTDException} is raised while navigating
 */
private void validateTmxAndParseHeader(VTDGen vg) throws TmxReadException {
    final VTDNav nav = vg.getNav();
    final AutoPilot pilot = new AutoPilot(nav);
    vu = new VTDUtils();
    try {
        vu.bind(nav);

        // Root element check.
        pilot.selectXPath("/tmx");
        final boolean rootFound = pilot.evalXPath() != -1;
        if (!rootFound) {
            throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError"));
        }

        // Header element check.
        pilot.resetXPath();
        pilot.selectXPath("/tmx/header");
        final boolean headerFound = pilot.evalXPath() != -1;
        if (!headerFound) {
            throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError"));
        }

        // The source language attribute is mandatory.
        final int srclangIndex = vu.getVTDNav().getAttrVal("srclang");
        if (srclangIndex == -1) {
            throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError"));
        }
        final String srclang = vu.getVTDNav().toString(srclangIndex).trim();
        header.setSrclang(srclang);

        // Move on to the body; its child element count is recorded as the total number of translation units.
        final int bodyResult = vu.pilot("/tmx/body");
        if (bodyResult == -1) {
            throw new TmxReadException(Messages.getString("document.TmxReader.validateTmxFileError"));
        }
        this.totalTu = vu.getChildElementsCount();
    } catch (VTDException e) {
        logger.error("", e);
        throw new TmxReadException(Messages.getString("document.TmxReader.parseTmxFileError") + e.getMessage());
    } finally {
        vg.clear();
    }
}
Also used : VTDUtils(net.heartsome.xml.vtdimpl.VTDUtils) VTDException(com.ximpleware.VTDException) AutoPilot(com.ximpleware.AutoPilot) VTDNav(com.ximpleware.VTDNav)

Example 78 with VTDUtils

use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.

the class Docx2Xliff method idealizeGTag.

/**
 * Simplifies the tags of an XLIFF file by moving a wrapping {@code g} tag out of the source text. This only
 * applies to a source that has exactly one {@code g} child which wraps the whole text segment.
 * <p>
 * The removed tags are collected in a file named interTag.xml, stored in the first-level sub-directory of the
 * skeleton, next to the word folder. Its structure is roughly:<br>
 * &lt;docxTags&gt;<br>
 * 		&lt;tag tuId="0" &gt;this is a tag&lt;/tag&gt;<br>
 * &lt;/docxTags&gt;<br>
 * <div style="color:red">Note on interTag.xml: this is not an internal file of the docx package; it stores some
 * of the g tags produced while converting the docx file (those where the source holds a single pair of g tags
 * that wraps the entire text segment).</div>
 * <p>
 * The modified XLIFF is written back to {@code xliffPath}. The tag file is only created when at least one tag
 * was extracted and no file exists at {@code interTagPath} yet.
 *
 * @param xliffPath
 *            path of the XLIFF file to process; it is overwritten with the simplified content
 * @param interTagPath
 *            path of the interTag.xml file to create
 * @throws Exception
 *             when the XLIFF file cannot be parsed, or when navigation, modification or file output fails
 */
private static void idealizeGTag(String xliffPath, String interTagPath) throws Exception {
    final String constantGHeader = "<g";
    final String constantGEnd = "</g>";
    VTDGen vg = new VTDGen();
    if (!vg.parseFile(xliffPath, true)) {
        throw new Exception("Failed to parse XLIFF file: " + xliffPath);
    }
    VTDNav vn = vg.getNav();
    String xpath = "/xliff/file/body/descendant::trans-unit[source/text()!=''  or source/*]";
    AutoPilot ap = new AutoPilot(vn);
    AutoPilot childAP = new AutoPilot(vn);
    VTDUtils vu = new VTDUtils(vn);
    XMLModifier xm = new XMLModifier(vn);
    ap.selectXPath(xpath);
    StringBuilder tagContentSB = new StringBuilder();
    while (ap.evalXPath() != -1) {
        int index = vn.getAttrVal("id");
        String id = index != -1 ? vn.toString(index) : null;
        if (id == null) {
            // Nothing has been pushed for this trans-unit yet, so there is nothing to pop here.
            continue;
        }
        // Remember the trans-unit position; the child queries below move the cursor.
        vn.push();
        childAP.selectXPath("./source");
        if (childAP.evalXPath() != -1) {
            String srcText = vu.getElementContent();
            childAP.selectXPath("count(./g)");
            // The source can be cleaned up when it has exactly one g tag and that tag wraps the whole segment.
            // The first "</g>" must be the trailing one, hence indexOf rather than endsWith.
            if (childAP.evalXPathToNumber() == 1 && srcText != null && srcText.startsWith(constantGHeader)
                    && srcText.indexOf(constantGEnd) == (srcText.length() - constantGEnd.length())) {
                childAP.selectXPath("./g");
                if (childAP.evalXPath() != -1) {
                    String header = vu.getElementHead();
                    String content = vu.getElementContent();
                    // Remove the g tag and keep only its content in place.
                    xm.remove();
                    xm.insertAfterElement(content);
                    // Record the removed g tag so it can be saved to interTag.xml.
                    tagContentSB.append("\t<tag tuId=\"" + id + "\">" + header + constantGEnd + "</tag>\n");
                }
            }
        }
        vn.pop();
    }
    xm.output(xliffPath);
    if (tagContentSB.length() > 0) {
        // Create the interTag.xml file (an existing file is left untouched).
        File file = new File(interTagPath);
        if (!file.exists()) {
            FileOutputStream output = new FileOutputStream(interTagPath);
            try {
                output.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".getBytes("UTF-8"));
                output.write("<docxTags>\n".getBytes("UTF-8"));
                output.write(tagContentSB.toString().getBytes("UTF-8"));
                output.write("</docxTags>".getBytes("UTF-8"));
            } finally {
                // Release the file handle even when a write fails.
                output.close();
            }
        }
    }
}
Also used : XMLModifier(com.ximpleware.XMLModifier) VTDUtils(net.heartsome.xml.vtdimpl.VTDUtils) AutoPilot(com.ximpleware.AutoPilot) FileOutputStream(java.io.FileOutputStream) VTDGen(com.ximpleware.VTDGen) VTDNav(com.ximpleware.VTDNav) File(java.io.File) DocxConverterException(net.heartsome.cat.converter.word2007.common.DocxConverterException) OperationCanceledException(org.eclipse.core.runtime.OperationCanceledException) ConverterException(net.heartsome.cat.converter.ConverterException)

Example 79 with VTDUtils

use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.

the class XliffInputer method loadXliff.

/**
 * Parses the hsxliff file referenced by {@code xliffFile} and initialises the navigation fields ({@code vn},
 * {@code ap}, {@code childAP}, {@code vu}). Both auto pilots get the {@code hs} namespace prefix declared.
 *
 * @throws Exception
 *             when the file cannot be parsed
 */
private void loadXliff() throws Exception {
    final String hsNamespaceUri = "http://www.heartsome.net.cn/2008/XLFExtension";
    VTDGen parser = new VTDGen();
    boolean parsed = parser.parseFile(xliffFile, true);
    if (!parsed) {
        throw new Exception(MessageFormat.format(Messages.getString("docxConvert.msg2"), xliffFile));
    }
    vn = parser.getNav();
    ap = new AutoPilot(vn);
    childAP = new AutoPilot(vn);
    ap.declareXPathNameSpace("hs", hsNamespaceUri);
    childAP.declareXPathNameSpace("hs", hsNamespaceUri);
    vu = new VTDUtils(vn);
}
Also used : VTDUtils(net.heartsome.xml.vtdimpl.VTDUtils) AutoPilot(com.ximpleware.AutoPilot) VTDGen(com.ximpleware.VTDGen)

Example 80 with VTDUtils

use of net.heartsome.xml.vtdimpl.VTDUtils in project translationstudio8 by heartsome.

the class XLFHandler method getFullAndPureText.

/**
 * Gets the full text and the pure text of a source or target node under a trans-unit. robert 2011-12-14
 *
 * @param xlfPath
 *            path of the XLIFF file; used to look up the navigator in {@code vnMap}
 * @param nodeXpath
 *            XPath of the trans-unit's source or target child node
 * @return a map that holds two entries when the node exists and has non-empty content: {@code fullText} (the
 *         element content) and {@code pureText} (the value returned by {@code getTUPureText}). The map is empty,
 *         never {@code null}, when the node is missing, has no content, or an error occurred (the error is
 *         logged).
 */
public Map<String, String> getFullAndPureText(String xlfPath, String nodeXpath) {
    Map<String, String> textMap = new HashMap<String, String>();
    VTDNav vn = vnMap.get(xlfPath);
    // Check before the navigator is first used, so a missing entry fails with the descriptive message.
    Assert.isNotNull(vn, Messages.getString("file.XLFHandler.msg4") + xlfPath);
    // Save the cursor position; it is restored in the finally block below.
    vn.push();
    try {
        AutoPilot ap = new AutoPilot(vn);
        VTDUtils vUtils = new VTDUtils(vn);
        ap.selectXPath(nodeXpath);
        if (ap.evalXPath() != -1) {
            String content = vUtils.getElementContent();
            if (content != null && !"".equals(content)) {
                textMap.put("fullText", content);
                textMap.put("pureText", getTUPureText(vn));
            }
        }
    } catch (Exception e) {
        LOGGER.error("", e);
    } finally {
        vn.pop();
    }
    return textMap;
}
Also used : HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) VTDUtils(net.heartsome.xml.vtdimpl.VTDUtils) AutoPilot(com.ximpleware.AutoPilot) VTDNav(com.ximpleware.VTDNav) NavException(com.ximpleware.NavException) CoreException(org.eclipse.core.runtime.CoreException) OperationCanceledException(org.eclipse.core.runtime.OperationCanceledException) XPathParseException(com.ximpleware.XPathParseException) FileNotFoundException(java.io.FileNotFoundException) XQException(javax.xml.xquery.XQException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) TranscodeException(com.ximpleware.TranscodeException) XPathEvalException(com.ximpleware.XPathEvalException) IOException(java.io.IOException) ModifyException(com.ximpleware.ModifyException)

Aggregations

VTDUtils (net.heartsome.xml.vtdimpl.VTDUtils)137 AutoPilot (com.ximpleware.AutoPilot)112 NavException (com.ximpleware.NavException)103 VTDNav (com.ximpleware.VTDNav)99 XPathParseException (com.ximpleware.XPathParseException)83 XPathEvalException (com.ximpleware.XPathEvalException)81 IOException (java.io.IOException)64 ModifyException (com.ximpleware.ModifyException)62 TranscodeException (com.ximpleware.TranscodeException)49 CoreException (org.eclipse.core.runtime.CoreException)45 XMLModifier (com.ximpleware.XMLModifier)41 VTDGen (com.ximpleware.VTDGen)33 FileNotFoundException (java.io.FileNotFoundException)30 UnsupportedEncodingException (java.io.UnsupportedEncodingException)29 HashMap (java.util.HashMap)27 OperationCanceledException (org.eclipse.core.runtime.OperationCanceledException)27 LinkedHashMap (java.util.LinkedHashMap)25 ArrayList (java.util.ArrayList)23 XQException (javax.xml.xquery.XQException)20 LinkedList (java.util.LinkedList)14