Search in sources :

Example 1 with WordsFABean

use of net.heartsome.cat.ts.core.qa.WordsFABean in project translationstudio8 by heartsome.

the class WordsFA method matching.

/**
 * Runs the word-count analysis for every source/target language pair reported by
 * {@code handler.getLanguages()} and classifies each segment as locked, external match,
 * internal repeat/match or new.
 *
 * <p>Each segment is removed from its per-file map once it has been classified, so that
 * later internal matching (see {@code internalMatching}) only compares against segments
 * that have not been processed yet.
 *
 * @param monitor progress monitor; its task name is updated per file and it is passed to
 *            {@code handler.monitorWork} after each processed segment
 * @return the per-file results keyed by the file's OS path, in the order of
 *         {@code model.getAnalysisIFileList()}; {@code null} when
 *         {@code handler.monitorWork} returns {@code false} (per the original
 *         documentation this means the user pressed the exit button)
 */
public Map<String, WordsFAResult> matching(IProgressMonitor monitor) {
    int matchTravelTuIndex = 0;
    // Result set of the word count, one entry per analysed file.
    Map<String, WordsFAResult> wordsFAResultMap = new LinkedHashMap<String, WordsFAResult>();
    Map<String, ArrayList<String>> languages = handler.getLanguages();
    // Register an empty result for every file up front.
    for (IFile iFile : model.getAnalysisIFileList()) {
        String filePath = iFile.getLocation().toOSString();
        wordsFAResultMap.put(filePath, new WordsFAResult());
    }
    for (Entry<String, ArrayList<String>> langEntry : languages.entrySet()) {
        String srcLanguage = langEntry.getKey();
        for (String tgtLanguage : langEntry.getValue()) {
            // Load the source texts of every file for this language pair.
            allSrcTextsMap = getAllXlfSrcTexts(srcLanguage.toUpperCase(), tgtLanguage.toUpperCase());
            // NOTE(review): the original comment says a null result means the user asked to
            // exit and the program should stop, yet the code only skips this language pair.
            // Behaviour is kept as-is; confirm which one is intended.
            if (allSrcTextsMap == null) {
                continue;
            }
            for (Entry<String, Map<String, WordsFABean>> textEntry : allSrcTextsMap.entrySet()) {
                String filePath = textEntry.getKey();
                IFile iFile = ResourceUtils.fileToIFile(filePath);
                // Result object that receives the statistics of this file.
                WordsFAResult wordFaResult = wordsFAResultMap.get(filePath);
                Map<String, WordsFABean> fileSrcTextMap = textEntry.getValue();
                monitor.setTaskName(MessageFormat.format(Messages.getString("qa.fileAnalysis.WordsFA.tip1"), iFile.getFullPath().toOSString()));
                Iterator<Entry<String, WordsFABean>> it = fileSrcTextMap.entrySet().iterator();
                while (it.hasNext()) {
                    Entry<String, WordsFABean> entry = it.next();
                    matchTravelTuIndex++;
                    String rowId = entry.getKey();
                    WordsFABean bean = entry.getValue();
                    String srcPureText = bean.getSrcPureText();
                    int textLength = bean.getSrcLength();
                    String preTextHash = bean.getPreHash();
                    String nextTextHash = bean.getNextHash();
                    boolean isLocked = bean.isLocked();
                    String tagStr = bean.getTagStr();
                    int wordsCount = CountWord.wordCount(srcPureText, srcLanguage);

                    // A locked segment only contributes to the locked statistics.
                    if (isLocked) {
                        wordFaResult.setLockedPara(QAConstant.QA_FIRST);
                        wordFaResult.setLockedWords(wordsCount);
                        // Drop the segment so it is not compared against any more.
                        it.remove();
                        continue;
                    }

                    // UNDO (from the original): should first check whether the database is available.
                    // Step 1: external matching; wrap the parameters first.
                    tuInfoBean = new TransUnitInfo2TranslationBean();
                    tuInfoBean.setNextContext(bean.getNextHash());
                    tuInfoBean.setPreContext(bean.getPreHash());
                    tuInfoBean.setSrcFullText(bean.getSrcContent());
                    tuInfoBean.setSrcLanguage(srcLanguage);
                    tuInfoBean.setSrcPureText(bean.getSrcPureText());
                    tuInfoBean.setTgtLangugage(tgtLanguage);
                    List<TranslationUnitAnalysisResult> exterMatchResult = tmMatcher.analysTranslationUnit(curProject, tuInfoBean);
                    int exterMatchRate = 0;
                    if (exterMatchResult != null && !exterMatchResult.isEmpty()) {
                        // Only the first result's similarity is used.
                        exterMatchRate = exterMatchResult.get(0).getSimilarity();
                    }

                    if (exterMatchRate == 100 || exterMatchRate == 101) {
                        // When external 100% (resp. 101%) matches are configured to be locked,
                        // their words are counted as locked words instead of external matches.
                        boolean lockSegment = exterMatchRate == 100 ? model.isLockExter100() : model.isLockExter101();
                        if (lockSegment) {
                            wordFaResult.setLockedPara(QAConstant.QA_FIRST);
                            wordFaResult.setLockedWords(wordsCount);
                            needLockRowIdList.add(rowId);
                        } else {
                            wordFaResult.setExterRepeatPara(QAConstant.QA_FIRST);
                            wordFaResult.setExterMatchWords(wordsCount);
                            wordFaResult.setAllExterMatchWords(exterMatchRate, wordsCount);
                        }
                        // Drop the segment so it is not compared against any more.
                        it.remove();
                        if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
                            return null;
                        }
                        continue;
                    }

                    // Step 2: internal matching, only when internal repetition is checked.
                    if (interRepeat) {
                        // Without internal fuzzy matching only exact (100) candidates are considered.
                        int interNewWordsMaxMatchRate = interMatch ? newWordsMaxMatchRate : 100;
                        internalMatching(rowId, srcPureText, tagStr, textLength, preTextHash, nextTextHash, interNewWordsMaxMatchRate);
                    }
                    // The internal rate is read from the bean (never below 0); the value returned
                    // by internalMatching is not used here.
                    int inteMatchRate = Math.max(0, bean.getThisMatchRate());
                    int maxMacthRate = Math.max(exterMatchRate, inteMatchRate);
                    if (inteMatchRate == 100 || inteMatchRate == 101) {
                        if (model.isLockInterRepeat()) {
                            wordFaResult.setLockedPara(QAConstant.QA_FIRST);
                            wordFaResult.setLockedWords(wordsCount);
                            needLockRowIdList.add(rowId);
                        } else {
                            wordFaResult.setInterRepeatPara(QAConstant.QA_FIRST);
                            wordFaResult.setInterMatchWords(wordsCount);
                            wordFaResult.setAllInterMatchWords(inteMatchRate, wordsCount);
                        }
                    } else if (maxMacthRate < newWordsMaxMatchRate) {
                        // The best match is below the minimum match rate: count as new words.
                        wordFaResult.setNewPara(QAConstant.QA_FIRST);
                        wordFaResult.setNewWords(wordsCount);
                    } else if (inteMatchRate > exterMatchRate) {
                        // Internal match wins.
                        wordFaResult.setInterMatchPara(QAConstant.QA_FIRST);
                        wordFaResult.setInterMatchWords(wordsCount);
                        wordFaResult.setAllInterMatchWords(inteMatchRate, wordsCount);
                    } else {
                        // External match wins (also on a tie).
                        wordFaResult.setExterMatchPara(QAConstant.QA_FIRST);
                        wordFaResult.setExterMatchWords(wordsCount);
                        wordFaResult.setAllExterMatchWords(exterMatchRate, wordsCount);
                    }
                    // Drop the segment so it is not compared against any more.
                    it.remove();
                    if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
                        return null;
                    }
                }
                wordsFAResultMap.put(filePath, wordFaResult);
            }
            if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
                return null;
            }
        }
    }
    lockRepeatTU(wordsFAResultMap);
    return wordsFAResultMap;
}
Also used : IFile(org.eclipse.core.resources.IFile) WordsFABean(net.heartsome.cat.ts.core.qa.WordsFABean) ArrayList(java.util.ArrayList) TranslationUnitAnalysisResult(net.heartsome.cat.common.bean.TranslationUnitAnalysisResult) WordsFAResult(net.heartsome.cat.ts.ui.qa.model.WordsFAResult) TransUnitInfo2TranslationBean(net.heartsome.cat.ts.tm.bean.TransUnitInfo2TranslationBean) LinkedHashMap(java.util.LinkedHashMap) Entry(java.util.Map.Entry) Map(java.util.Map) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap)

Example 2 with WordsFABean

use of net.heartsome.cat.ts.core.qa.WordsFABean in project translationstudio8 by heartsome.

the class WordsFA method internalMatching.

/**
 * Internal matching: compares one source segment (the "comparer") against every other
 * source segment still present in {@code allSrcTextsMap} for the files listed by
 * {@code model.getAnalysisIFileList()}.
 *
 * <p>For each candidate that passes {@code checkIsideal}, the similarity is computed with
 * {@code MatchQuality.similarity}; when tags are not ignored and the tag strings differ,
 * {@code tagPenalty} is subtracted. The resulting rate is stored on the candidate's bean
 * via {@code setThisMatchRate}.
 *
 * <p>Per the original note, context matching is not needed here, which is why the context
 * hashes are not consulted.
 *
 * @param rowId
 *            unique identifier of the comparer's trans-unit; the entry with this id is skipped
 * @param srcPureText
 *            pure text (without tags) of the comparer's source
 * @param tagStr
 *            tag string of the comparer, compared against each candidate's tag string
 * @param textLength
 *            length of the comparer's source; not used by this method
 * @param preTextHash
 *            hash of the preceding context; not used by this method
 * @param nextTexthash
 *            hash of the following context; not used by this method
 * @param interNewWordsMaxMatchRate
 *            threshold passed to {@code checkIsideal} to pre-filter candidates
 * @return the highest match rate found among all candidates, or 0 when there is none
 */
public int internalMatching(String rowId, String srcPureText, String tagStr, int textLength, String preTextHash, String nextTexthash, int interNewWordsMaxMatchRate) {
    // Best match rate seen so far.
    int matchRate = 0;
    for (IFile iFile : model.getAnalysisIFileList()) {
        String filePath = iFile.getLocation().toOSString();
        Map<String, WordsFABean> fileSrcTextMap = allSrcTextsMap.get(filePath);
        // A file may have no segments for the current language pair; nothing to compare then.
        if (fileSrcTextMap == null) {
            continue;
        }
        for (Entry<String, WordsFABean> entry : fileSrcTextMap.entrySet()) {
            // The comparer is never compared with itself.
            if (rowId.equals(entry.getKey())) {
                continue;
            }
            WordsFABean curBean = entry.getValue();
            String curSrcPureText = curBean.getSrcPureText();
            // Fold case once per candidate instead of once per use.
            String comparerText = ignoreCase ? srcPureText.toLowerCase() : srcPureText;
            String candidateText = ignoreCase ? curSrcPureText.toLowerCase() : curSrcPureText;
            if (!checkIsideal(comparerText, candidateText, interNewWordsMaxMatchRate)) {
                continue;
            }
            int curMatchRate = MatchQuality.similarity(comparerText, candidateText);
            if (!ignoreTag) {
                String curTagStr = curBean.getTagStr();
                if (!curTagStr.equals(tagStr)) {
                    curMatchRate -= tagPenalty;
                }
            }
            if (curMatchRate > matchRate) {
                matchRate = curMatchRate;
            }
            // NOTE(review): the original comment says the candidate's rate is refreshed only
            // when it is lower than this one, but the call is unconditional; presumably the
            // setter keeps the maximum — confirm in WordsFABean.
            curBean.setThisMatchRate(curMatchRate);
        }
    }
    return matchRate;
}
Also used : Entry(java.util.Map.Entry) IFile(org.eclipse.core.resources.IFile) WordsFABean(net.heartsome.cat.ts.core.qa.WordsFABean)

Aggregations

Entry (java.util.Map.Entry)2 WordsFABean (net.heartsome.cat.ts.core.qa.WordsFABean)2 IFile (org.eclipse.core.resources.IFile)2 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 LinkedHashMap (java.util.LinkedHashMap)1 Map (java.util.Map)1 TranslationUnitAnalysisResult (net.heartsome.cat.common.bean.TranslationUnitAnalysisResult)1 TransUnitInfo2TranslationBean (net.heartsome.cat.ts.tm.bean.TransUnitInfo2TranslationBean)1 WordsFAResult (net.heartsome.cat.ts.ui.qa.model.WordsFAResult)1