Use of net.heartsome.cat.ts.core.qa.WordsFABean in project translationstudio8 by heartsome.
Class WordsFA, method matching.
/**
 * Runs the word-count match analysis for every source/target language pair returned by
 * {@code handler.getLanguages()} and every file returned by {@code getAllXlfSrcTexts}.
 * <p>
 * Each text segment is handled in this order:
 * <ol>
 * <li>its words are counted with {@code CountWord.wordCount};</li>
 * <li>a locked segment is recorded as locked and skipped;</li>
 * <li>an external match is requested from {@code tmMatcher}; a rate of 100 or 101 is recorded as
 * locked or as an external repeat, depending on the model's lock settings;</li>
 * <li>otherwise, when {@code interRepeat} is set, {@code internalMatching} is run and the segment
 * is recorded as internal repeat, new words, internal match or external match.</li>
 * </ol>
 * A processed segment is removed from its per-file map so that it is no longer compared against.
 *
 * @param monitor
 *            progress monitor; receives the task name for each file and is passed to
 *            {@code handler.monitorWork}
 * @return the results keyed by OS file path, or {@code null} as soon as
 *         {@code handler.monitorWork} returns {@code false} (per the original documentation this
 *         means the user pressed the exit button)
 */
public Map<String, WordsFAResult> matching(IProgressMonitor monitor) {
    int matchTravelTuIndex = 0;
    // Result set of the word-count analysis.
    Map<String, WordsFAResult> wordsFAResultMap = new LinkedHashMap<String, WordsFAResult>();
    Map<String, ArrayList<String>> languages = handler.getLanguages();
    // Start by storing one (empty) result per analysed file.
    for (IFile iFile : model.getAnalysisIFileList()) {
        String filePath = iFile.getLocation().toOSString();
        wordsFAResultMap.put(filePath, new WordsFAResult());
    }

    for (Entry<String, ArrayList<String>> langEntry : languages.entrySet()) {
        String srcLanguage = langEntry.getKey();
        for (String tgtLanguage : langEntry.getValue()) {
            // Fetch the segments of every file for this language pair.
            allSrcTextsMap = getAllXlfSrcTexts(srcLanguage.toUpperCase(), tgtLanguage.toUpperCase());
            // NOTE(review): the original comment states that a null result means the user asked
            // to exit and the program should stop, yet the code only skips this language pair.
            // Behavior is kept as-is; confirm against getAllXlfSrcTexts.
            if (allSrcTextsMap == null) {
                continue;
            }

            for (Entry<String, Map<String, WordsFABean>> textEntry : allSrcTextsMap.entrySet()) {
                String filePath = textEntry.getKey();
                IFile iFile = ResourceUtils.fileToIFile(filePath);
                // Result object that collects the figures of this file.
                WordsFAResult wordFaResult = wordsFAResultMap.get(filePath);
                Map<String, WordsFABean> fileSrcTextMap = textEntry.getValue();
                monitor.setTaskName(MessageFormat.format(Messages.getString("qa.fileAnalysis.WordsFA.tip1"), iFile.getFullPath().toOSString()));

                // Every visited entry is removed through the iterator, so the traversal is the
                // same as restarting the iteration after each Map.remove call.
                Iterator<Entry<String, WordsFABean>> it = fileSrcTextMap.entrySet().iterator();
                while (it.hasNext()) {
                    Entry<String, WordsFABean> entry = it.next();
                    matchTravelTuIndex++;
                    String rowId = entry.getKey();
                    WordsFABean bean = entry.getValue();
                    String srcPureText = bean.getSrcPureText();
                    int textLength = bean.getSrcLength();
                    String preTextHash = bean.getPreHash();
                    String nextTextHash = bean.getNextHash();
                    boolean isLocked = bean.isLocked();
                    String tagStr = bean.getTagStr();
                    int wordsCount = CountWord.wordCount(srcPureText, srcLanguage);

                    // A locked segment is recorded as locked and then skipped.
                    // NOTE(review): unlike every other branch, this one does not call
                    // handler.monitorWork; kept as in the original.
                    if (isLocked) {
                        wordFaResult.setLockedPara(QAConstant.QA_FIRST);
                        wordFaResult.setLockedWords(wordsCount);
                        // Drop the segment so it is not compared any more.
                        it.remove();
                        continue;
                    }

                    // UNDO (original author's note): should first check whether the database is available.
                    // Step 1: external matching; wrap the parameters first.
                    tuInfoBean = new TransUnitInfo2TranslationBean();
                    tuInfoBean.setNextContext(nextTextHash);
                    tuInfoBean.setPreContext(preTextHash);
                    tuInfoBean.setSrcFullText(bean.getSrcContent());
                    tuInfoBean.setSrcLanguage(srcLanguage);
                    tuInfoBean.setSrcPureText(srcPureText);
                    tuInfoBean.setTgtLangugage(tgtLanguage);
                    List<TranslationUnitAnalysisResult> exterMatchResult = tmMatcher.analysTranslationUnit(curProject, tuInfoBean);
                    int exterMatchRate = 0;
                    if (exterMatchResult != null && !exterMatchResult.isEmpty()) {
                        // Only the first result's similarity is used.
                        exterMatchRate = exterMatchResult.get(0).getSimilarity();
                    }

                    if (exterMatchRate == 100 || exterMatchRate == 101) {
                        // When external 100% (resp. 101%) matches are to be locked, the words are
                        // recorded as locked words instead of as an external match.
                        boolean lockSegment = exterMatchRate == 100 ? model.isLockExter100() : model.isLockExter101();
                        if (lockSegment) {
                            wordFaResult.setLockedPara(QAConstant.QA_FIRST);
                            wordFaResult.setLockedWords(wordsCount);
                            needLockRowIdList.add(rowId);
                        } else {
                            wordFaResult.setExterRepeatPara(QAConstant.QA_FIRST);
                            wordFaResult.setExterMatchWords(wordsCount);
                            wordFaResult.setAllExterMatchWords(exterMatchRate, wordsCount);
                        }
                        // Drop the segment so it is not compared any more.
                        it.remove();
                        if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
                            return null;
                        }
                        continue;
                    }

                    // Step 2: internal matching, only when internal repeats are to be checked.
                    if (interRepeat) {
                        // Without internal fuzzy matching the threshold passed on is 100.
                        int interNewWordsMaxMatchRate = interMatch ? newWordsMaxMatchRate : 100;
                        internalMatching(rowId, srcPureText, tagStr, textLength, preTextHash, nextTextHash, interNewWordsMaxMatchRate);
                    }
                    // The internal rate is read from the bean (never below 0); the return value of
                    // internalMatching is not used here.
                    int inteMatchRate = Math.max(bean.getThisMatchRate(), 0);
                    int maxMatchRate = Math.max(exterMatchRate, inteMatchRate);

                    if (inteMatchRate == 100 || inteMatchRate == 101) {
                        if (model.isLockInterRepeat()) {
                            wordFaResult.setLockedPara(QAConstant.QA_FIRST);
                            wordFaResult.setLockedWords(wordsCount);
                            needLockRowIdList.add(rowId);
                        } else {
                            wordFaResult.setInterRepeatPara(QAConstant.QA_FIRST);
                            wordFaResult.setInterMatchWords(wordsCount);
                            wordFaResult.setAllInterMatchWords(inteMatchRate, wordsCount);
                        }
                    } else if (maxMatchRate < newWordsMaxMatchRate) {
                        // Best rate is below the threshold: the segment counts as new words.
                        wordFaResult.setNewPara(QAConstant.QA_FIRST);
                        wordFaResult.setNewWords(wordsCount);
                    } else if (inteMatchRate > exterMatchRate) {
                        // Internal match.
                        wordFaResult.setInterMatchPara(QAConstant.QA_FIRST);
                        wordFaResult.setInterMatchWords(wordsCount);
                        wordFaResult.setAllInterMatchWords(inteMatchRate, wordsCount);
                    } else {
                        // External match.
                        wordFaResult.setExterMatchPara(QAConstant.QA_FIRST);
                        wordFaResult.setExterMatchWords(wordsCount);
                        wordFaResult.setAllExterMatchWords(exterMatchRate, wordsCount);
                    }

                    // Drop the segment so it is not compared any more.
                    it.remove();
                    if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
                        return null;
                    }
                }
                wordsFAResultMap.put(filePath, wordFaResult);
            }

            if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
                return null;
            }
        }
    }
    lockRepeatTU(wordsFAResultMap);
    return wordsFAResultMap;
}
Use of net.heartsome.cat.ts.core.qa.WordsFABean in project translationstudio8 by heartsome.
Class WordsFA, method internalMatching.
/**
 * Internal matching: compares one source segment (the "comparer") with the source segments still
 * held in {@code allSrcTextsMap} for every analysed file (the "compared" segments).
 * <p>
 * For each compared segment that passes {@code checkIsideal}, the similarity is computed with
 * {@code MatchQuality.similarity}; when tags are not ignored and the tag strings differ,
 * {@code tagPenalty} is subtracted. The resulting rate is stored on the compared segment's bean
 * through {@code setThisMatchRate}. When {@code ignoreCase} is set, both texts are lower-cased
 * before they are compared.
 * <p>
 * Original author's note on context matching: it is not needed when working on split XLIFF files,
 * so two empty values can be passed when obtaining the weighting coefficient.
 *
 * @param rowId
 *            unique identifier of the comparer's trans-unit; entries whose key equals it are
 *            skipped so the segment is not compared with itself
 * @param srcPureText
 *            pure text of the comparer's source node
 * @param tagStr
 *            tag string of the comparer, compared with each candidate's tag string when tags are
 *            not ignored
 * @param textLength
 *            length of the comparer's source text (not used by this method)
 * @param preTextHash
 *            hash of the preceding context (not used by this method)
 * @param nextTexthash
 *            hash of the following context (not used by this method)
 * @param interNewWordsMaxMatchRate
 *            threshold handed to {@code checkIsideal} to pre-filter candidates
 * @return the highest (penalty-adjusted) rate found among the compared segments, or 0 when no
 *         candidate produced a rate above 0
 */
public int internalMatching(String rowId, String srcPureText, String tagStr, int textLength, String preTextHash, String nextTexthash, int interNewWordsMaxMatchRate) {
    // Best match rate found so far.
    int matchRate = 0;
    // The comparer's text is the same for every candidate, so fold its case only once.
    String comparerText = ignoreCase ? srcPureText.toLowerCase() : srcPureText;

    for (IFile iFile : model.getAnalysisIFileList()) {
        String filePath = iFile.getLocation().toOSString();
        Map<String, WordsFABean> fileSrcTextMap = allSrcTextsMap.get(filePath);
        // A file without an entry in allSrcTextsMap has nothing to compare against.
        if (fileSrcTextMap == null) {
            continue;
        }

        for (Entry<String, WordsFABean> entry : fileSrcTextMap.entrySet()) {
            // The comparer is not compared with itself.
            if (rowId.equals(entry.getKey())) {
                continue;
            }
            WordsFABean curBean = entry.getValue();
            String curSrcPureText = curBean.getSrcPureText();
            String candidateText = ignoreCase ? curSrcPureText.toLowerCase() : curSrcPureText;
            // Pre-filter: skip candidates that checkIsideal rejects for this threshold.
            if (!checkIsideal(comparerText, candidateText, interNewWordsMaxMatchRate)) {
                continue;
            }

            int curMatchRate = MatchQuality.similarity(comparerText, candidateText);
            if (!ignoreTag) {
                String curTagStr = curBean.getTagStr();
                if (!curTagStr.equals(tagStr)) {
                    curMatchRate -= tagPenalty;
                }
            }
            if (curMatchRate > matchRate) {
                matchRate = curMatchRate;
            }
            // Original comment: refresh the candidate's match rate when it is lower than the rate
            // of this comparison.
            // NOTE(review): the call is unconditional here; whether the setter keeps the larger
            // value is not visible from this method - confirm in WordsFABean.
            curBean.setThisMatchRate(curMatchRate);
        }
    }
    return matchRate;
}
Aggregations