Use of net.heartsome.cat.common.bean.TranslationUnitAnalysisResult in project translationstudio8 by heartsome:
class TMDatabaseImpl, method translationUnitAnalysis.
/**
 * Analyses one translation unit against the TM database (Oracle variant) and adds one
 * {@link TranslationUnitAnalysisResult} per candidate whose similarity reaches
 * {@code minSimilarity}.
 * <p>
 * A similarity of 101 denotes a context match: a 100% text match whose stored
 * preceding/following contexts equal {@code preHash}/{@code nextHash}. The context
 * check only runs for the "U" edition.
 *
 * @param pureText        source text without inline tags
 * @param fullText        source text including inline tags
 * @param srcLang         source language code
 * @param tgtLang         target language code
 * @param minSimilarity   minimum similarity (percent) for a candidate to be reported
 * @param caseSensitive   whether the similarity comparison is case sensitive
 * @param matchUpperLimit not used in this implementation; kept for interface compatibility
 * @param contextSize     number of leading context entries considered for the 101% check
 * @param preHash         hash of the preceding context of the analysed unit
 * @param nextHash        hash of the following context of the analysed unit
 * @param isIngoreTarget  when true, differing inline tags are not penalised
 * @param analysisResults collector the results are added to (modified in place)
 * @param tagPelanty      penalty subtracted from the similarity when inline tags differ
 * @throws SQLException if the database query fails
 */
@Override
public void translationUnitAnalysis(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, TranslationUnitAnalysisResults analysisResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    if (ngrams.length == 0) {
        return;
    }
    List<String> tpkids = getCandidatesTextDataPks4Oracle(srcLang, minSimilarity, ngrams);
    if (tpkids.isEmpty()) {
        return;
    }
    // Split the candidate keys into chunks of at most 100 ids, because Oracle rejects
    // "WHERE xx IN (...)" lists with more than 1000 expressions (ORA-01795).
    List<String> tmpTpkids = new ArrayList<String>();
    StringBuilder buf = new StringBuilder();
    for (int i = 0; i < tpkids.size(); i++) {
        buf.append(",").append(tpkids.get(i));
        if ((i + 1) % 100 == 0) {
            // substring(1) drops the leading comma
            tmpTpkids.add(buf.substring(1));
            buf.setLength(0);
        }
    }
    // BUGFIX: the previous version returned early whenever the remainder buffer was
    // empty, i.e. when the candidate count was an exact multiple of 100, silently
    // discarding all candidates. Only the non-empty remainder is appended now.
    if (buf.length() > 0) {
        tmpTpkids.add(buf.substring(1));
    }
    // Combine the chunks into "A.TPKID IN (...) OR A.TPKID IN (...) ..."
    buf.setLength(0);
    buf.append("A.TPKID IN (").append(tmpTpkids.get(0)).append(")");
    for (int i = 1; i < tmpTpkids.size(); i++) {
        buf.append(" OR A.TPKID IN (").append(tmpTpkids.get(i)).append(")");
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch-wordsFA");
    textDataSql = textDataSql.replace("__WHERE__", buf.toString());
    textDataSql = textDataSql.replace("__TARGETLANG__", tgtLang);
    Statement stm = null;
    ResultSet rs = null;
    try {
        stm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // Expected result columns (1-based): ..., 3 = PURE, 4 = CONTENT, 5 = PRECONTEXT, 6 = NEXTCONTEXT
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            int similarity;
            if (caseSensitive) {
                similarity = similarity(pureText, _pureText);
            } else {
                similarity = similarity(pureText.toLowerCase(), _pureText.toLowerCase());
            }
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // inline tag content differs: apply the tag penalty
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            if (similarity == 100 && CommonFunction.checkEdition("U")) {
                // Context-match check. Null contexts are treated as empty strings; the
                // old "!= null" re-check after coalescing was always true and is gone.
                String preContext = rs.getString(5);
                String nextContext = rs.getString(6);
                preContext = preContext == null ? "" : preContext;
                nextContext = nextContext == null ? "" : nextContext;
                String[] preContexts = preContext.split(",");
                String[] nextContexts = nextContext.split(",");
                if (preContexts.length > contextSize) {
                    // keep only the first contextSize entries
                    StringBuilder ctx = new StringBuilder();
                    for (int i = 0; i < contextSize; i++) {
                        if (ctx.length() > 0) {
                            ctx.append(',');
                        }
                        ctx.append(preContexts[i]);
                    }
                    preContext = ctx.toString();
                }
                if (nextContexts.length > contextSize) {
                    // keep only the first contextSize entries
                    StringBuilder ctx = new StringBuilder();
                    for (int i = 0; i < contextSize; i++) {
                        if (ctx.length() > 0) {
                            ctx.append(',');
                        }
                        ctx.append(nextContexts[i]);
                    }
                    nextContext = ctx.toString();
                }
                if (preHash.equals(preContext) && nextHash.equals(nextContext)) {
                    similarity = 101;
                }
            }
            analysisResults.add(new TranslationUnitAnalysisResult(similarity, dbName));
        }
    } finally {
        // Close in reverse acquisition order. The second Statement (tmpStm) that was
        // created but never executed has been removed.
        if (rs != null) {
            rs.close();
        }
        if (stm != null) {
            stm.close();
        }
    }
}
Use of net.heartsome.cat.common.bean.TranslationUnitAnalysisResult in project translationstudio8 by heartsome:
class TMDatabaseImpl, method translationUnitAnalysis.
/**
 * Analyses one translation unit against the TM database and adds one
 * {@link TranslationUnitAnalysisResult} per candidate whose similarity reaches
 * {@code minSimilarity}. A similarity of 101 denotes a context match (100% text match
 * with identical preceding/following context hashes); it is only computed for the
 * "U" edition.
 *
 * @param pureText        source text without inline tags
 * @param fullText        source text including inline tags
 * @param srcLang         source language code
 * @param tgtLang         target language code
 * @param minSimilarity   minimum similarity (percent) for a candidate to be reported
 * @param caseSensitive   whether the similarity comparison is case sensitive
 * @param matchUpperLimit not used in this implementation; kept for interface compatibility
 * @param contextSize     number of leading context entries considered for the 101% check
 * @param preHash         hash of the preceding context of the analysed unit
 * @param nextHash        hash of the following context of the analysed unit
 * @param isIngoreTarget  when true, differing inline tags are not penalised
 * @param analysisResults collector the results are added to (modified in place)
 * @param tagPelanty      penalty subtracted from the similarity when inline tags differ
 * @throws SQLException if the database query fails
 */
@Override
public void translationUnitAnalysis(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, TranslationUnitAnalysisResults analysisResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    int size = ngrams.length;
    if (size == 0) {
        return;
    }
    // N-gram pre-filter bounds: a candidate sharing fewer than `min` or more than `max`
    // n-grams with the source cannot reach minSimilarity.
    int min = size * minSimilarity / 100;
    int max = size * 100 / minSimilarity;
    Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);
    // Build the comma-separated IN-list of candidate primary keys.
    StringBuilder buf = new StringBuilder();
    for (Entry<String, Integer> entry : tpkids.entrySet()) {
        // Note: the count was needlessly widened to float before; the bounds are ints.
        int count = entry.getValue();
        if (count >= min && count <= max) {
            buf.append(",").append(entry.getKey());
        }
    }
    if (buf.length() == 0) {
        return;
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch-wordsFA");
    // substring(1) drops the leading comma
    textDataSql = textDataSql.replace("__SET__", buf.substring(1));
    textDataSql = textDataSql.replace("__TARGETLANG__", tgtLang);
    Statement stm = null;
    ResultSet rs = null;
    try {
        stm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // SELECT GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT FROM TEXTDATA WHERE TPKID IN (__SET__)
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            int similarity;
            if (caseSensitive) {
                similarity = similarity(pureText, _pureText);
            } else {
                similarity = similarity(pureText.toLowerCase(), _pureText.toLowerCase());
            }
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // inline tag content differs: apply the tag penalty
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            if (similarity == 100 && CommonFunction.checkEdition("U")) {
                // Context-match check: upgrade to 101% when the stored contexts,
                // truncated to contextSize entries, equal the unit's context hashes.
                String preContext = rs.getString(5);
                String nextContext = rs.getString(6);
                if (preContext != null && nextContext != null) {
                    String[] preContexts = preContext.split(",");
                    String[] nextContexts = nextContext.split(",");
                    if (preContexts.length > contextSize) {
                        // keep only the first contextSize entries
                        StringBuilder ctx = new StringBuilder();
                        for (int i = 0; i < contextSize; i++) {
                            if (ctx.length() > 0) {
                                ctx.append(',');
                            }
                            ctx.append(preContexts[i]);
                        }
                        preContext = ctx.toString();
                    }
                    if (nextContexts.length > contextSize) {
                        // keep only the first contextSize entries
                        StringBuilder ctx = new StringBuilder();
                        for (int i = 0; i < contextSize; i++) {
                            if (ctx.length() > 0) {
                                ctx.append(',');
                            }
                            ctx.append(nextContexts[i]);
                        }
                        nextContext = ctx.toString();
                    }
                    if (preHash.equals(preContext) && nextHash.equals(nextContext)) {
                        similarity = 101;
                    }
                }
            }
            analysisResults.add(new TranslationUnitAnalysisResult(similarity, dbName));
        }
    } finally {
        // Close in reverse acquisition order. The second Statement (tmpStm) that was
        // created but never executed has been removed.
        if (rs != null) {
            rs.close();
        }
        if (stm != null) {
            stm.close();
        }
    }
}
Use of net.heartsome.cat.common.bean.TranslationUnitAnalysisResult in project translationstudio8 by heartsome:
class LockTMSegment, method searchTmAndLockTu.
/**
 * Queries the TM database for matches of one segment and, depending on the configured
 * lock options, locks the trans-unit on a 101% (context) or 100% (full) match. Per-file
 * lock counters and the list of row ids to lock are updated accordingly.
 *
 * @param xlfPath    path of the XLIFF file the segment belongs to
 * @param source_lan source language code
 * @param target_lan target language code
 * @param srcTextMap segment data keyed by "content", "pureText", "preHash", "nextHash", "rowId"
 */
private void searchTmAndLockTu(String xlfPath, String source_lan, String target_lan, Map<String, String> srcTextMap) {
    tuInfoBean = new TransUnitInfo2TranslationBean();
    String srcContent = srcTextMap.get("content");
    if (srcContent == null || "".equals(srcContent)) {
        return;
    }
    tuInfoBean.setNextContext(srcTextMap.get("nextHash"));
    tuInfoBean.setPreContext(srcTextMap.get("preHash"));
    tuInfoBean.setSrcFullText(srcContent);
    tuInfoBean.setSrcLanguage(source_lan);
    tuInfoBean.setSrcPureText(srcTextMap.get("pureText"));
    tuInfoBean.setTgtLangugage(target_lan);
    // Dead counter `int a` removed — it was incremented but never read.
    List<TranslationUnitAnalysisResult> tmResult = tmMatcher.analysTranslationUnit(curProject, tuInfoBean);
    if (tmResult == null || tmResult.isEmpty()) {
        return;
    }
    // Results appear to be ordered best-first; only the top similarity is considered.
    int similarity = tmResult.get(0).getSimilarity();
    if (isLockedContextMatch && similarity == 101) {
        xlfHandler.lockTransUnit(xlfPath, "no");
        Integer lockedNum = lockedContextResult.get(xlfPath);
        lockedContextResult.put(xlfPath, lockedNum == null ? 1 : lockedNum + 1);
        needLockRowIdMap.get(xlfPath).add(srcTextMap.get("rowId"));
    } else if (isLockedFullMatch && similarity == 100) {
        xlfHandler.lockTransUnit(xlfPath, "no");
        Integer lockedNum = lockedFullMatchResult.get(xlfPath);
        lockedFullMatchResult.put(xlfPath, lockedNum == null ? 1 : lockedNum + 1);
        needLockRowIdMap.get(xlfPath).add(srcTextMap.get("rowId"));
    }
}
Use of net.heartsome.cat.common.bean.TranslationUnitAnalysisResult in project translationstudio8 by heartsome:
class WordsFA, method matching.
/**
 * Runs the word-count match analysis over every file and language pair.
 * Returns a map from file path to its {@link WordsFAResult}; a null return means the
 * user clicked the cancel button and the caller must abort the whole operation.
 */
public Map<String, WordsFAResult> matching(IProgressMonitor monitor) {
int matchTravelTuIndex = 0;
// word-count analysis results, keyed by file path
Map<String, WordsFAResult> wordsFAResultMap = new LinkedHashMap<String, WordsFAResult>();
Map<String, ArrayList<String>> languages = handler.getLanguages();
// first register an empty result object for every analysed file
for (IFile iFile : model.getAnalysisIFileList()) {
String filePath = iFile.getLocation().toOSString();
wordsFAResultMap.put(filePath, new WordsFAResult());
}
// result object of the file currently being processed
WordsFAResult wordFaResult;
for (Entry<String, ArrayList<String>> langEntry : languages.entrySet()) {
String srcLanguage = langEntry.getKey();
for (String tgtLanguage : langEntry.getValue()) {
// fetch the source texts of every file for this language pair
allSrcTextsMap = getAllXlfSrcTexts(srcLanguage.toUpperCase(), tgtLanguage.toUpperCase());
// A null return is documented as user cancellation; NOTE(review): this `continue`s
// to the next language pair instead of returning null — confirm this is intended.
if (allSrcTextsMap == null) {
continue;
}
WordsFABean bean;
List<TranslationUnitAnalysisResult> exterMatchResult = null;
// word-count result accumulation per file
for (Entry<String, Map<String, WordsFABean>> textEntry : allSrcTextsMap.entrySet()) {
String filePath = textEntry.getKey();
IFile iFile = ResourceUtils.fileToIFile(filePath);
// POJO accumulating the match results of this file
wordFaResult = wordsFAResultMap.get(filePath);
Map<String, WordsFABean> fileSrcTextMap = textEntry.getValue();
monitor.setTaskName(MessageFormat.format(Messages.getString("qa.fileAnalysis.WordsFA.tip1"), iFile.getFullPath().toOSString()));
Iterator<Entry<String, WordsFABean>> it = fileSrcTextMap.entrySet().iterator();
while (it.hasNext()) {
Entry<String, WordsFABean> entry = (Entry<String, WordsFABean>) it.next();
matchTravelTuIndex++;
String rowId = entry.getKey();
bean = entry.getValue();
String srcPureText = bean.getSrcPureText();
int textLength = bean.getSrcLength();
String preTextHash = bean.getPreHash();
String nextTextHash = bean.getNextHash();
boolean isLocked = bean.isLocked();
String tagStr = bean.getTagStr();
int wordsCount = CountWord.wordCount(srcPureText, srcLanguage);
// if the segment is already locked, count it as locked words and move on
if (isLocked) {
wordFaResult.setLockedPara(QAConstant.QA_FIRST);
wordFaResult.setLockedWords(wordsCount);
// Remove this segment so it is not compared again, then restart iteration.
// NOTE(review): re-creating the iterator after each removal avoids a
// ConcurrentModificationException but restarts from the beginning — O(n^2)
// overall; presumably intentional, confirm before changing.
fileSrcTextMap.remove(rowId);
it = fileSrcTextMap.entrySet().iterator();
continue;
}
// UNDO: should first check whether the database is available.
// Step 1: external (TM database) matching — package the parameters.
tuInfoBean = new TransUnitInfo2TranslationBean();
tuInfoBean.setNextContext(bean.getNextHash());
tuInfoBean.setPreContext(bean.getPreHash());
tuInfoBean.setSrcFullText(bean.getSrcContent());
tuInfoBean.setSrcLanguage(srcLanguage);
tuInfoBean.setSrcPureText(bean.getSrcPureText());
tuInfoBean.setTgtLangugage(tgtLanguage);
exterMatchResult = tmMatcher.analysTranslationUnit(curProject, tuInfoBean);
int exterMatchRate = 0;
if (exterMatchResult != null && exterMatchResult.size() > 0) {
exterMatchRate = exterMatchResult.get(0).getSimilarity();
}
if (exterMatchRate == 100) {
// If "lock external 100% matches" is enabled, the words count as locked
// words rather than external matches; same scheme applies to 101% below.
if (model.isLockExter100()) {
wordFaResult.setLockedPara(QAConstant.QA_FIRST);
wordFaResult.setLockedWords(wordsCount);
needLockRowIdList.add(rowId);
} else {
wordFaResult.setExterRepeatPara(QAConstant.QA_FIRST);
wordFaResult.setExterMatchWords(wordsCount);
wordFaResult.setAllExterMatchWords(exterMatchRate, wordsCount);
}
// remove this segment so it is not compared again, then restart iteration
fileSrcTextMap.remove(rowId);
it = fileSrcTextMap.entrySet().iterator();
if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
return null;
}
continue;
}
if (exterMatchRate == 101) {
// 101% = external context match; lock or count analogously to the 100% case
if (model.isLockExter101()) {
wordFaResult.setLockedPara(QAConstant.QA_FIRST);
wordFaResult.setLockedWords(wordsCount);
needLockRowIdList.add(rowId);
} else {
wordFaResult.setExterRepeatPara(QAConstant.QA_FIRST);
wordFaResult.setExterMatchWords(wordsCount);
wordFaResult.setAllExterMatchWords(exterMatchRate, wordsCount);
}
// remove this segment so it is not compared again, then restart iteration
fileSrcTextMap.remove(rowId);
it = fileSrcTextMap.entrySet().iterator();
if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
return null;
}
continue;
}
// Step 2: internal matching (against the other segments of the job)
int inteMatchRate = 0;
// only search for internal matches when internal-repeat checking is enabled
if (interRepeat) {
// without internal fuzzy matching, only exact internal repeats (rate 100) count
int interNewWordsMaxMatchRate = interMatch ? newWordsMaxMatchRate : 100;
// NOTE(review): internalMatching() may mutate fileSrcTextMap and/or set
// bean.thisMatchRate as a side effect — the iterator reset below depends on it.
internalMatching(rowId, srcPureText, tagStr, textLength, preTextHash, nextTextHash, interNewWordsMaxMatchRate);
}
inteMatchRate = bean.getThisMatchRate() > inteMatchRate ? bean.getThisMatchRate() : inteMatchRate;
int maxMacthRate = exterMatchRate > inteMatchRate ? exterMatchRate : inteMatchRate;
if (inteMatchRate == 100 || inteMatchRate == 101) {
if (model.isLockInterRepeat()) {
wordFaResult.setLockedPara(QAConstant.QA_FIRST);
wordFaResult.setLockedWords(wordsCount);
needLockRowIdList.add(rowId);
} else {
wordFaResult.setInterRepeatPara(QAConstant.QA_FIRST);
wordFaResult.setInterMatchWords(wordsCount);
wordFaResult.setAllInterMatchWords(inteMatchRate, wordsCount);
}
} else if (maxMacthRate < newWordsMaxMatchRate) {
// best match is below the "new words" threshold: count as new words
wordFaResult.setNewPara(QAConstant.QA_FIRST);
wordFaResult.setNewWords(wordsCount);
} else {
if (inteMatchRate > exterMatchRate) {
// internal match wins
wordFaResult.setInterMatchPara(QAConstant.QA_FIRST);
wordFaResult.setInterMatchWords(wordsCount);
wordFaResult.setAllInterMatchWords(inteMatchRate, wordsCount);
} else {
// external match wins
wordFaResult.setExterMatchPara(QAConstant.QA_FIRST);
wordFaResult.setExterMatchWords(wordsCount);
wordFaResult.setAllExterMatchWords(exterMatchRate, wordsCount);
}
}
// remove this segment so it is not compared again, then restart iteration
fileSrcTextMap.remove(rowId);
it = fileSrcTextMap.entrySet().iterator();
if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
return null;
}
}
wordsFAResultMap.put(filePath, wordFaResult);
}
if (!handler.monitorWork(monitor, matchTravelTuIndex, workInterval, false)) {
return null;
}
}
}
lockRepeatTU(wordsFAResultMap);
return wordsFAResultMap;
}
Use of net.heartsome.cat.common.bean.TranslationUnitAnalysisResult in project translationstudio8 by heartsome:
class DBOperator, method translationUnitAnalysis.
/**
 * Analyses one translation unit against the TM database and adds one
 * {@link TranslationUnitAnalysisResult} per candidate whose similarity reaches
 * {@code minSimilarity}. A similarity of 101 denotes a context match (100% text match
 * with identical preceding/following context hashes); it is only computed for the
 * "U" edition.
 *
 * @param pureText        source text without inline tags
 * @param fullText        source text including inline tags
 * @param srcLang         source language code
 * @param tgtLang         target language code
 * @param minSimilarity   minimum similarity (percent) for a candidate to be reported
 * @param caseSensitive   whether the similarity comparison is case sensitive
 * @param matchUpperLimit not used in this implementation; kept for interface compatibility
 * @param contextSize     number of leading context entries considered for the 101% check
 * @param preHash         hash of the preceding context of the analysed unit
 * @param nextHash        hash of the following context of the analysed unit
 * @param isIngoreTarget  when true, differing inline tags are not penalised
 * @param analysisResults collector the results are added to (modified in place)
 * @param tagPelanty      penalty subtracted from the similarity when inline tags differ
 * @throws SQLException if the commit or the database query fails
 */
public void translationUnitAnalysis(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, TranslationUnitAnalysisResults analysisResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    int size = ngrams.length;
    if (size == 0) {
        return;
    }
    // Presumably flushes pending writes so the query below sees them — confirm.
    this.commit();
    // N-gram pre-filter bounds: a candidate sharing fewer than `min` or more than `max`
    // n-grams with the source cannot reach minSimilarity.
    int min = size * minSimilarity / 100;
    int max = size * 100 / minSimilarity;
    Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);
    // Build the comma-separated IN-list of candidate primary keys.
    StringBuilder buf = new StringBuilder();
    for (Entry<String, Integer> entry : tpkids.entrySet()) {
        // Note: the count was needlessly widened to float before; the bounds are ints.
        int count = entry.getValue();
        if (count >= min && count <= max) {
            buf.append(",").append(entry.getKey());
        }
    }
    if (buf.length() == 0) {
        return;
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch-wordsFA");
    // substring(1) drops the leading comma
    textDataSql = textDataSql.replace("__SET__", buf.substring(1));
    textDataSql = textDataSql.replace("__TARGETLANG__", tgtLang);
    Statement stm = null;
    ResultSet rs = null;
    try {
        stm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // SELECT GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT FROM TEXTDATA WHERE TPKID IN (__SET__)
        // SELECT A.TPKID, A.GROUPID, A.PURE, A.CONTENT, A.PRECONTEXT, A.NEXTCONTEXT FROM TEXTDATA A INNER JOIN
        // TEXTDATA B
        // ON A.GROUPID = B.GROUPID WHERE A.TPKID IN (__SET__) AND B.LANG = '__TARGETLANG__' ORDER BY A.GROUPID
        // DESC;
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            int similarity;
            if (caseSensitive) {
                similarity = similarity(pureText, _pureText);
            } else {
                similarity = similarity(pureText.toLowerCase(), _pureText.toLowerCase());
            }
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // inline tag content differs: apply the tag penalty
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            if (similarity == 100 && CommonFunction.checkEdition("U")) {
                // Context-match check: upgrade to 101% when the stored contexts,
                // truncated to contextSize entries, equal the unit's context hashes.
                String preContext = rs.getString(5);
                String nextContext = rs.getString(6);
                if (preContext != null && nextContext != null) {
                    String[] preContexts = preContext.split(",");
                    String[] nextContexts = nextContext.split(",");
                    if (preContexts.length > contextSize) {
                        // keep only the first contextSize entries
                        StringBuilder ctx = new StringBuilder();
                        for (int i = 0; i < contextSize; i++) {
                            if (ctx.length() > 0) {
                                ctx.append(',');
                            }
                            ctx.append(preContexts[i]);
                        }
                        preContext = ctx.toString();
                    }
                    if (nextContexts.length > contextSize) {
                        // keep only the first contextSize entries
                        StringBuilder ctx = new StringBuilder();
                        for (int i = 0; i < contextSize; i++) {
                            if (ctx.length() > 0) {
                                ctx.append(',');
                            }
                            ctx.append(nextContexts[i]);
                        }
                        nextContext = ctx.toString();
                    }
                    if (preHash.equals(preContext) && nextHash.equals(nextContext)) {
                        similarity = 101;
                    }
                }
            }
            analysisResults.add(new TranslationUnitAnalysisResult(similarity, dbName));
        }
    } finally {
        // Close in reverse acquisition order. The second Statement (tmpStm) that was
        // created but never executed has been removed.
        if (rs != null) {
            rs.close();
        }
        if (stm != null) {
            stm.close();
        }
    }
}
Aggregations