use of net.heartsome.cat.common.bean.TmxSegement in project translationstudio8 by heartsome.
the class DBOperator method fuzzySearch.
/**
 * Performs a fuzzy lookup of a source segment and appends every sufficiently similar translation unit
 * to {@code searchResults}.
 *
 * <p>
 * Steps, in order:
 * <ol>
 * <li>N-grams are generated from {@code pureText}; if there are none the method returns immediately.</li>
 * <li>Pending work is committed, then candidate TEXTDATA keys are fetched via
 * {@code getCandidatesTextDataPks} and filtered by their n-gram hit count.</li>
 * <li>Each candidate source row is scored with {@code similarity(...)}; when {@code isIngoreTarget} is
 * {@code false} and the inner tag content differs from that of {@code fullText}, {@code tagPelanty} is
 * subtracted. Rows scoring below {@code minSimilarity} are skipped.</li>
 * <li>For each remaining row the first matching target-language row of the same group is loaded; rows with
 * a null/empty target, or already contained in {@code searchResults}, are skipped.</li>
 * <li>A 100% match whose (size-limited) stored contexts equal {@code preHash} and {@code nextHash} is
 * promoted to similarity 101, provided {@code CommonFunction.checkEdition("U")} returns true.</li>
 * </ol>
 *
 * @param pureText
 *            source text without tags; used for n-gram generation and similarity scoring
 * @param fullText
 *            source text including inner tags; only its inner tag content is compared
 * @param srcLang
 *            source language code
 * @param tgtLang
 *            target language code; substituted verbatim into the target SQL template
 * @param minSimilarity
 *            minimum accepted similarity in percent. Must be non-zero: it is used as a divisor, so 0 raises
 *            an {@link ArithmeticException}
 * @param caseSensitive
 *            when {@code false}, both texts are lower-cased before scoring
 * @param matchUpperLimit
 *            not used by this implementation
 * @param contextSize
 *            maximum number of comma-separated context hashes compared on each side
 * @param preHash
 *            hash(es) of the preceding context of the segment being searched; {@code null} never matches
 * @param nextHash
 *            hash(es) of the following context of the segment being searched; {@code null} never matches
 * @param isIngoreTarget
 *            when {@code true}, no tag penalty is applied
 * @param searchResults
 *            collector that receives the matches
 * @param tagPelanty
 *            points subtracted from the similarity when inner tag content differs
 * @throws SQLException
 *             if any database access fails
 */
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
	int[] ngrams = generateNgrams(srcLang, pureText);
	int size = ngrams.length;
	if (size == 0) {
		return;
	}
	this.commit();
	// Bounds on the n-gram count a candidate may have to still be able to reach minSimilarity.
	int min = size * minSimilarity / 100;
	int max = size * 100 / minSimilarity;
	Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);
	// Build the id list for the IN (...) clause; every id is prefixed with a comma and the
	// leading comma is stripped when the list is substituted into the SQL.
	StringBuilder idList = new StringBuilder();
	for (Entry<String, Integer> entry : tpkids.entrySet()) {
		int count = entry.getValue();
		if (count >= min && count <= max) {
			idList.append(",");
			idList.append(entry.getKey());
		}
	}
	if (idList.length() == 0) {
		return;
	}
	String tag = TranslationMemoryTools.getInnerTagContent(fullText);
	// SELECT TPKID, GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT FROM TEXTDATA WHERE TPKID IN (__SET__) ORDER BY
	// GROUPID DESC
	String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
	textDataSql = textDataSql.replace("__SET__", idList.substring(1));
	Statement stm = null;
	ResultSet rs = null;
	Statement tmpStm = null;
	try {
		stm = conn.createStatement();
		// Second statement so the per-row target lookup does not disturb the open source cursor.
		tmpStm = conn.createStatement();
		rs = stm.executeQuery(textDataSql);
		// SELECT TPKID, PURE, CONTENT, CREATIONID, CREATIONDATE, CHANGEID, CHANGEDATE, PROJECTREF
		// FROM TEXTDATA INNER JOIN MTU ON MTU.MTUPKID = TEXTDATA.GROUPID AND TEXTDATA.GROUPID = __GROUPID__ AND
		// TEXTDATA.LANG = '__LANG__'
		String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
		String dbName = getMetaData().getDatabaseName();
		while (rs.next()) {
			String _pureText = rs.getString(3);
			String _fullText = rs.getString(4);
			int similarity = caseSensitive ? similarity(pureText, _pureText)
					: similarity(pureText.toLowerCase(), _pureText.toLowerCase());
			String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
			if (!isIngoreTarget && !tag.equals(_tag)) {
				// Inner tag content differs: apply the tag penalty.
				similarity -= tagPelanty;
			}
			if (similarity < minSimilarity) {
				continue;
			}
			int tuId = rs.getInt(2);
			String temptargetSql = targetSql.replace("__GROUPID__", tuId + "");
			ResultSet rs1 = null;
			try {
				rs1 = tmpStm.executeQuery(temptargetSql);
				// Only the first target row of the group is considered.
				if (rs1.next()) {
					// Fix for bug #3406 (yule): tags in XLIFF may be incompatible with TMX tags.
					TmxSegement source = new TmxSegement(_pureText, InnerTagClearUtil.clearTmx4Xliff(_fullText), srcLang);
					source.setDbPk(rs.getInt(1));
					_pureText = rs1.getString(2);
					_fullText = rs1.getString(3);
					if (_pureText == null || _pureText.equals("") || _fullText == null || _fullText.equals("")) {
						continue;
					}
					// Fix for bug #3406 (yule): tags in XLIFF may be incompatible with TMX tags.
					TmxSegement target = new TmxSegement(_pureText, InnerTagClearUtil.clearTmx4Xliff(_fullText), tgtLang);
					target.setDbPk(rs1.getInt(1));
					TmxTU tu = new TmxTU(source, target);
					FuzzySearchResult searchRs = new FuzzySearchResult(tu);
					if (searchResults.contains(searchRs)) {
						continue;
					}
					String creationId = rs1.getString(4);
					creationId = creationId == null ? "" : creationId;
					String creationDate = "";
					Timestamp tempCdate = rs1.getTimestamp(5);
					if (tempCdate != null) {
						creationDate = DateUtils.formatToUTC(tempCdate.getTime());
					}
					String changeid = rs1.getString(6);
					changeid = changeid == null ? "" : changeid;
					String changeDate = "";
					Timestamp tempChangeDate = rs1.getTimestamp(7);
					if (tempChangeDate != null) {
						changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
					}
					tu.setCreationDate(creationDate);
					tu.setCreationUser(creationId);
					tu.setChangeDate(changeDate);
					tu.setChangeUser(changeid);
					List<TmxProp> attrs = getTuMprops(tuId, "TU");
					tu.setProps(attrs);
					String preContext = rs.getString(5);
					String nextContext = rs.getString(6);
					// The TU keeps the full stored contexts; only the comparison below is size-limited.
					tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
					tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
					if (similarity == 100 && CommonFunction.checkEdition("U")) {
						if (preContext != null && nextContext != null) {
							preContext = truncateContextHashes(preContext, contextSize);
							nextContext = truncateContextHashes(nextContext, contextSize);
							// Stored contexts are non-null here, so calling equals on them keeps a
							// null preHash/nextHash from raising a NullPointerException.
							if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
								similarity = 101;
							}
						}
					}
					searchRs.setDbName(dbName);
					searchRs.setSimilarity(similarity);
					searchRs.setDbOp(this);
					searchRs.getTu().setTmId(tuId);
					searchResults.add(searchRs);
				}
			} finally {
				if (rs1 != null) {
					rs1.close();
				}
			}
		}
	} finally {
		// Nested so that a failing close() cannot prevent the remaining resources from being closed.
		try {
			if (rs != null) {
				rs.close();
			}
		} finally {
			try {
				if (stm != null) {
					stm.close();
				}
			} finally {
				if (tmpStm != null) {
					tmpStm.close();
				}
			}
		}
	}
}

/**
 * Limits a comma-separated context string to its first {@code contextSize} entries.
 *
 * @param context
 *            comma-separated context hashes; must not be {@code null}
 * @param contextSize
 *            maximum number of entries to keep
 * @return {@code context} unchanged when it has at most {@code contextSize} entries, otherwise the first
 *         {@code contextSize} entries re-joined with commas (an empty string when {@code contextSize} is
 *         not positive)
 */
private static String truncateContextHashes(String context, int contextSize) {
	String[] hashes = context.split(","); //$NON-NLS-1$
	if (hashes.length <= contextSize) {
		return context;
	}
	StringBuilder kept = new StringBuilder();
	for (int i = 0; i < contextSize; i++) {
		if (i > 0) {
			kept.append(',');
		}
		kept.append(hashes[i]);
	}
	return kept.toString();
}
use of net.heartsome.cat.common.bean.TmxSegement in project translationstudio8 by heartsome.
the class DBOperator method getTextDataIdByGroupIdLang.
/**
 * Loads the TEXTDATA entries of one MTU for a given language.
 *
 * <p>
 * Executes the configured {@code "get-textdataid-bygroupidlang"} statement with {@code groupId},
 * {@code type} and {@code lang} bound as parameters 1 to 3. Every returned row whose content (column 2) is
 * not {@code null} becomes a {@link TmxSegement} carrying the row's primary key (column 1), the content as
 * full text, and {@code lang} as language code. Rows with {@code null} content are skipped.
 *
 * @param groupId
 *            primary key of the MTU table
 * @param type
 *            record type, distinguishing TMX from TBX
 * @param lang
 *            language code
 * @return the matching segments in result-set order; empty when nothing matches, never {@code null}
 * @throws SQLException
 *             if preparing, executing or closing the statement fails
 */
public List<TmxSegement> getTextDataIdByGroupIdLang(int groupId, String type, String lang) throws SQLException {
	List<TmxSegement> segments = new ArrayList<TmxSegement>();
	PreparedStatement stmt = null;
	ResultSet rs = null;
	try {
		String sql = dbConfig.getOperateDbSQL("get-textdataid-bygroupidlang");
		stmt = conn.prepareStatement(sql);
		stmt.setInt(1, groupId);
		stmt.setString(2, type);
		stmt.setString(3, lang);
		rs = stmt.executeQuery();
		while (rs.next()) {
			int pk = rs.getInt(1);
			String content = rs.getString(2);
			if (content == null) {
				continue;
			}
			TmxSegement tuv = new TmxSegement();
			tuv.setDbPk(pk);
			tuv.setFullText(content);
			tuv.setLangCode(lang);
			segments.add(tuv);
		}
	} finally {
		// Nested so that a failing rs.close() cannot prevent the statement from being closed.
		try {
			if (rs != null) {
				rs.close();
			}
		} finally {
			if (stmt != null) {
				stmt.close();
			}
		}
	}
	return segments;
}
Aggregations