use of net.heartsome.cat.common.bean.TmxSegement in project translationstudio8 by heartsome.
the class Row method toTmxTu.
/**
 * Converts this row into a {@link TmxTU}.
 * <p>
 * Every cell with non-blank content becomes a {@link TmxSegement} whose full text and pure text are both
 * the cleaned cell content. The cell at index 0 is set as the source of the unit; any other non-blank cell
 * is appended as an additional segment. Null cells and cells whose content is null or whitespace-only are
 * skipped.
 *
 * @return the translation unit built from this row's cells; never {@code null}
 */
public TmxTU toTmxTu() {
    TmxTU tu = new TmxTU();
    for (int i = 0; i < cells.size(); i++) {
        Cell cell = cells.get(i);
        // Test the cell itself before dereferencing it, so a null entry is skipped instead of
        // raising a NullPointerException.
        if (cell == null) {
            continue;
        }
        String cellContent = cell.getCellConentent();
        if (cellContent == null || cellContent.trim().isEmpty()) {
            continue;
        }
        cellContent = TextUtil.cleanSpecialString(cellContent);
        TmxSegement seg = new TmxSegement();
        seg.setFullText(cellContent);
        seg.setPureText(cellContent);
        seg.setLangCode(cell.getLangCode());
        if (i == 0) {
            // The first cell is treated as the source segment.
            tu.setSource(seg);
        } else {
            tu.appendSegement(seg);
        }
    }
    return tu;
}
use of net.heartsome.cat.common.bean.TmxSegement in project translationstudio8 by heartsome.
the class TMDatabaseImpl method fuzzySearch.
/**
 * Searches this translation memory for segments similar to the given source text and adds every
 * acceptable match to {@code searchResults}.
 * <p>
 * Candidate rows are pre-selected by n-gram count through {@code getCandidatesTextDataPks}, scored with
 * {@code similarity}, penalised when their inner tags differ from those of {@code fullText}, and kept only
 * if the score is still at least {@code minSimilarity} and a non-empty target segment exists. A match
 * scoring exactly 100 whose stored contexts equal {@code preHash} and {@code nextHash} is reported with
 * similarity 101.
 *
 * @param pureText source text without inner tags; used for n-gram generation and scoring
 * @param fullText source text including inner tags; only its tag content is compared
 * @param srcLang source language code
 * @param tgtLang target language code, substituted for {@code __LANG__} in the target query
 * @param minSimilarity lowest score a match must reach to be reported
 * @param caseSensitive when {@code false}, both texts are lower-cased before scoring
 * @param matchUpperLimit not used by this implementation
 * @param contextSize maximum number of comma-separated context hashes that are compared
 * @param preHash context hash(es) preceding the segment; may be {@code null}
 * @param nextHash context hash(es) following the segment; may be {@code null}
 * @param isIngoreTarget when {@code true}, no tag penalty is applied
 * @param searchResults collector the matches are added to; results it already contains are skipped
 * @param tagPelanty amount subtracted from the score when the inner tags differ
 * @throws SQLException if a database query fails
 */
@Override
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    int size = ngrams.length;
    if (size == 0) {
        return;
    }
    int min = size * minSimilarity / 100;
    // Avoid dividing by zero: a threshold of 0 or less puts no upper bound on the n-gram count.
    int max = minSimilarity > 0 ? size * 100 / minSimilarity : Integer.MAX_VALUE;
    Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);
    // Build the comma-separated TPKID list for the IN (...) clause.
    StringBuilder idList = new StringBuilder();
    for (Entry<String, Integer> entry : tpkids.entrySet()) {
        int count = entry.getValue();
        if (count >= min && count <= max) {
            idList.append(",");
            idList.append(entry.getKey());
        }
    }
    if (idList.length() == 0) {
        return;
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
    textDataSql = textDataSql.replace("__SET__", idList.substring(1));
    // Lower-case the query text once instead of once per candidate row.
    String queryText = caseSensitive ? pureText : pureText.toLowerCase();
    Statement stm = null;
    ResultSet rs = null;
    Statement tmpStm = null;
    try {
        stm = conn.createStatement();
        tmpStm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // Expected columns: TPKID, GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT
        // NOTE(review): tgtLang is spliced into the SQL text unescaped - confirm that callers only pass
        // validated language codes.
        String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            if (_pureText == null) {
                // A row without pure text cannot be scored; skip it instead of failing with a
                // NullPointerException.
                continue;
            }
            int similarity = similarity(queryText, caseSensitive ? _pureText : _pureText.toLowerCase());
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // The inner tag contents differ, so apply the tag penalty.
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            int tuId = rs.getInt(2);
            String temptargetSql = targetSql.replace("__GROUPID__", tuId + "");
            // Presumably columns: TPKID, PURE, CONTENT, CREATIONID, CREATIONDATE, CHANGEID, CHANGEDATE,
            // PROJECTREF - verify against the configured "fuzzySearch-target" statement.
            ResultSet rs1 = null;
            try {
                rs1 = tmpStm.executeQuery(temptargetSql);
                if (rs1.next()) {
                    TmxSegement source = new TmxSegement(_pureText, _fullText, srcLang);
                    source.setDbPk(rs.getInt(1));
                    String targetPure = rs1.getString(2);
                    String targetFull = rs1.getString(3);
                    if (targetPure == null || targetPure.equals("") || targetFull == null || targetFull.equals("")) {
                        // No usable translation for this unit.
                        continue;
                    }
                    TmxSegement target = new TmxSegement(targetPure, targetFull, tgtLang);
                    target.setDbPk(rs1.getInt(1));
                    TmxTU tu = new TmxTU(source, target);
                    FuzzySearchResult searchRs = new FuzzySearchResult(tu);
                    if (searchResults.contains(searchRs)) {
                        continue;
                    }
                    String creationId = rs1.getString(4);
                    creationId = creationId == null ? "" : creationId;
                    String creationDate = "";
                    Timestamp tempCdate = rs1.getTimestamp(5);
                    if (tempCdate != null) {
                        creationDate = DateUtils.formatToUTC(tempCdate.getTime());
                    }
                    String changeid = rs1.getString(6);
                    changeid = changeid == null ? "" : changeid;
                    String changeDate = "";
                    Timestamp tempChangeDate = rs1.getTimestamp(7);
                    if (tempChangeDate != null) {
                        changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
                    }
                    tu.setCreationDate(creationDate);
                    tu.setCreationUser(creationId);
                    tu.setChangeDate(changeDate);
                    tu.setChangeUser(changeid);
                    List<TmxProp> attrs = getTuMprops(tuId, "TU");
                    tu.setProps(attrs);
                    String preContext = rs.getString(5);
                    String nextContext = rs.getString(6);
                    // The unit keeps the full stored contexts; only the comparison below is limited.
                    tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
                    tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
                    if (similarity == 100 && CommonFunction.checkEdition("U")) {
                        if (preContext != null && nextContext != null) {
                            preContext = limitContext(preContext, contextSize);
                            nextContext = limitContext(nextContext, contextSize);
                            // Compare from the stored, non-null side so that null hashes cannot throw.
                            if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
                                similarity = 101;
                            }
                        }
                    }
                    searchRs.setDbName(dbName);
                    searchRs.setSimilarity(similarity);
                    searchRs.setDbOp(this);
                    searchRs.getTu().setTmId(tuId);
                    searchResults.add(searchRs);
                }
            } finally {
                if (rs1 != null) {
                    rs1.close();
                }
            }
        }
    } finally {
        // Close each resource in its own finally so that a failing close() cannot leak the others.
        try {
            if (rs != null) {
                rs.close();
            }
        } finally {
            try {
                if (stm != null) {
                    stm.close();
                }
            } finally {
                if (tmpStm != null) {
                    tmpStm.close();
                }
            }
        }
    }
}

/**
 * Keeps at most {@code contextSize} leading comma-separated entries of {@code context}; a context that
 * already has no more entries than that is returned unchanged.
 */
private static String limitContext(String context, int contextSize) {
    String[] hashes = context.split(","); //$NON-NLS-1$
    if (hashes.length <= contextSize) {
        return context;
    }
    StringBuilder kept = new StringBuilder();
    for (int i = 0; i < contextSize; i++) {
        if (i > 0) {
            kept.append(',');
        }
        kept.append(hashes[i]);
    }
    return kept.toString();
}
use of net.heartsome.cat.common.bean.TmxSegement in project translationstudio8 by heartsome.
the class TMDatabaseImpl method fuzzySearch.
/**
 * Searches this translation memory for segments similar to the given source text and adds every
 * acceptable match to {@code searchResults}.
 * <p>
 * Candidate ids come from {@code getCandidatesTextDataPks4Oracle}. They are queried in groups of 100 ids
 * per {@code IN (...)} list, which according to the original note works around Oracle's limit on the
 * number of items in an IN list. Each candidate is scored with {@code similarity}, penalised when its
 * inner tags differ from those of {@code fullText}, and kept only if the score is still at least
 * {@code minSimilarity} and a non-empty target segment exists. A match scoring exactly 100 whose stored
 * contexts equal {@code preHash} and {@code nextHash} is reported with similarity 101; a stored context
 * of {@code null} is treated as the empty string for that comparison.
 *
 * @param pureText source text without inner tags; used for n-gram generation and scoring
 * @param fullText source text including inner tags; only its tag content is compared
 * @param srcLang source language code
 * @param tgtLang target language code, substituted for {@code __LANG__} in the target query
 * @param minSimilarity lowest score a match must reach to be reported
 * @param caseSensitive when {@code false}, both texts are lower-cased before scoring
 * @param matchUpperLimit not used by this implementation
 * @param contextSize maximum number of comma-separated context hashes that are compared
 * @param preHash context hash(es) preceding the segment; may be {@code null}
 * @param nextHash context hash(es) following the segment; may be {@code null}
 * @param isIngoreTarget when {@code true}, no tag penalty is applied
 * @param searchResults collector the matches are added to; results it already contains are skipped
 * @param tagPelanty amount subtracted from the score when the inner tags differ
 * @throws SQLException if a database query fails
 */
@Override
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    int size = ngrams.length;
    if (size == 0) {
        return;
    }
    List<String> tpkids = getCandidatesTextDataPks4Oracle(srcLang, minSimilarity, ngrams);
    // Split the ids into groups of 100 so that no single IN (...) list grows too long for Oracle.
    List<String> idGroups = new ArrayList<String>();
    StringBuilder group = new StringBuilder();
    for (int i = 0; i < tpkids.size(); i++) {
        group.append(",");
        group.append(tpkids.get(i));
        if ((i + 1) % 100 == 0) {
            idGroups.add(group.substring(1));
            group.setLength(0);
        }
    }
    // Flush the last, partially filled group. Deciding on the collected groups rather than on the
    // buffer keeps the search working when the candidate count is an exact multiple of 100.
    if (group.length() > 0) {
        idGroups.add(group.substring(1));
    }
    if (idGroups.isEmpty()) {
        return;
    }
    StringBuilder where = new StringBuilder();
    where.append("TPKID IN (").append(idGroups.get(0)).append(")");
    for (int i = 1; i < idGroups.size(); i++) {
        where.append(" OR TPKID IN (").append(idGroups.get(i)).append(")");
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
    textDataSql = textDataSql.replace("__WHERE__", where.toString());
    // Lower-case the query text once instead of once per candidate row.
    String queryText = caseSensitive ? pureText : pureText.toLowerCase();
    Statement stm = null;
    ResultSet rs = null;
    Statement tmpStm = null;
    try {
        stm = conn.createStatement();
        tmpStm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // Expected columns: TPKID, GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT
        // NOTE(review): tgtLang is spliced into the SQL text unescaped - confirm that callers only pass
        // validated language codes.
        String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            if (_pureText == null) {
                // A row without pure text cannot be scored; skip it instead of failing with a
                // NullPointerException.
                continue;
            }
            int similarity = similarity(queryText, caseSensitive ? _pureText : _pureText.toLowerCase());
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // The inner tag contents differ, so apply the tag penalty.
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            int tuId = rs.getInt(2);
            String targetSqlTemp = targetSql.replace("__GROUPID__", tuId + "");
            // Presumably columns: TPKID, PURE, CONTENT, CREATIONID, CREATIONDATE, CHANGEID, CHANGEDATE,
            // PROJECTREF - verify against the configured "fuzzySearch-target" statement.
            ResultSet rs1 = null;
            try {
                rs1 = tmpStm.executeQuery(targetSqlTemp);
                if (rs1.next()) {
                    TmxSegement source = new TmxSegement(_pureText, _fullText, srcLang);
                    source.setDbPk(rs.getInt(1));
                    String targetPure = rs1.getString(2);
                    String targetFull = rs1.getString(3);
                    if (targetPure == null || targetPure.equals("") || targetFull == null || targetFull.equals("")) {
                        // No usable translation for this unit.
                        continue;
                    }
                    TmxSegement target = new TmxSegement(targetPure, targetFull, tgtLang);
                    target.setDbPk(rs1.getInt(1));
                    TmxTU tu = new TmxTU(source, target);
                    FuzzySearchResult searchRs = new FuzzySearchResult(tu);
                    if (searchResults.contains(searchRs)) {
                        continue;
                    }
                    String creationId = rs1.getString(4);
                    creationId = creationId == null ? "" : creationId;
                    String creationDate = "";
                    Timestamp tempCdate = rs1.getTimestamp(5);
                    if (tempCdate != null) {
                        creationDate = DateUtils.formatToUTC(tempCdate.getTime());
                    }
                    String changeid = rs1.getString(6);
                    changeid = changeid == null ? "" : changeid;
                    String changeDate = "";
                    Timestamp tempChangeDate = rs1.getTimestamp(7);
                    if (tempChangeDate != null) {
                        changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
                    }
                    tu.setCreationDate(creationDate);
                    tu.setCreationUser(creationId);
                    tu.setChangeDate(changeDate);
                    tu.setChangeUser(changeid);
                    List<TmxProp> attrs = getTuMprops(tuId, "TU");
                    tu.setProps(attrs);
                    String preContext = rs.getString(5);
                    String nextContext = rs.getString(6);
                    // The unit keeps the stored contexts as read; only the comparison below is limited.
                    tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
                    tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
                    if (similarity == 100 && CommonFunction.checkEdition("U")) {
                        // A missing stored context is compared as the empty string.
                        preContext = limitContext(preContext == null ? "" : preContext, contextSize);
                        nextContext = limitContext(nextContext == null ? "" : nextContext, contextSize);
                        // Compare from the stored, non-null side so that null hashes cannot throw.
                        if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
                            similarity = 101;
                        }
                    }
                    searchRs.setDbName(dbName);
                    searchRs.setSimilarity(similarity);
                    searchRs.setDbOp(this);
                    searchRs.getTu().setTmId(tuId);
                    searchResults.add(searchRs);
                }
            } finally {
                if (rs1 != null) {
                    rs1.close();
                }
            }
        }
    } finally {
        // Close each resource in its own finally so that a failing close() cannot leak the others.
        try {
            if (rs != null) {
                rs.close();
            }
        } finally {
            try {
                if (stm != null) {
                    stm.close();
                }
            } finally {
                if (tmpStm != null) {
                    tmpStm.close();
                }
            }
        }
    }
}

/**
 * Keeps at most {@code contextSize} leading comma-separated entries of {@code context}; a context that
 * already has no more entries than that is returned unchanged.
 */
private static String limitContext(String context, int contextSize) {
    String[] hashes = context.split(","); //$NON-NLS-1$
    if (hashes.length <= contextSize) {
        return context;
    }
    StringBuilder kept = new StringBuilder();
    for (int i = 0; i < contextSize; i++) {
        if (i > 0) {
            kept.append(',');
        }
        kept.append(hashes[i]);
    }
    return kept.toString();
}
use of net.heartsome.cat.common.bean.TmxSegement in project translationstudio8 by heartsome.
the class DBOperator method fuzzySearch.
/**
 * Searches this translation memory for segments similar to the given source text and adds every
 * acceptable match to {@code searchResults}.
 * <p>
 * Candidate rows are pre-selected by n-gram count through {@code getCandidatesTextDataPks}, scored with
 * {@code similarity}, penalised when their inner tags differ from those of {@code fullText}, and kept only
 * if the score is still at least {@code minSimilarity} and a non-empty target segment exists. The full
 * text of both segments is passed through {@code InnerTagClearUtil.clearTmx4Xliff} before it is stored
 * in the result. A match scoring exactly 100 whose stored contexts equal {@code preHash} and
 * {@code nextHash} is reported with similarity 101.
 *
 * @param pureText source text without inner tags; used for n-gram generation and scoring
 * @param fullText source text including inner tags; only its tag content is compared
 * @param srcLang source language code
 * @param tgtLang target language code, substituted for {@code __LANG__} in the target query
 * @param minSimilarity lowest score a match must reach to be reported
 * @param caseSensitive when {@code false}, both texts are lower-cased before scoring
 * @param matchUpperLimit not used by this implementation
 * @param contextSize maximum number of comma-separated context hashes that are compared
 * @param preHash context hash(es) preceding the segment; may be {@code null}
 * @param nextHash context hash(es) following the segment; may be {@code null}
 * @param isIngoreTarget when {@code true}, no tag penalty is applied
 * @param searchResults collector the matches are added to; results it already contains are skipped
 * @param tagPelanty amount subtracted from the score when the inner tags differ
 * @throws SQLException if a database operation fails
 */
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    int size = ngrams.length;
    if (size == 0) {
        return;
    }
    // Commit before searching (presumably so that pending writes are visible to the queries - confirm).
    this.commit();
    int min = size * minSimilarity / 100;
    // Avoid dividing by zero: a threshold of 0 or less puts no upper bound on the n-gram count.
    int max = minSimilarity > 0 ? size * 100 / minSimilarity : Integer.MAX_VALUE;
    Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);
    // Build the comma-separated TPKID list for the IN (...) clause.
    StringBuilder idList = new StringBuilder();
    for (Entry<String, Integer> entry : tpkids.entrySet()) {
        int count = entry.getValue();
        if (count >= min && count <= max) {
            idList.append(",");
            idList.append(entry.getKey());
        }
    }
    if (idList.length() == 0) {
        return;
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    // SELECT TPKID, GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT FROM TEXTDATA WHERE TPKID IN (__SET__)
    // ORDER BY GROUPID DESC
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
    textDataSql = textDataSql.replace("__SET__", idList.substring(1));
    // Lower-case the query text once instead of once per candidate row.
    String queryText = caseSensitive ? pureText : pureText.toLowerCase();
    Statement stm = null;
    ResultSet rs = null;
    Statement tmpStm = null;
    try {
        stm = conn.createStatement();
        tmpStm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // SELECT TPKID, PURE, CONTENT, CREATIONID, CREATIONDATE, CHANGEID, CHANGEDATE, PROJECTREF
        // FROM TEXTDATA INNER JOIN MTU ON MTU.MTUPKID = TEXTDATA.GROUPID AND TEXTDATA.GROUPID = __GROUPID__
        // AND TEXTDATA.LANG = '__LANG__'
        // NOTE(review): tgtLang is spliced into the SQL text unescaped - confirm that callers only pass
        // validated language codes.
        String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            if (_pureText == null) {
                // A row without pure text cannot be scored; skip it instead of failing with a
                // NullPointerException.
                continue;
            }
            int similarity = similarity(queryText, caseSensitive ? _pureText : _pureText.toLowerCase());
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // The inner tag contents differ, so apply the tag penalty.
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            int tuId = rs.getInt(2);
            String temptargetSql = targetSql.replace("__GROUPID__", tuId + "");
            ResultSet rs1 = null;
            try {
                rs1 = tmpStm.executeQuery(temptargetSql);
                if (rs1.next()) {
                    // Fix for bug #3406 (yule): tags stored in TMX form may be incompatible with XLIFF.
                    TmxSegement source = new TmxSegement(_pureText, InnerTagClearUtil.clearTmx4Xliff(_fullText), srcLang);
                    source.setDbPk(rs.getInt(1));
                    String targetPure = rs1.getString(2);
                    String targetFull = rs1.getString(3);
                    if (targetPure == null || targetPure.equals("") || targetFull == null || targetFull.equals("")) {
                        // No usable translation for this unit.
                        continue;
                    }
                    // Fix for bug #3406 (yule): same tag clean-up for the target segment.
                    TmxSegement target = new TmxSegement(targetPure, InnerTagClearUtil.clearTmx4Xliff(targetFull), tgtLang);
                    target.setDbPk(rs1.getInt(1));
                    TmxTU tu = new TmxTU(source, target);
                    FuzzySearchResult searchRs = new FuzzySearchResult(tu);
                    if (searchResults.contains(searchRs)) {
                        continue;
                    }
                    String creationId = rs1.getString(4);
                    creationId = creationId == null ? "" : creationId;
                    String creationDate = "";
                    Timestamp tempCdate = rs1.getTimestamp(5);
                    if (tempCdate != null) {
                        creationDate = DateUtils.formatToUTC(tempCdate.getTime());
                    }
                    String changeid = rs1.getString(6);
                    changeid = changeid == null ? "" : changeid;
                    String changeDate = "";
                    Timestamp tempChangeDate = rs1.getTimestamp(7);
                    if (tempChangeDate != null) {
                        changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
                    }
                    tu.setCreationDate(creationDate);
                    tu.setCreationUser(creationId);
                    tu.setChangeDate(changeDate);
                    tu.setChangeUser(changeid);
                    List<TmxProp> attrs = getTuMprops(tuId, "TU");
                    tu.setProps(attrs);
                    String preContext = rs.getString(5);
                    String nextContext = rs.getString(6);
                    // The unit keeps the full stored contexts; only the comparison below is limited.
                    tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
                    tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
                    if (similarity == 100 && CommonFunction.checkEdition("U")) {
                        if (preContext != null && nextContext != null) {
                            preContext = limitContext(preContext, contextSize);
                            nextContext = limitContext(nextContext, contextSize);
                            // Compare from the stored, non-null side so that null hashes cannot throw.
                            if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
                                similarity = 101;
                            }
                        }
                    }
                    searchRs.setDbName(dbName);
                    searchRs.setSimilarity(similarity);
                    searchRs.setDbOp(this);
                    searchRs.getTu().setTmId(tuId);
                    searchResults.add(searchRs);
                }
            } finally {
                if (rs1 != null) {
                    rs1.close();
                }
            }
        }
    } finally {
        // Close each resource in its own finally so that a failing close() cannot leak the others.
        try {
            if (rs != null) {
                rs.close();
            }
        } finally {
            try {
                if (stm != null) {
                    stm.close();
                }
            } finally {
                if (tmpStm != null) {
                    tmpStm.close();
                }
            }
        }
    }
}

/**
 * Keeps at most {@code contextSize} leading comma-separated entries of {@code context}; a context that
 * already has no more entries than that is returned unchanged.
 */
private static String limitContext(String context, int contextSize) {
    String[] hashes = context.split(","); //$NON-NLS-1$
    if (hashes.length <= contextSize) {
        return context;
    }
    StringBuilder kept = new StringBuilder();
    for (int i = 0; i < contextSize; i++) {
        if (i > 0) {
            kept.append(',');
        }
        kept.append(hashes[i]);
    }
    return kept.toString();
}
use of net.heartsome.cat.common.bean.TmxSegement in project translationstudio8 by heartsome.
the class DBOperator method getTextDataIdByGroupIdLang.
/**
 * Loads the text data of one language that belongs to a given MTU.
 * @param groupId
 *            primary key of the MTU table
 * @param type
 *            kind of data, distinguishing TMX from TBX
 * @param lang
 *            language code
 * @return one segment per result row with non-null content, carrying the row's primary key, the content
 *         as full text and {@code lang} as language code; empty when no such row exists
 * @throws SQLException
 *             if the statement cannot be prepared or executed
 */
public List<TmxSegement> getTextDataIdByGroupIdLang(int groupId, String type, String lang) throws SQLException {
    List<TmxSegement> segments = new ArrayList<TmxSegement>();
    PreparedStatement query = null;
    ResultSet rows = null;
    try {
        query = conn.prepareStatement(dbConfig.getOperateDbSQL("get-textdataid-bygroupidlang"));
        query.setInt(1, groupId);
        query.setString(2, type);
        query.setString(3, lang);
        rows = query.executeQuery();
        while (rows.next()) {
            int primaryKey = rows.getInt(1);
            String fullText = rows.getString(2);
            // Rows without content yield no segment.
            if (fullText != null) {
                TmxSegement segment = new TmxSegement();
                segment.setDbPk(primaryKey);
                segment.setFullText(fullText);
                segment.setLangCode(lang);
                segments.add(segment);
            }
        }
    } finally {
        if (rows != null) {
            rows.close();
        }
        if (query != null) {
            query.close();
        }
    }
    return segments;
}
Aggregations