use of net.heartsome.cat.common.bean.TmxProp in project translationstudio8 by heartsome.
the class TMDatabaseImpl method fuzzySearch.
/**
 * Runs a fuzzy search for a source segment and adds every acceptable match to
 * {@code searchResults}.
 *
 * <p>Candidate TEXTDATA rows are looked up from the n-grams of {@code pureText}; each candidate
 * is then scored with {@code similarity(...)}, optionally penalised when its inner tags differ
 * from those of {@code fullText}, and dropped when the score falls below
 * {@code minSimilarity}. For each remaining candidate the target-language segment of the same
 * group is loaded and, if it has content and is not already in {@code searchResults}, wrapped in
 * a {@link FuzzySearchResult}.
 *
 * @param pureText       source text without tags; used for n-gram generation and scoring
 * @param fullText       source text including inner tags; only its tag content is compared
 * @param srcLang        source language code
 * @param tgtLang        target language code; substituted into the target query
 * @param minSimilarity  lowest score a match may have
 * @param caseSensitive  when {@code false}, both texts are lower-cased before scoring
 * @param matchUpperLimit not used by this method
 * @param contextSize    maximum number of comma-separated context entries compared
 * @param preHash        previous-context value to compare against the stored PRECONTEXT
 * @param nextHash       next-context value to compare against the stored NEXTCONTEXT
 * @param isIngoreTarget when {@code true}, the tag penalty is never applied
 * @param searchResults  collector that receives the matches
 * @param tagPelanty     score deducted when the tag content differs
 * @throws SQLException if any database access fails
 */
@Override
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    if (ngrams.length == 0) {
        return;
    }
    List<String> tpkids = getCandidatesTextDataPks4Oracle(srcLang, minSimilarity, ngrams);
    if (tpkids.isEmpty()) {
        return;
    }
    // Split the id list into IN (...) groups of 100 joined with OR, because Oracle restricts
    // the number of entries allowed in a single IN list. Every id ends up in exactly one
    // group, including the case where the id count is an exact multiple of 100.
    StringBuilder where = new StringBuilder();
    for (int start = 0; start < tpkids.size(); start += 100) {
        int end = Math.min(start + 100, tpkids.size());
        if (start > 0) {
            where.append(" OR ");
        }
        where.append("TPKID IN (");
        for (int i = start; i < end; i++) {
            if (i > start) {
                where.append(",");
            }
            where.append(tpkids.get(i));
        }
        where.append(")");
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
    textDataSql = textDataSql.replace("__WHERE__", where.toString());
    Statement stm = null;
    ResultSet rs = null;
    Statement tmpStm = null;
    try {
        stm = conn.createStatement();
        tmpStm = conn.createStatement();
        // Columns read below: 1 = TPKID, 2 = GROUPID, 3 = PURE, 4 = CONTENT,
        // 5 = PRECONTEXT, 6 = NEXTCONTEXT.
        rs = stm.executeQuery(textDataSql);
        String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            int similarity;
            if (caseSensitive) {
                similarity = similarity(pureText, _pureText);
            } else {
                similarity = similarity(pureText.toLowerCase(), _pureText.toLowerCase());
            }
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // Tag content differs: apply the penalty.
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            int tuId = rs.getInt(2);
            String targetSqlTemp = targetSql.replace("__GROUPID__", tuId + "");
            // Columns read below: 1 = primary key, 2 = pure text, 3 = full text,
            // 4 = creation id, 5 = creation date, 6 = change id, 7 = change date.
            ResultSet rs1 = null;
            try {
                rs1 = tmpStm.executeQuery(targetSqlTemp);
                if (rs1.next()) {
                    TmxSegement source = new TmxSegement(_pureText, _fullText, srcLang);
                    source.setDbPk(rs.getInt(1));
                    _pureText = rs1.getString(2);
                    _fullText = rs1.getString(3);
                    if (_pureText == null || _pureText.equals("") || _fullText == null || _fullText.equals("")) {
                        // A target without content is not a usable match.
                        continue;
                    }
                    TmxSegement target = new TmxSegement(_pureText, _fullText, tgtLang);
                    target.setDbPk(rs1.getInt(1));
                    TmxTU tu = new TmxTU(source, target);
                    FuzzySearchResult searchRs = new FuzzySearchResult(tu);
                    if (searchResults.contains(searchRs)) {
                        continue;
                    }
                    String creationId = rs1.getString(4);
                    creationId = creationId == null ? "" : creationId;
                    String creationDate = "";
                    Timestamp tempCdate = rs1.getTimestamp(5);
                    if (tempCdate != null) {
                        creationDate = DateUtils.formatToUTC(tempCdate.getTime());
                    }
                    String changeid = rs1.getString(6);
                    changeid = changeid == null ? "" : changeid;
                    String changeDate = "";
                    Timestamp tempChangeDate = rs1.getTimestamp(7);
                    if (tempChangeDate != null) {
                        changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
                    }
                    tu.setCreationDate(creationDate);
                    tu.setCreationUser(creationId);
                    tu.setChangeDate(changeDate);
                    tu.setChangeUser(changeid);
                    List<TmxProp> attrs = getTuMprops(tuId, "TU");
                    tu.setProps(attrs);
                    String preContext = rs.getString(5);
                    String nextContext = rs.getString(6);
                    // The TU receives the stored values as-is (possibly null); the
                    // normalisation below is only used for the comparison.
                    tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
                    tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
                    if (similarity == 100 && CommonFunction.checkEdition("U")) {
                        preContext = preContext == null ? "" : preContext;
                        nextContext = nextContext == null ? "" : nextContext;
                        String[] preContexts = preContext.split(",");
                        String[] nextContexts = nextContext.split(",");
                        if (preContexts.length > contextSize) {
                            // Keep only the first contextSize entries.
                            StringBuilder trimmed = new StringBuilder();
                            for (int i = 0; i < contextSize; i++) {
                                if (i > 0) {
                                    trimmed.append(",");
                                }
                                trimmed.append(preContexts[i]);
                            }
                            preContext = trimmed.toString();
                        }
                        if (nextContexts.length > contextSize) {
                            StringBuilder trimmed = new StringBuilder();
                            for (int i = 0; i < contextSize; i++) {
                                if (i > 0) {
                                    trimmed.append(",");
                                }
                                trimmed.append(nextContexts[i]);
                            }
                            nextContext = trimmed.toString();
                        }
                        // Compare from the non-null side so a null hash simply does not match.
                        if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
                            // A full-score match whose surrounding context also matches is
                            // reported as 101.
                            similarity = 101;
                        }
                    }
                    searchRs.setDbName(dbName);
                    searchRs.setSimilarity(similarity);
                    searchRs.setDbOp(this);
                    searchRs.getTu().setTmId(tuId);
                    searchResults.add(searchRs);
                }
            } finally {
                if (rs1 != null) {
                    rs1.close();
                }
            }
        }
    } finally {
        if (rs != null) {
            rs.close();
        }
        if (stm != null) {
            stm.close();
        }
        if (tmpStm != null) {
            tmpStm.close();
        }
    }
}
use of net.heartsome.cat.common.bean.TmxProp in project translationstudio8 by heartsome.
the class DBOperator method fuzzySearch.
/**
 * Runs a fuzzy search for a source segment and adds every acceptable match to
 * {@code searchResults}.
 *
 * <p>Pending work is committed first. Candidate TEXTDATA rows are then selected by n-gram
 * count, scored with {@code similarity(...)}, optionally penalised when their inner tags differ
 * from those of {@code fullText}, and dropped when the score falls below
 * {@code minSimilarity}. For each remaining candidate the target-language segment of the same
 * group is loaded and, if it has content and is not already in {@code searchResults}, wrapped in
 * a {@link FuzzySearchResult}.
 *
 * @param pureText       source text without tags; used for n-gram generation and scoring
 * @param fullText       source text including inner tags; only its tag content is compared
 * @param srcLang        source language code
 * @param tgtLang        target language code; substituted into the target query
 * @param minSimilarity  lowest score a match may have
 * @param caseSensitive  when {@code false}, both texts are lower-cased before scoring
 * @param matchUpperLimit not used by this method
 * @param contextSize    maximum number of comma-separated context entries compared
 * @param preHash        previous-context value to compare against the stored PRECONTEXT
 * @param nextHash       next-context value to compare against the stored NEXTCONTEXT
 * @param isIngoreTarget when {@code true}, the tag penalty is never applied
 * @param searchResults  collector that receives the matches
 * @param tagPelanty     score deducted when the tag content differs
 * @throws SQLException if any database access fails
 */
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    int size = ngrams.length;
    if (size == 0) {
        return;
    }
    this.commit();
    // Acceptable n-gram count window for a candidate. A threshold of 0 places no upper bound
    // on the count, and must not be used as a divisor.
    int min = size * minSimilarity / 100;
    int max = minSimilarity == 0 ? Integer.MAX_VALUE : size * 100 / minSimilarity;
    Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);
    // Build the comma-separated id set for the IN (...) clause.
    StringBuilder idSet = new StringBuilder();
    for (Entry<String, Integer> entry : tpkids.entrySet()) {
        int count = entry.getValue();
        if (count >= min && count <= max) {
            idSet.append(",");
            idSet.append(entry.getKey());
        }
    }
    if (idSet.length() == 0) {
        return;
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    // Columns read below: 1 = TPKID, 2 = GROUPID, 3 = PURE, 4 = CONTENT,
    // 5 = PRECONTEXT, 6 = NEXTCONTEXT.
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
    textDataSql = textDataSql.replace("__SET__", idSet.substring(1));
    Statement stm = null;
    ResultSet rs = null;
    Statement tmpStm = null;
    try {
        stm = conn.createStatement();
        tmpStm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // Target query columns read below: 1 = TPKID, 2 = PURE, 3 = CONTENT, 4 = CREATIONID,
        // 5 = CREATIONDATE, 6 = CHANGEID, 7 = CHANGEDATE.
        String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            int similarity;
            if (caseSensitive) {
                similarity = similarity(pureText, _pureText);
            } else {
                similarity = similarity(pureText.toLowerCase(), _pureText.toLowerCase());
            }
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // Tag content differs: apply the penalty.
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            int tuId = rs.getInt(2);
            String temptargetSql = targetSql.replace("__GROUPID__", tuId + "");
            ResultSet rs1 = null;
            try {
                rs1 = tmpStm.executeQuery(temptargetSql);
                if (rs1.next()) {
                    // Bug #3406: tags in XLIFF may be incompatible with TMX tags, so the
                    // stored full text is passed through clearTmx4Xliff.
                    TmxSegement source = new TmxSegement(_pureText, InnerTagClearUtil.clearTmx4Xliff(_fullText), srcLang);
                    source.setDbPk(rs.getInt(1));
                    _pureText = rs1.getString(2);
                    _fullText = rs1.getString(3);
                    if (_pureText == null || _pureText.equals("") || _fullText == null || _fullText.equals("")) {
                        // A target without content is not a usable match.
                        continue;
                    }
                    // Bug #3406: same tag clean-up for the target segment.
                    TmxSegement target = new TmxSegement(_pureText, InnerTagClearUtil.clearTmx4Xliff(_fullText), tgtLang);
                    target.setDbPk(rs1.getInt(1));
                    TmxTU tu = new TmxTU(source, target);
                    FuzzySearchResult searchRs = new FuzzySearchResult(tu);
                    if (searchResults.contains(searchRs)) {
                        continue;
                    }
                    String creationId = rs1.getString(4);
                    creationId = creationId == null ? "" : creationId;
                    String creationDate = "";
                    Timestamp tempCdate = rs1.getTimestamp(5);
                    if (tempCdate != null) {
                        creationDate = DateUtils.formatToUTC(tempCdate.getTime());
                    }
                    String changeid = rs1.getString(6);
                    changeid = changeid == null ? "" : changeid;
                    String changeDate = "";
                    Timestamp tempChangeDate = rs1.getTimestamp(7);
                    if (tempChangeDate != null) {
                        changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
                    }
                    tu.setCreationDate(creationDate);
                    tu.setCreationUser(creationId);
                    tu.setChangeDate(changeDate);
                    tu.setChangeUser(changeid);
                    List<TmxProp> attrs = getTuMprops(tuId, "TU");
                    tu.setProps(attrs);
                    String preContext = rs.getString(5);
                    String nextContext = rs.getString(6);
                    tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
                    tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
                    if (similarity == 100 && CommonFunction.checkEdition("U")) {
                        // Context is only compared when both stored values are present.
                        if (preContext != null && nextContext != null) {
                            String[] preContexts = preContext.split(",");
                            String[] nextContexts = nextContext.split(",");
                            if (preContexts.length > contextSize) {
                                // Keep only the first contextSize entries.
                                StringBuilder trimmed = new StringBuilder();
                                for (int i = 0; i < contextSize; i++) {
                                    if (i > 0) {
                                        trimmed.append(",");
                                    }
                                    trimmed.append(preContexts[i]);
                                }
                                preContext = trimmed.toString();
                            }
                            if (nextContexts.length > contextSize) {
                                StringBuilder trimmed = new StringBuilder();
                                for (int i = 0; i < contextSize; i++) {
                                    if (i > 0) {
                                        trimmed.append(",");
                                    }
                                    trimmed.append(nextContexts[i]);
                                }
                                nextContext = trimmed.toString();
                            }
                            // Compare from the non-null side so a null hash simply does not match.
                            if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
                                // A full-score match whose surrounding context also matches
                                // is reported as 101.
                                similarity = 101;
                            }
                        }
                    }
                    searchRs.setDbName(dbName);
                    searchRs.setSimilarity(similarity);
                    searchRs.setDbOp(this);
                    searchRs.getTu().setTmId(tuId);
                    searchResults.add(searchRs);
                }
            } finally {
                if (rs1 != null) {
                    rs1.close();
                }
            }
        }
    } finally {
        if (rs != null) {
            rs.close();
        }
        if (stm != null) {
            stm.close();
        }
        if (tmpStm != null) {
            tmpStm.close();
        }
    }
}
use of net.heartsome.cat.common.bean.TmxProp in project translationstudio8 by heartsome.
the class DBOperator method getTuMprops.
/**
 * Loads the properties recorded for one parent element.
 *
 * <p>Runs the configured {@code get-mporp-byparentid} statement with the given id and parent
 * name, and builds one {@link TmxProp} per row from result columns 4 and 7. According to the
 * original query note these are PNAME and CONTENT of:
 * SELECT MPPKID, PARENTNAME, PARENTID, PNAME, LANG, ENCODING, CONTENT
 * FROM MPROP WHERE PARENTID=? AND PARENTNAME=?
 *
 * @param parentId   value bound to the first statement parameter
 * @param parentName value bound to the second statement parameter
 * @return the properties found; empty when the query yields no rows
 * @throws SQLException if preparing, executing or reading the query fails
 */
public List<TmxProp> getTuMprops(int parentId, String parentName) throws SQLException {
    final List<TmxProp> props = new ArrayList<TmxProp>();
    final String query = dbConfig.getOperateDbSQL("get-mporp-byparentid");
    PreparedStatement statement = null;
    ResultSet rows = null;
    try {
        statement = conn.prepareStatement(query);
        statement.setInt(1, parentId);
        statement.setString(2, parentName);
        rows = statement.executeQuery();
        while (rows.next()) {
            props.add(new TmxProp(rows.getString(4), rows.getString(7)));
        }
    } finally {
        // Release the result set before the statement that produced it.
        if (rows != null) {
            rows.close();
        }
        if (statement != null) {
            statement.close();
        }
    }
    return props;
}
use of net.heartsome.cat.common.bean.TmxProp in project translationstudio8 by heartsome.
the class TmxReader method readTuPropElement.
/**
 * Reads the {@code prop} children of the element the navigator is currently on and stores them
 * in {@code tu}.
 *
 * <p>Properties whose {@code type} is one of the two {@link TmxContexts} context names are
 * appended as context. A {@code x-Context} property is split on commas and, when it has exactly
 * two parts, appended as previous and next context. Every other typed property is appended as
 * a {@link TmxProp}. Properties without content or without a {@code type} attribute are skipped.
 *
 * <p>The navigator position is saved on entry and restored on exit, including when an
 * exception is thrown while reading.
 *
 * @param tu the translation unit that receives the contexts and properties
 * @throws VTDException if XPath evaluation or navigation fails
 */
private void readTuPropElement(TmxTU tu) throws VTDException {
    VTDNav vn = vu.getVTDNav();
    vn.push();
    try {
        AutoPilot ap = new AutoPilot(vn);
        ap.selectXPath("./prop");
        while (ap.evalXPath() != -1) {
            String content = vu.getElementContent();
            if (content == null) {
                continue;
            }
            int inx = vn.getAttrVal("type");
            String typeValue = inx != -1 ? vn.toString(inx) : null;
            if (typeValue == null) {
                continue;
            }
            if (typeValue.equals(TmxContexts.PRE_CONTEXT_NAME)) {
                tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, content.trim());
            } else if (typeValue.equals(TmxContexts.NEXT_CONTEXT_NAME)) {
                tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, content.trim());
            } else if (typeValue.equals("x-Context")) {
                // Trados TMX file: both contexts are in one comma-separated value.
                String[] contexts = content.split(",");
                if (contexts.length == 2) {
                    tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, contexts[0].trim());
                    tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, contexts[1].trim());
                }
            } else {
                TmxProp p = new TmxProp(typeValue, content);
                tu.appendProp(p);
            }
        }
    } finally {
        // Always balance the push above so the caller's cursor is not left displaced.
        vn.pop();
    }
}
use of net.heartsome.cat.common.bean.TmxProp in project translationstudio8 by heartsome.
the class TmUtils method altTransInfoConverter.
/**
 * Converts matches fetched from the database into {@link AltTransBean} objects. Matches for
 * which {@code isMatchExist} reports a duplicate of the current alt-trans entries are skipped.
 *
 * <p>Each converted bean carries the match properties (quality, origin, tool id, match type,
 * {@code xml:space}), the source and target language, content and pure text, and one
 * {@code hs:prop-group} holding the creation/change metadata plus every TU property that has
 * both a non-empty name and a non-empty value.
 *
 * <p>The entries collected in the list passed to {@code isMatchExist} are appended to the
 * returned vector when at least one match was converted; otherwise they are appended to
 * {@code currentAltTrans}, which is therefore modified in that case.
 *
 * @param dbMatches
 *            matches obtained from the database
 * @param currentAltTrans
 *            the alt-trans entries that already exist
 * @return the alt-trans entries that do not duplicate the current ones
 */
public static Vector<AltTransBean> altTransInfoConverter(List<FuzzySearchResult> dbMatches, Vector<AltTransBean> currentAltTrans) {
    Vector<AltTransBean> altTrans = new Vector<AltTransBean>();
    Vector<AltTransBean> existAltTrans = new Vector<AltTransBean>();
    for (FuzzySearchResult result : dbMatches) {
        AltTransBean atb = new AltTransBean();
        TmxTU tu = result.getTu();
        // Only the pure texts are set before the duplicate check.
        atb.setSrcText(tu.getSource().getPureText());
        atb.setTgtText(tu.getTarget().getPureText());
        if (isMatchExist(currentAltTrans, atb, result.getDbName(), existAltTrans)) {
            continue;
        }
        Hashtable<String, String> matchProps = new Hashtable<String, String>();
        matchProps.put("match-quality", result.getSimilarity() + "");
        matchProps.put("origin", result.getDbName());
        matchProps.put("tool-id", "Translation Memory");
        matchProps.put("hs:matchType", "TM");
        matchProps.put("xml:space", "default");
        atb.setMatchProps(matchProps);
        Hashtable<String, String> srcProps = new Hashtable<String, String>();
        srcProps.put("xml:lang", tu.getSource().getLangCode());
        atb.setSrcProps(srcProps);
        atb.setSrcContent(tu.getSource().getFullText());
        Hashtable<String, String> tgtProps = new Hashtable<String, String>();
        tgtProps.put("xml:lang", tu.getTarget().getLangCode());
        atb.setTgtProps(tgtProps);
        atb.setTgtContent(tu.getTarget().getFullText());
        Vector<PropGroupBean> pgs = new Vector<PropGroupBean>();
        Vector<PropBean> props = new Vector<PropBean>();
        props.add(new PropBean("creationId", tu.getCreationUser()));
        props.add(new PropBean("creationDate", tu.getCreationDate()));
        props.add(new PropBean("changeId", tu.getChangeUser()));
        props.add(new PropBean("changeDate", tu.getChangeDate()));
        List<TmxProp> attrValList = tu.getProps();
        // A TU may carry no property list at all; treat that like an empty list.
        if (attrValList != null) {
            for (TmxProp attr : attrValList) {
                String name = attr.getName();
                if (name == null || name.equals("")) {
                    continue;
                }
                String value = attr.getValue();
                if (value == null || value.equals("")) {
                    continue;
                }
                props.add(new PropBean(name, value));
            }
        }
        PropGroupBean pg = new PropGroupBean(props);
        // Name of the property group.
        pg.setName("hs:prop-group");
        pgs.add(pg);
        atb.setPropGroups(pgs);
        altTrans.add(atb);
    }
    if (altTrans.size() > 0) {
        altTrans.addAll(existAltTrans);
    } else {
        currentAltTrans.addAll(existAltTrans);
    }
    return altTrans;
}
Aggregations