use of net.heartsome.cat.common.bean.FuzzySearchResult in project translationstudio8 by heartsome.
the class TmUtils method fuzzyResult2Alttransbean.
/**
 * Wraps each fuzzy search result in an {@link AltTransBean}.
 * <p>
 * For every result the bean receives the pure and full text of the source and target segments,
 * their language codes, a fixed set of match attributes (quality, origin database, tool id,
 * match type, xml:space) and a single property group named {@code hs:prop-group}. The group
 * holds the creation/change user and date of the translation unit followed by every custom
 * property of the unit whose name and value are both non-empty. The originating result is also
 * attached to the bean.
 *
 * @param fuzzyResults
 *            the search results to convert
 * @return one bean per result, in iteration order
 */
public static Vector<AltTransBean> fuzzyResult2Alttransbean(List<FuzzySearchResult> fuzzyResults) {
	Vector<AltTransBean> converted = new Vector<AltTransBean>();
	for (FuzzySearchResult fuzzyResult : fuzzyResults) {
		AltTransBean bean = new AltTransBean();
		TmxTU unit = fuzzyResult.getTu();

		// Plain text of both sides.
		bean.setSrcText(unit.getSource().getPureText());
		bean.setTgtText(unit.getTarget().getPureText());

		// Attributes describing the match itself.
		Hashtable<String, String> matchAttributes = new Hashtable<String, String>();
		matchAttributes.put("match-quality", String.valueOf(fuzzyResult.getSimilarity()));
		matchAttributes.put("origin", fuzzyResult.getDbName());
		matchAttributes.put("tool-id", "Translation Memory");
		matchAttributes.put("hs:matchType", "TM");
		matchAttributes.put("xml:space", "default");
		bean.setMatchProps(matchAttributes);

		// Source side: language attribute and full (tagged) content.
		Hashtable<String, String> sourceAttributes = new Hashtable<String, String>();
		sourceAttributes.put("xml:lang", unit.getSource().getLangCode());
		bean.setSrcProps(sourceAttributes);
		bean.setSrcContent(unit.getSource().getFullText());

		// Target side: language attribute and full (tagged) content.
		Hashtable<String, String> targetAttributes = new Hashtable<String, String>();
		targetAttributes.put("xml:lang", unit.getTarget().getLangCode());
		bean.setTgtProps(targetAttributes);
		bean.setTgtContent(unit.getTarget().getFullText());

		// Audit fields first, then the unit's own properties.
		Vector<PropBean> groupEntries = new Vector<PropBean>();
		groupEntries.add(new PropBean("creationId", unit.getCreationUser()));
		groupEntries.add(new PropBean("creationDate", unit.getCreationDate()));
		groupEntries.add(new PropBean("changeId", unit.getChangeUser()));
		groupEntries.add(new PropBean("changeDate", unit.getChangeDate()));
		for (TmxProp customProp : unit.getProps()) {
			String propName = customProp.getName();
			if (propName != null && !propName.equals("")) {
				String propValue = customProp.getValue();
				if (propValue != null && !propValue.equals("")) {
					groupEntries.add(new PropBean(propName, propValue));
				}
			}
		}

		PropGroupBean group = new PropGroupBean(groupEntries);
		// Name of the property group.
		group.setName("hs:prop-group");
		Vector<PropGroupBean> groups = new Vector<PropGroupBean>();
		groups.add(group);
		bean.setPropGroups(groups);

		bean.setFuzzyResult(fuzzyResult);
		converted.add(bean);
	}
	return converted;
}
use of net.heartsome.cat.common.bean.FuzzySearchResult in project translationstudio8 by heartsome.
the class PreTranslation method keepCurrentMatchs.
/**
 * Visits every {@code trans-unit} below {@code ./body}, runs a fuzzy search for it and passes
 * the matches to {@code updateXliffFile}. The target is only flagged for update when the unit's
 * current {@code target} element is missing or blank.
 * <p>
 * Units whose {@code translate} attribute is {@code "no"} and units for which
 * {@code getTransUnitInfo} returns {@code null} are skipped.
 *
 * @param vu
 *            VTD helper positioned on the element that contains {@code body}
 * @param srcLang
 *            source language set on each unit before searching
 * @param tgtLang
 *            target language set on each unit before searching
 * @param xm
 *            modifier handed through to {@code updateXliffFile}
 * @param monitor
 *            progress monitor; may be {@code null}, in which case no cancellation check or
 *            progress reporting happens
 * @throws InterruptedException
 *             if the monitor reports cancellation
 */
private void keepCurrentMatchs(VTDUtils vu, String srcLang, String tgtLang, XMLModifier xm, IProgressMonitor monitor) throws NavException, XPathParseException, XPathEvalException, ModifyException, UnsupportedEncodingException, InterruptedException {
	AutoPilot tuAp = new AutoPilot(vu.getVTDNav());
	tuAp.selectXPath("./body//trans-unit");
	while (tuAp.evalXPath() != -1) {
		// Loop over each trans-unit.
		if (monitor != null && monitor.isCanceled()) {
			throw new InterruptedException();
		}
		// Skip locked segments.
		String locked = vu.getCurrentElementAttribut("translate", "yes");
		if ("no".equals(locked)) {
			continue;
		}
		// Decided per unit: a flag carried across iterations would stay stale whenever a unit
		// with an existing target is skipped below, and the next empty target would then be
		// left untouched.
		String tgtContent = vu.getElementContent("./target");
		boolean needUpdateTgt = tgtContent == null || tgtContent.trim().equals("");

		TransUnitInfo2TranslationBean tuInfo = getTransUnitInfo(vu);
		if (tuInfo == null) {
			continue;
		}
		tuInfo.setSrcLanguage(srcLang);
		tuInfo.setTgtLangugage(tgtLang);
		getTuContext(vu, contextSize, tuInfo);
		List<FuzzySearchResult> result = tmMatcher.executeFuzzySearch(currentProject, tuInfo);
		updateXliffFile(vu, tuInfo, result, xm, needUpdateTgt);
		// The monitor is optional (see the cancellation check above), so guard here as well.
		if (monitor != null) {
			monitor.worked(1);
		}
	}
}
use of net.heartsome.cat.common.bean.FuzzySearchResult in project translationstudio8 by heartsome.
the class TmUtils method altTransInfoConverter.
/**
 * Converts matches fetched from the database into match data wrapped as {@link AltTransBean}.
 * Records that {@code isMatchExist} reports as duplicates of the current alt-trans entries are
 * left out of the conversion.
 * <p>
 * {@code isMatchExist} is also handed a scratch vector of "existing" entries. When at least one
 * new match was converted, the contents of that vector are appended to the returned vector;
 * otherwise they are appended to {@code currentAltTrans}.
 *
 * @param dbMatches
 *            matches fetched from the database
 * @param currentAltTrans
 *            the alt-trans entries that already exist; may be appended to (see above)
 * @return the alt-trans entries that do not duplicate the current matches
 */
public static Vector<AltTransBean> altTransInfoConverter(List<FuzzySearchResult> dbMatches, Vector<AltTransBean> currentAltTrans) {
	Vector<AltTransBean> freshMatches = new Vector<AltTransBean>();
	Vector<AltTransBean> alreadyPresent = new Vector<AltTransBean>();
	for (FuzzySearchResult dbMatch : dbMatches) {
		AltTransBean bean = new AltTransBean();
		TmxTU unit = dbMatch.getTu();

		// Plain text of both sides is all the duplicate check gets to look at.
		bean.setSrcText(unit.getSource().getPureText());
		bean.setTgtText(unit.getTarget().getPureText());
		boolean duplicate = isMatchExist(currentAltTrans, bean, dbMatch.getDbName(), alreadyPresent);
		if (!duplicate) {
			// Attributes describing the match itself.
			Hashtable<String, String> matchAttributes = new Hashtable<String, String>();
			matchAttributes.put("match-quality", String.valueOf(dbMatch.getSimilarity()));
			matchAttributes.put("origin", dbMatch.getDbName());
			matchAttributes.put("tool-id", "Translation Memory");
			matchAttributes.put("hs:matchType", "TM");
			matchAttributes.put("xml:space", "default");
			bean.setMatchProps(matchAttributes);

			// Source side: language attribute and full (tagged) content.
			Hashtable<String, String> sourceAttributes = new Hashtable<String, String>();
			sourceAttributes.put("xml:lang", unit.getSource().getLangCode());
			bean.setSrcProps(sourceAttributes);
			bean.setSrcContent(unit.getSource().getFullText());

			// Target side: language attribute and full (tagged) content.
			Hashtable<String, String> targetAttributes = new Hashtable<String, String>();
			targetAttributes.put("xml:lang", unit.getTarget().getLangCode());
			bean.setTgtProps(targetAttributes);
			bean.setTgtContent(unit.getTarget().getFullText());

			// Audit fields first, then the unit's own properties.
			Vector<PropBean> groupEntries = new Vector<PropBean>();
			groupEntries.add(new PropBean("creationId", unit.getCreationUser()));
			groupEntries.add(new PropBean("creationDate", unit.getCreationDate()));
			groupEntries.add(new PropBean("changeId", unit.getChangeUser()));
			groupEntries.add(new PropBean("changeDate", unit.getChangeDate()));
			for (TmxProp customProp : unit.getProps()) {
				String propName = customProp.getName();
				if (propName != null && !propName.equals("")) {
					String propValue = customProp.getValue();
					if (propValue != null && !propValue.equals("")) {
						groupEntries.add(new PropBean(propName, propValue));
					}
				}
			}

			PropGroupBean group = new PropGroupBean(groupEntries);
			// Name of the property group.
			group.setName("hs:prop-group");
			Vector<PropGroupBean> groups = new Vector<PropGroupBean>();
			groups.add(group);
			bean.setPropGroups(groups);

			freshMatches.add(bean);
		}
	}
	if (freshMatches.isEmpty()) {
		currentAltTrans.addAll(alreadyPresent);
	} else {
		freshMatches.addAll(alreadyPresent);
	}
	return freshMatches;
}
use of net.heartsome.cat.common.bean.FuzzySearchResult in project translationstudio8 by heartsome.
the class TMDatabaseImpl method fuzzySearch.
/**
 * Looks up stored source segments similar to {@code pureText} and appends every acceptable
 * match, together with its target segment and metadata, to {@code searchResults}.
 * <p>
 * Steps: n-grams of {@code pureText} select candidate text rows; each candidate is scored with
 * {@code similarity(...)}; candidates scoring below {@code minSimilarity} (after an optional
 * tag penalty) are dropped; for the rest the first target row of the same group is loaded and
 * wrapped in a {@link FuzzySearchResult}. A score of exactly 100 is raised to 101 when the
 * stored pre/next contexts equal {@code preHash}/{@code nextHash} and
 * {@code CommonFunction.checkEdition("U")} is true.
 *
 * @param pureText
 *            text to search for, without inline tags
 * @param fullText
 *            text to search for including inline tags; only its tag content is compared
 * @param srcLang
 *            source language
 * @param tgtLang
 *            target language; substituted for {@code __LANG__} in the target query
 * @param minSimilarity
 *            lowest score a match may have. NOTE(review): a value of 0 causes a division by
 *            zero below — confirm callers never pass it
 * @param caseSensitive
 *            when {@code false} both texts are lower-cased before scoring
 * @param matchUpperLimit
 *            not used by this implementation
 * @param contextSize
 *            number of leading comma-separated context entries kept before comparing contexts
 * @param preHash
 *            expected preceding context
 * @param nextHash
 *            expected following context
 * @param isIngoreTarget
 *            when {@code true} differing inline tags are not penalised
 * @param searchResults
 *            collector receiving the matches; results it already contains are skipped
 * @param tagPelanty
 *            points subtracted from the score when inline tags differ
 * @throws SQLException
 *             on any database error
 */
@Override
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
	int[] ngrams = generateNgrams(srcLang, pureText);
	int size = ngrams.length;
	if (size == 0) {
		return;
	}
	int min = size * minSimilarity / 100;
	int max = size * 100 / minSimilarity;
	Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);

	// Build the id list for the IN (...) clause from candidates whose count lies in [min, max].
	StringBuilder idSet = new StringBuilder();
	for (Entry<String, Integer> candidate : tpkids.entrySet()) {
		int count = candidate.getValue();
		if (count >= min && count <= max) {
			idSet.append(",").append(candidate.getKey());
		}
	}
	if (idSet.length() == 0) {
		return;
	}

	String tag = TranslationMemoryTools.getInnerTagContent(fullText);
	String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
	// Drop the leading comma.
	textDataSql = textDataSql.replace("__SET__", idSet.substring(1));
	// Loop-invariant: the query text only needs lower-casing once.
	String comparablePureText = caseSensitive ? pureText : pureText.toLowerCase();

	Statement stm = null;
	ResultSet rs = null;
	Statement tmpStm = null;
	try {
		stm = conn.createStatement();
		tmpStm = conn.createStatement();
		rs = stm.executeQuery(textDataSql);
		// SELECT TPKID, GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT FROM TEXTDATA WHERE TPKID IN (__SET__)
		String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
		String dbName = getMetaData().getDatabaseName();
		while (rs.next()) {
			String _pureText = rs.getString(3);
			String _fullText = rs.getString(4);
			int similarity;
			if (caseSensitive) {
				similarity = similarity(comparablePureText, _pureText);
			} else {
				similarity = similarity(comparablePureText, _pureText.toLowerCase());
			}
			String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
			if (!isIngoreTarget && !tag.equals(_tag)) {
				// Inline tag content differs: apply the penalty.
				similarity -= tagPelanty;
			}
			if (similarity < minSimilarity) {
				continue;
			}
			int tuId = rs.getInt(2);
			String temptargetSql = targetSql.replace("__GROUPID__", tuId + "");
			// PURE, CONTENT, CREATIONID, CREATIONDATE, CHANGEID, CHANGEDATE ,PROJECTREF
			ResultSet rs1 = null;
			try {
				rs1 = tmpStm.executeQuery(temptargetSql);
				if (rs1.next()) {
					TmxSegement source = new TmxSegement(_pureText, _fullText, srcLang);
					source.setDbPk(rs.getInt(1));
					_pureText = rs1.getString(2);
					_fullText = rs1.getString(3);
					if (_pureText == null || _pureText.equals("") || _fullText == null || _fullText.equals("")) {
						continue;
					}
					TmxSegement target = new TmxSegement(_pureText, _fullText, tgtLang);
					target.setDbPk(rs1.getInt(1));
					TmxTU tu = new TmxTU(source, target);
					FuzzySearchResult searchRs = new FuzzySearchResult(tu);
					if (searchResults.contains(searchRs)) {
						continue;
					}
					String creationId = rs1.getString(4);
					creationId = creationId == null ? "" : creationId;
					String creationDate = "";
					Timestamp tempCdate = rs1.getTimestamp(5);
					if (tempCdate != null) {
						creationDate = DateUtils.formatToUTC(tempCdate.getTime());
					}
					String changeid = rs1.getString(6);
					changeid = changeid == null ? "" : changeid;
					String changeDate = "";
					Timestamp tempChangeDate = rs1.getTimestamp(7);
					if (tempChangeDate != null) {
						changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
					}
					// Column 8 (PROJECTREF) was read into an unused local before; it is not needed.
					tu.setCreationDate(creationDate);
					tu.setCreationUser(creationId);
					tu.setChangeDate(changeDate);
					tu.setChangeUser(changeid);
					List<TmxProp> attrs = getTuMprops(tuId, "TU");
					tu.setProps(attrs);
					String preContext = rs.getString(5);
					String nextContext = rs.getString(6);
					tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
					tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
					if (similarity == 100 && CommonFunction.checkEdition("U")) {
						if (preContext != null && nextContext != null) {
							String[] preContexts = preContext.split(",");
							String[] nextContexts = nextContext.split(",");
							if (preContexts.length > contextSize) {
								preContext = joinLeadingContexts(preContexts, contextSize);
							}
							if (nextContexts.length > contextSize) {
								nextContext = joinLeadingContexts(nextContexts, contextSize);
							}
							// Compare from the non-null side so a null hash means "no context
							// match" instead of a NullPointerException.
							if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
								similarity = 101;
							}
						}
					}
					searchRs.setDbName(dbName);
					searchRs.setSimilarity(similarity);
					searchRs.setDbOp(this);
					searchRs.getTu().setTmId(tuId);
					searchResults.add(searchRs);
				}
			} finally {
				if (rs1 != null) {
					rs1.close();
				}
			}
		}
	} finally {
		// Nested so that a failing close() cannot prevent the remaining resources from closing.
		try {
			if (rs != null) {
				rs.close();
			}
		} finally {
			try {
				if (stm != null) {
					stm.close();
				}
			} finally {
				if (tmpStm != null) {
					tmpStm.close();
				}
			}
		}
	}
}

/** Joins the first {@code limit} entries of {@code parts} with commas; returns "" when none are taken. */
private static String joinLeadingContexts(String[] parts, int limit) {
	StringBuilder joined = new StringBuilder();
	for (int i = 0; i < limit; i++) {
		joined.append(",").append(parts[i]);
	}
	return joined.length() == 0 ? "" : joined.substring(1);
}
use of net.heartsome.cat.common.bean.FuzzySearchResult in project translationstudio8 by heartsome.
the class TMDatabaseImpl method fuzzySearch.
/**
 * Oracle flavour of the fuzzy lookup: finds stored source segments similar to {@code pureText}
 * and appends every acceptable match, together with its target segment and metadata, to
 * {@code searchResults}.
 * <p>
 * Candidate ids come from {@code getCandidatesTextDataPks4Oracle} and are split into
 * {@code IN (...)} groups of 100 joined with {@code OR}, keeping each list short of Oracle's
 * limit on IN-list length. Each candidate is scored with {@code similarity(...)}; candidates
 * below {@code minSimilarity} (after an optional tag penalty) are dropped; for the rest the
 * first target row of the same group is loaded and wrapped in a {@link FuzzySearchResult}. A
 * score of exactly 100 is raised to 101 when the stored pre/next contexts (null treated as "")
 * equal {@code preHash}/{@code nextHash} and {@code CommonFunction.checkEdition("U")} is true.
 *
 * @param pureText
 *            text to search for, without inline tags
 * @param fullText
 *            text to search for including inline tags; only its tag content is compared
 * @param srcLang
 *            source language
 * @param tgtLang
 *            target language; substituted for {@code __LANG__} in the target query
 * @param minSimilarity
 *            lowest score a match may have
 * @param caseSensitive
 *            when {@code false} both texts are lower-cased before scoring
 * @param matchUpperLimit
 *            not used by this implementation
 * @param contextSize
 *            number of leading comma-separated context entries kept before comparing contexts
 * @param preHash
 *            expected preceding context
 * @param nextHash
 *            expected following context
 * @param isIngoreTarget
 *            when {@code true} differing inline tags are not penalised
 * @param searchResults
 *            collector receiving the matches; results it already contains are skipped
 * @param tagPelanty
 *            points subtracted from the score when inline tags differ
 * @throws SQLException
 *             on any database error
 */
@Override
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
	int[] ngrams = generateNgrams(srcLang, pureText);
	int size = ngrams.length;
	if (size == 0) {
		return;
	}
	List<String> tpkids = getCandidatesTextDataPks4Oracle(srcLang, minSimilarity, ngrams);
	if (tpkids.isEmpty()) {
		return;
	}

	// Split the ids into several IN (...) groups. A group is flushed when it is full or when
	// the last id has been added; the previous implementation bailed out without searching
	// whenever the id count was an exact multiple of the group size.
	final int groupSize = 100;
	int total = tpkids.size();
	StringBuilder where = new StringBuilder();
	StringBuilder group = new StringBuilder();
	for (int i = 0; i < total; i++) {
		if (i % groupSize != 0) {
			group.append(",");
		}
		group.append(tpkids.get(i));
		boolean groupFull = (i + 1) % groupSize == 0;
		boolean lastId = i == total - 1;
		if (groupFull || lastId) {
			if (where.length() > 0) {
				where.append(" OR ");
			}
			where.append("TPKID IN (").append(group).append(")");
			group.setLength(0);
		}
	}

	String tag = TranslationMemoryTools.getInnerTagContent(fullText);
	String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
	textDataSql = textDataSql.replace("__WHERE__", where.toString());
	// Loop-invariant: the query text only needs lower-casing once.
	String comparablePureText = caseSensitive ? pureText : pureText.toLowerCase();

	Statement stm = null;
	ResultSet rs = null;
	Statement tmpStm = null;
	try {
		stm = conn.createStatement();
		tmpStm = conn.createStatement();
		rs = stm.executeQuery(textDataSql);
		// SELECT TPKID ,GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT FROM TEXTDATA WHERE TPKID IN (__SET__)
		String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
		String dbName = getMetaData().getDatabaseName();
		while (rs.next()) {
			String _pureText = rs.getString(3);
			String _fullText = rs.getString(4);
			int similarity;
			if (caseSensitive) {
				similarity = similarity(comparablePureText, _pureText);
			} else {
				similarity = similarity(comparablePureText, _pureText.toLowerCase());
			}
			String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
			if (!isIngoreTarget && !tag.equals(_tag)) {
				// Inline tag content differs: apply the penalty.
				similarity -= tagPelanty;
			}
			if (similarity < minSimilarity) {
				continue;
			}
			int tuId = rs.getInt(2);
			String targetSqlTemp = targetSql.replace("__GROUPID__", tuId + "");
			// PURE, CONTENT, CREATIONID, CREATIONDATE, CHANGEID, CHANGEDATE ,PROJECTREF
			ResultSet rs1 = null;
			try {
				rs1 = tmpStm.executeQuery(targetSqlTemp);
				if (rs1.next()) {
					TmxSegement source = new TmxSegement(_pureText, _fullText, srcLang);
					source.setDbPk(rs.getInt(1));
					_pureText = rs1.getString(2);
					_fullText = rs1.getString(3);
					if (_pureText == null || _pureText.equals("") || _fullText == null || _fullText.equals("")) {
						continue;
					}
					TmxSegement target = new TmxSegement(_pureText, _fullText, tgtLang);
					target.setDbPk(rs1.getInt(1));
					TmxTU tu = new TmxTU(source, target);
					FuzzySearchResult searchRs = new FuzzySearchResult(tu);
					if (searchResults.contains(searchRs)) {
						continue;
					}
					String creationId = rs1.getString(4);
					creationId = creationId == null ? "" : creationId;
					String creationDate = "";
					Timestamp tempCdate = rs1.getTimestamp(5);
					if (tempCdate != null) {
						creationDate = DateUtils.formatToUTC(tempCdate.getTime());
					}
					String changeid = rs1.getString(6);
					changeid = changeid == null ? "" : changeid;
					String changeDate = "";
					Timestamp tempChangeDate = rs1.getTimestamp(7);
					if (tempChangeDate != null) {
						changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
					}
					// Column 8 (PROJECTREF) was read into an unused local before; it is not needed.
					tu.setCreationDate(creationDate);
					tu.setCreationUser(creationId);
					tu.setChangeDate(changeDate);
					tu.setChangeUser(changeid);
					List<TmxProp> attrs = getTuMprops(tuId, "TU");
					tu.setProps(attrs);
					String preContext = rs.getString(5);
					String nextContext = rs.getString(6);
					// The unit keeps the raw (possibly null) context values.
					tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
					tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
					if (similarity == 100 && CommonFunction.checkEdition("U")) {
						// A missing stored context is compared as the empty string.
						preContext = preContext == null ? "" : preContext;
						nextContext = nextContext == null ? "" : nextContext;
						String[] preContexts = preContext.split(",");
						String[] nextContexts = nextContext.split(",");
						if (preContexts.length > contextSize) {
							preContext = firstContextEntries(preContexts, contextSize);
						}
						if (nextContexts.length > contextSize) {
							nextContext = firstContextEntries(nextContexts, contextSize);
						}
						// Compare from the non-null side so a null hash means "no context match"
						// instead of a NullPointerException.
						if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
							similarity = 101;
						}
					}
					searchRs.setDbName(dbName);
					searchRs.setSimilarity(similarity);
					searchRs.setDbOp(this);
					searchRs.getTu().setTmId(tuId);
					searchResults.add(searchRs);
				}
			} finally {
				if (rs1 != null) {
					rs1.close();
				}
			}
		}
	} finally {
		// Nested so that a failing close() cannot prevent the remaining resources from closing.
		try {
			if (rs != null) {
				rs.close();
			}
		} finally {
			try {
				if (stm != null) {
					stm.close();
				}
			} finally {
				if (tmpStm != null) {
					tmpStm.close();
				}
			}
		}
	}
}

/** Joins the first {@code limit} entries of {@code entries} with commas; returns "" when none are taken. */
private static String firstContextEntries(String[] entries, int limit) {
	StringBuilder joined = new StringBuilder();
	for (int i = 0; i < limit; i++) {
		joined.append(",").append(entries[i]);
	}
	return joined.length() == 0 ? "" : joined.substring(1);
}
Aggregations