use of net.heartsome.cat.common.bean.TmxTU in project translationstudio8 by heartsome.
the class ImportTmx method checkDuplicate.
/**
 * Queries the database for translation units that match the given unit.
 * <p>
 * The query key is built from the hash code of the source segment's pure text, the converted
 * source language code and the unit's TU id. A unit without a TU id gets a freshly generated
 * one, which is stored on the unit before the query is issued.
 *
 * @param tu the translation unit to look up; its TU id may be assigned as a side effect
 * @return the units returned by the database for that key, or {@code null} when the unit has
 *         no source segment, no segment list, an empty segment list, or no pure source text
 * @throws SQLException declared for the database lookup
 */
private List<TmxTU> checkDuplicate(TmxTU tu) throws SQLException {
    TmxSegement source = tu.getSource();
    List<TmxSegement> segments = tu.getSegments();
    boolean incomplete = source == null || segments == null || segments.isEmpty();
    if (incomplete) {
        return null;
    }

    String sourceText = source.getPureText();
    if (sourceText == null) {
        return null;
    }
    int sourceHash = sourceText.hashCode();

    // Make sure the unit carries an id before it is used as part of the lookup key.
    String id = tu.getTuId();
    if (id == null || id.length() == 0) {
        id = generateTuId();
        tu.setTuId(id);
    }

    String sourceLang = Utils.convertLangCode(source.getLangCode());
    return db.getTUInfoByTuvInfo(sourceHash, sourceLang, id);
}
use of net.heartsome.cat.common.bean.TmxTU in project translationstudio8 by heartsome.
the class ImportTmx method flush.
/**
 * Writes the translation units buffered in {@code tmxTuCache} to the database according to
 * {@code importStrategy}.
 * <ul>
 * <li>{@code IMPORT_MODEL_ALWAYSADD}: every cached unit is added in a single transaction.</li>
 * <li>{@code IMPORT_MODEL_OVERWRITE}: the cache is first reduced by {@code filterSrcSameTu} and
 * each remaining unit is looked up with {@code checkDuplicate}. Units with no database match are
 * added; for every matching database unit the change info, props, notes, TUVs and context are
 * brought in line with the incoming unit.</li>
 * <li>{@code IMPORT_MODEL_IGNORE}: only units with no database match are added.</li>
 * </ul>
 * Units for which {@code checkDuplicate} returns {@code null} are skipped.
 * <p>
 * Cancellation: while duplicates are being looked up (before a transaction is opened) a cancel
 * request returns immediately and leaves {@code tmxTuCache} untouched. Inside a transaction a
 * cancel request stops the loop and whatever has been written so far is committed.
 * <p>
 * {@code monitor} is treated as optional throughout: progress is only reported when it is not
 * {@code null}.
 *
 * @throws SQLException if a database operation fails; when it is raised inside a transaction
 *         the transaction is rolled back before the exception is rethrown
 */
private void flush() throws SQLException {
    if (importStrategy == Constants.IMPORT_MODEL_ALWAYSADD) {
        // Always add: insert every cached TU without consulting the database first.
        db.beginTransaction();
        try {
            for (TmxTU tu : tmxTuCache) {
                addTu(tu);
                if (monitor != null) {
                    monitor.worked(1);
                    if (monitor.isCanceled()) {
                        break;
                    }
                }
            }
        } catch (SQLException e) {
            db.rollBack();
            throw e;
        }
        db.commit();
        tmxTuCache.clear();
    } else if (importStrategy == Constants.IMPORT_MODEL_OVERWRITE) {
        // Incoming TU -> matching TUs already stored in the database (empty list = no match).
        Map<TmxTU, List<TmxTU>> duplicateTuCache = new HashMap<TmxTU, List<TmxTU>>();
        List<TmxTU> filterSrcSameTu = filterSrcSameTu(tmxTuCache, Constants.IMPORT_MODEL_OVERWRITE);
        for (TmxTU tu : filterSrcSameTu) {
            if (monitor != null && monitor.isCanceled()) {
                return;
            }
            List<TmxTU> dbTus = checkDuplicate(tu);
            if (dbTus == null) {
                // error TU
                continue;
            }
            // Cache the TUs that already exist in the database.
            duplicateTuCache.put(tu, dbTus);
            // Load the stored segments (only for the languages of the incoming TU), props and
            // notes now, so the transaction below only has to compare and write.
            for (TmxTU dbTu : dbTus) {
                List<TmxSegement> tuvs = tu.getSegments();
                List<TmxSegement> dbTuvs = new ArrayList<TmxSegement>();
                for (TmxSegement tuv : tuvs) {
                    dbTuvs.addAll(db.getTextDataIdByGroupIdLang(dbTu.getTmId(), "M", tuv.getLangCode()));
                }
                dbTu.setSegments(dbTuvs);
                List<TmxProp> dbProps = db.getTuMprops(dbTu.getTmId(), "TU");
                dbTu.setProps(dbProps);
                List<TmxNote> dbNotes = db.getTuMNote(dbTu.getTmId(), "TU");
                dbTu.setNotes(dbNotes);
            }
        }
        tmxTuCache.clear();
        filterSrcSameTu.clear();
        // Transaction control
        db.beginTransaction();
        try {
            for (Map.Entry<TmxTU, List<TmxTU>> entry : duplicateTuCache.entrySet()) {
                if (monitor != null && monitor.isCanceled()) {
                    break;
                }
                TmxTU tu = entry.getKey();
                List<TmxTU> dbTus = entry.getValue();
                if (dbTus.size() == 0) {
                    addTu(tu);
                } else {
                    for (TmxTU dbTu : dbTus) {
                        String changeDate = tu.getChangeDate() == null ? "" : tu.getChangeDate();
                        String changeId = tu.getChangeUser() == null ? "" : tu.getChangeUser();
                        String dbChangeDate = dbTu.getChangeDate() == null ? "" : dbTu.getChangeDate();
                        String dbChangeId = dbTu.getChangeUser() == null ? "" : dbTu.getChangeUser();
                        // Update TU attributes (XML element attributes)
                        if (!changeDate.equals(dbChangeDate) || !changeId.equals(dbChangeId)) {
                            db.updateTuChangeInfo(dbTu.getTmId(), tu.getTuId(), changeId, changeDate);
                        }
                        // Update props: if any incoming prop is missing from the database, the
                        // stored props are replaced wholesale by the incoming ones.
                        List<TmxProp> props = tu.getProps();
                        List<TmxProp> dbProps = dbTu.getProps();
                        if (dbProps.size() == 0 && props != null) {
                            for (TmxProp _prop : props) {
                                db.insertTMXProp(dbTu.getTmId(), "TU", _prop.getName(), null, null, _prop.getValue());
                            }
                        } else if (props != null) {
                            for (TmxProp prop : props) {
                                boolean exist = false;
                                for (TmxProp dbProp : dbProps) {
                                    if (prop.equals(dbProp)) {
                                        exist = true;
                                        break;
                                    }
                                }
                                if (!exist) {
                                    db.deleteMprop("TU", dbTu.getTmId() + "");
                                    for (TmxProp _prop : props) {
                                        db.insertTMXProp(dbTu.getTmId(), "TU", _prop.getName(), null, null, _prop.getValue());
                                    }
                                    break;
                                }
                            }
                        }
                        // Update notes: same replace-all rule as for props.
                        List<TmxNote> notes = tu.getNotes();
                        List<TmxNote> dbNotes = dbTu.getNotes();
                        if (dbNotes.size() == 0 && notes != null) {
                            for (TmxNote note : notes) {
                                db.insertTMXNote(dbTu.getTmId(), "TU", note.getContent(), null, null, null, null, note.getEncoding(), note.getXmlLang());
                            }
                        } else if (notes != null) {
                            for (TmxNote note : notes) {
                                boolean exist = false;
                                for (TmxNote dbNote : dbNotes) {
                                    if (note.equals(dbNote)) {
                                        exist = true;
                                        break;
                                    }
                                }
                                if (!exist) {
                                    db.deleteMNote("TU", dbTu.getTmId() + "");
                                    for (TmxNote _note : notes) {
                                        db.insertTMXNote(dbTu.getTmId(), "TU", _note.getContent(), null, null, null, null, _note.getEncoding(), _note.getXmlLang());
                                    }
                                    break;
                                }
                            }
                        }
                        // Update TUVs except the source TUV
                        List<TmxSegement> tuvs = tu.getSegments();
                        if (tuvs != null) {
                            for (TmxSegement tuv : tuvs) {
                                List<TmxSegement> dbTuvs = dbTu.getSegments();
                                String lang = tuv.getLangCode();
                                String content = tuv.getFullText();
                                if (content == null) {
                                    continue;
                                }
                                if (dbTuvs.size() != 0) {
                                    // flg: a stored TUV of this language had different content and was deleted.
                                    boolean flg = false;
                                    // isDuplicate: a stored TUV of this language exists at all.
                                    boolean isDuplicate = false;
                                    for (TmxSegement dbTuv : dbTuvs) {
                                        if (dbTuv.getLangCode().equalsIgnoreCase(lang)) {
                                            String dbContent = dbTuv.getFullText();
                                            if (!dbContent.equals(content)) {
                                                db.deleteAllTuvRelations(Arrays.asList(new Integer[] { dbTuv.getDbPk() }), lang);
                                                flg = true;
                                            }
                                            isDuplicate = true;
                                        }
                                    }
                                    if (flg == true || !isDuplicate) {
                                        addTuv(dbTu.getTmId(), tuv, null, null);
                                    }
                                } else {
                                    addTuv(dbTu.getTmId(), tuv, null, null);
                                }
                            }
                        }
                        // Update context
                        TmxContexts dbContexts = dbTu.getContexts();
                        if (tu.getContexts() != null) {
                            String preContext = tu.getContexts().getPreContext();
                            String nextContext = tu.getContexts().getNextContext();
                            if (dbContexts == null) {
                                db.updateTuvContext(dbTu.getTmId(), tu.getSource().getLangCode(), preContext, nextContext);
                            } else {
                                String dbPreContext = dbContexts.getPreContext();
                                String dbNextContext = dbContexts.getNextContext();
                                if (!dbPreContext.equals(preContext) || !dbNextContext.equals(nextContext)) {
                                    db.updateTuvContext(dbTu.getTmId(), tu.getSource().getLangCode(), preContext, nextContext);
                                }
                            }
                        }
                    }
                }
                if (monitor != null) {
                    monitor.worked(1);
                }
            }
        } catch (SQLException e) {
            db.rollBack();
            throw e;
        }
        db.commit();
        duplicateTuCache.clear();
    } else if (importStrategy == Constants.IMPORT_MODEL_IGNORE) {
        List<TmxTU> needAddTus = new ArrayList<TmxTU>();
        List<TmxTU> filterSrcSameTu = filterSrcSameTu(tmxTuCache, Constants.IMPORT_MODEL_IGNORE);
        for (TmxTU tu : filterSrcSameTu) {
            if (monitor != null && monitor.isCanceled()) {
                return;
            }
            List<TmxTU> dbTus = checkDuplicate(tu);
            if (dbTus == null) {
                // error TU
                continue;
            }
            // Only TUs without any database match are imported; existing ones are left alone.
            if (dbTus.size() == 0) {
                needAddTus.add(tu);
            }
        }
        tmxTuCache.clear();
        filterSrcSameTu.clear();
        // Transaction control
        db.beginTransaction();
        try {
            for (TmxTU tu : needAddTus) {
                if (monitor != null && monitor.isCanceled()) {
                    break;
                }
                addTu(tu);
                if (monitor != null) {
                    monitor.worked(1);
                }
            }
        } catch (SQLException e) {
            db.rollBack();
            throw e;
        }
        db.commit();
        needAddTus.clear();
    }
}
use of net.heartsome.cat.common.bean.TmxTU in project translationstudio8 by heartsome.
the class Row method toTmxTu.
/**
 * Converts this row into a {@link TmxTU}.
 * <p>
 * Every non-null cell whose content is non-null and not blank after trimming becomes one
 * segment: the content is passed through {@code TextUtil.cleanSpecialString} and used as both
 * the full text and the pure text, and the cell's language code is copied over. The cell at
 * index 0 becomes the source segment; all other cells are appended as further segments. If the
 * first cell is skipped, the returned unit has no source set by this method.
 *
 * @return a new translation unit built from the cells of this row
 */
public TmxTU toTmxTu() {
    TmxTU tu = new TmxTU();
    for (int i = 0; i < cells.size(); i++) {
        Cell cell = cells.get(i);
        // Check the cell itself before touching it.
        if (cell == null) {
            continue;
        }
        String cellContent = cell.getCellConentent();
        if (cellContent == null || cellContent.trim().isEmpty()) {
            continue;
        }
        cellContent = TextUtil.cleanSpecialString(cellContent);
        TmxSegement seg = new TmxSegement();
        seg.setFullText(cellContent);
        seg.setPureText(cellContent);
        seg.setLangCode(cell.getLangCode());
        if (i == 0) {
            tu.setSource(seg);
        } else {
            tu.appendSegement(seg);
        }
    }
    return tu;
}
use of net.heartsome.cat.common.bean.TmxTU in project translationstudio8 by heartsome.
the class TmUtils method fuzzyResult2Alttransbean.
/**
 * Converts fuzzy search results into {@link AltTransBean}s.
 * <p>
 * For every result one bean is created that carries the source and target pure text, full
 * content and language, a fixed set of match properties (quality, origin database, tool id,
 * match type, xml:space) and a single property group named {@code hs:prop-group}. The group
 * holds the creation/change user and date of the unit followed by the unit's own props; props
 * with a null or empty name or value are left out.
 * <p>
 * The match, source and target properties are stored in {@link Hashtable}s, which reject null
 * values, so the result's database name and both language codes must not be {@code null}.
 *
 * @param fuzzyResults the search results to convert; each must carry a unit with a source and
 *        a target segment
 * @return one bean per result, in the iteration order of {@code fuzzyResults}
 */
public static Vector<AltTransBean> fuzzyResult2Alttransbean(List<FuzzySearchResult> fuzzyResults) {
    Vector<AltTransBean> altTrans = new Vector<AltTransBean>();
    for (FuzzySearchResult result : fuzzyResults) {
        AltTransBean atb = new AltTransBean();
        TmxTU tu = result.getTu();
        TmxSegement source = tu.getSource();
        TmxSegement target = tu.getTarget();

        // Plain text of source and target.
        atb.setSrcText(source.getPureText());
        atb.setTgtText(target.getPureText());

        Hashtable<String, String> matchProps = new Hashtable<String, String>();
        matchProps.put("match-quality", result.getSimilarity() + "");
        matchProps.put("origin", result.getDbName());
        matchProps.put("tool-id", "Translation Memory");
        matchProps.put("hs:matchType", "TM");
        matchProps.put("xml:space", "default");
        atb.setMatchProps(matchProps);

        Hashtable<String, String> srcProps = new Hashtable<String, String>();
        srcProps.put("xml:lang", source.getLangCode());
        atb.setSrcProps(srcProps);
        atb.setSrcContent(source.getFullText());

        Hashtable<String, String> tgtProps = new Hashtable<String, String>();
        tgtProps.put("xml:lang", target.getLangCode());
        atb.setTgtProps(tgtProps);
        atb.setTgtContent(target.getFullText());

        Vector<PropBean> props = new Vector<PropBean>();
        props.add(new PropBean("creationId", tu.getCreationUser()));
        props.add(new PropBean("creationDate", tu.getCreationDate()));
        props.add(new PropBean("changeId", tu.getChangeUser()));
        props.add(new PropBean("changeDate", tu.getChangeDate()));

        // A unit may carry no prop list at all; treat that the same as an empty list.
        List<TmxProp> attrValList = tu.getProps();
        if (attrValList != null) {
            for (TmxProp attr : attrValList) {
                String name = attr.getName();
                if (name == null || name.equals("")) {
                    continue;
                }
                String value = attr.getValue();
                if (value == null || value.equals("")) {
                    continue;
                }
                props.add(new PropBean(name, value));
            }
        }

        PropGroupBean pg = new PropGroupBean(props);
        // Name of the property group.
        pg.setName("hs:prop-group");
        Vector<PropGroupBean> pgs = new Vector<PropGroupBean>();
        pgs.add(pg);
        atb.setPropGroups(pgs);
        atb.setFuzzyResult(result);
        altTrans.add(atb);
    }
    return altTrans;
}
use of net.heartsome.cat.common.bean.TmxTU in project translationstudio8 by heartsome.
the class TMDatabaseImpl method fuzzySearch.
/**
 * Runs a fuzzy lookup for a source text and adds the matches to {@code searchResults}.
 * <p>
 * Steps:
 * <ol>
 * <li>N-grams are generated for {@code pureText}; without n-grams the method returns.</li>
 * <li>Candidate text rows are fetched with {@code getCandidatesTextDataPks} and kept when their
 * count lies between {@code size * minSimilarity / 100} and {@code size * 100 / minSimilarity}
 * (both inclusive), where {@code size} is the n-gram count.</li>
 * <li>For each candidate the similarity against {@code pureText} is computed (lower-cased on
 * both sides unless {@code caseSensitive}). Unless {@code isIngoreTarget} is set, differing
 * inner tag content subtracts {@code tagPelanty}. Candidates below {@code minSimilarity} are
 * dropped.</li>
 * <li>The target row for {@code tgtLang} is read; candidates with no target row, or with null or
 * empty target text, are dropped, as are results already contained in
 * {@code searchResults}.</li>
 * <li>If the similarity is exactly 100, {@code CommonFunction.checkEdition("U")} holds and both
 * stored contexts (cut down to the first {@code contextSize} comma-separated entries) equal
 * {@code preHash} and {@code nextHash}, the similarity is raised to 101.</li>
 * </ol>
 *
 * @param pureText source text without tags
 * @param fullText source text including tags
 * @param srcLang source language code
 * @param tgtLang target language code; substituted into the target query
 * @param minSimilarity minimum similarity in percent; must not be 0 because it is used as a
 *        divisor
 * @param caseSensitive whether the similarity comparison is case sensitive
 * @param matchUpperLimit not used by this implementation
 * @param contextSize number of context entries compared against the hashes
 * @param preHash hash of the preceding context; a {@code null} value never matches
 * @param nextHash hash of the following context; a {@code null} value never matches
 * @param isIngoreTarget when {@code true}, no penalty is applied for differing tag content
 * @param searchResults collector the matches are added to
 * @param tagPelanty penalty subtracted from the similarity when the tag content differs
 * @throws SQLException if a database access fails
 */
@Override
public void fuzzySearch(String pureText, String fullText, String srcLang, String tgtLang, int minSimilarity, boolean caseSensitive, int matchUpperLimit, int contextSize, String preHash, String nextHash, boolean isIngoreTarget, FuzzySearchResults searchResults, int tagPelanty) throws SQLException {
    int[] ngrams = generateNgrams(srcLang, pureText);
    int size = ngrams.length;
    if (size == 0) {
        return;
    }
    int min = size * minSimilarity / 100;
    int max = size * 100 / minSimilarity;
    Map<String, Integer> tpkids = getCandidatesTextDataPks(srcLang, min, max, ngrams);
    // Build the id list for the SQL statement; every id is prefixed with a comma and the
    // leading comma is stripped when the list is inserted.
    StringBuilder idSet = new StringBuilder();
    for (Entry<String, Integer> entry : tpkids.entrySet()) {
        int count = entry.getValue();
        if (count >= min && count <= max) {
            idSet.append(",");
            idSet.append(entry.getKey());
        }
    }
    if (idSet.length() == 0) {
        return;
    }
    String tag = TranslationMemoryTools.getInnerTagContent(fullText);
    String textDataSql = dbConfig.getOperateDbSQL("fuzzySearch");
    textDataSql = textDataSql.replace("__SET__", idSet.substring(1));
    Statement stm = null;
    ResultSet rs = null;
    Statement tmpStm = null;
    try {
        stm = conn.createStatement();
        tmpStm = conn.createStatement();
        rs = stm.executeQuery(textDataSql);
        // SELECT TPKID, GROUPID, PURE, CONTENT, PRECONTEXT, NEXTCONTEXT FROM TEXTDATA WHERE TPKID IN (__SET__)
        String targetSql = dbConfig.getOperateDbSQL("fuzzySearch-target").replace("__LANG__", tgtLang);
        String dbName = getMetaData().getDatabaseName();
        while (rs.next()) {
            String _pureText = rs.getString(3);
            String _fullText = rs.getString(4);
            int similarity = 0;
            if (caseSensitive) {
                similarity = similarity(pureText, _pureText);
            } else {
                similarity = similarity(pureText.toLowerCase(), _pureText.toLowerCase());
            }
            String _tag = TranslationMemoryTools.getInnerTagContent(_fullText);
            if (!isIngoreTarget && !tag.equals(_tag)) {
                // Tag content differs: apply the penalty.
                similarity -= tagPelanty;
            }
            if (similarity < minSimilarity) {
                continue;
            }
            int tuId = rs.getInt(2);
            String temptargetSql = targetSql.replace("__GROUPID__", tuId + "");
            // PURE, CONTENT, CREATIONID, CREATIONDATE, CHANGEID, CHANGEDATE ,PROJECTREF
            ResultSet rs1 = null;
            try {
                rs1 = tmpStm.executeQuery(temptargetSql);
                if (rs1.next()) {
                    TmxSegement source = new TmxSegement(_pureText, _fullText, srcLang);
                    source.setDbPk(rs.getInt(1));
                    String tgtPureText = rs1.getString(2);
                    String tgtFullText = rs1.getString(3);
                    if (tgtPureText == null || tgtPureText.equals("") || tgtFullText == null || tgtFullText.equals("")) {
                        continue;
                    }
                    TmxSegement target = new TmxSegement(tgtPureText, tgtFullText, tgtLang);
                    target.setDbPk(rs1.getInt(1));
                    TmxTU tu = new TmxTU(source, target);
                    FuzzySearchResult searchRs = new FuzzySearchResult(tu);
                    if (searchResults.contains(searchRs)) {
                        continue;
                    }
                    String creationId = rs1.getString(4);
                    creationId = creationId == null ? "" : creationId;
                    String creationDate = "";
                    Timestamp tempCdate = rs1.getTimestamp(5);
                    if (tempCdate != null) {
                        creationDate = DateUtils.formatToUTC(tempCdate.getTime());
                    }
                    String changeid = rs1.getString(6);
                    changeid = changeid == null ? "" : changeid;
                    String changeDate = "";
                    Timestamp tempChangeDate = rs1.getTimestamp(7);
                    if (tempChangeDate != null) {
                        changeDate = DateUtils.formatToUTC(tempChangeDate.getTime());
                    }
                    tu.setCreationDate(creationDate);
                    tu.setCreationUser(creationId);
                    tu.setChangeDate(changeDate);
                    tu.setChangeUser(changeid);
                    List<TmxProp> attrs = getTuMprops(tuId, "TU");
                    tu.setProps(attrs);
                    String preContext = rs.getString(5);
                    String nextContext = rs.getString(6);
                    // The unit keeps the contexts as stored; the truncation below is only
                    // used for the comparison with the hashes.
                    tu.appendContext(TmxContexts.PRE_CONTEXT_NAME, preContext);
                    tu.appendContext(TmxContexts.NEXT_CONTEXT_NAME, nextContext);
                    if (similarity == 100 && CommonFunction.checkEdition("U")) {
                        if (preContext != null && nextContext != null) {
                            String[] preContexts = preContext.split(",");
                            String[] nextContexts = nextContext.split(",");
                            if (preContexts.length > contextSize) {
                                preContext = ""; //$NON-NLS-1$
                                for (int i = 0; i < contextSize; i++) {
                                    preContext += "," + preContexts[i]; //$NON-NLS-1$
                                }
                                if (!"".equals(preContext)) { //$NON-NLS-1$
                                    preContext = preContext.substring(1);
                                }
                            }
                            if (nextContexts.length > contextSize) {
                                nextContext = ""; //$NON-NLS-1$
                                for (int i = 0; i < contextSize; i++) {
                                    nextContext += "," + nextContexts[i]; //$NON-NLS-1$
                                }
                                if (!"".equals(nextContext)) { //$NON-NLS-1$
                                    nextContext = nextContext.substring(1);
                                }
                            }
                            // Both contexts are non-null here, so they are the safe receivers;
                            // a null hash simply does not match.
                            if (preContext.equals(preHash) && nextContext.equals(nextHash)) {
                                similarity = 101;
                            }
                        }
                    }
                    searchRs.setDbName(dbName);
                    searchRs.setSimilarity(similarity);
                    searchRs.setDbOp(this);
                    searchRs.getTu().setTmId(tuId);
                    searchResults.add(searchRs);
                }
            } finally {
                if (rs1 != null) {
                    rs1.close();
                }
            }
        }
    } finally {
        // Nested so that a failing close() cannot prevent the remaining resources from
        // being released.
        try {
            if (rs != null) {
                rs.close();
            }
        } finally {
            try {
                if (stm != null) {
                    stm.close();
                }
            } finally {
                if (tmpStm != null) {
                    tmpStm.close();
                }
            }
        }
    }
}
Aggregations