Use of legato.keys.def.KeyList in the project legato by DOREMUS-ANR.
Class KeysClassifier, method getBestKey.
/**
 * Computes the best key for a pair of source/target models.
 *
 * <p>The method filters both models down to the properties returned by
 * {@code getCommonProperties}, writes them under {@code dirCluster} in N-Triples,
 * extracts the keys of each file with {@code Sakey.extractKeys}, merges the two key
 * sets and returns the result of {@code SupportMergedKeys.rank} on the merged keys.
 *
 * <p>Side effect: the {@code legato} field is (re)assigned to the LEGATO singleton.
 *
 * @param srcModel   source dataset
 * @param tgtModel   target dataset
 * @param dirCluster directory in which the temporary "source.nt" and "target.nt"
 *                   files are expected (presumably written by
 *                   {@code FileManager.createRDFile} - confirm the naming there)
 * @return the ranking result for the merged keys
 * @throws IOException propagated from the file-writing, key-extraction or ranking helpers
 */
public static HashSet<String> getBestKey(Model srcModel, Model tgtModel, File dirCluster) throws IOException {
    legato = LEGATO.getInstance();

    // Currently disabled pre-processing: place all literals (in resources CBD) at a
    // distance of 1. Reasons given by the original author:
    //   + SAKey considers blank nodes as "Strings"
    //   + SILK gives different results when comparing property values whose distance > 1
    // srcModel = ModelManager.rewrite(srcModel);
    // tgtModel = ModelManager.rewrite(tgtModel);

    // Keep only the triples whose properties are common to both datasets.
    List<Property> commonProperties = getCommonProperties(srcModel, tgtModel);
    Model filteredSrc = ModelManager.getFilteredTriples(srcModel, commonProperties);
    Model filteredTgt = ModelManager.getFilteredTriples(tgtModel, commonProperties);

    // Save the two models temporarily as N-TRIPLES (the only format accepted by SAKey).
    FileManager.createRDFile(dirCluster, "source", filteredSrc, "nt");
    FileManager.createRDFile(dirCluster, "target", filteredTgt, "nt");

    // Extract the keys of each dataset from the files written above.
    File srcFile = new File(dirCluster.getAbsolutePath() + File.separator + "source.nt");
    File tgtFile = new File(dirCluster.getAbsolutePath() + File.separator + "target.nt");
    KeyList srcKeys = Sakey.extractKeys(srcFile, new KeyList());
    KeyList tgtKeys = Sakey.extractKeys(tgtFile, new KeyList());

    // Merge the two sets of keys.
    HashSet<HashSet<Key>> keySets = new HashSet<HashSet<Key>>();
    keySets.add(toKeySet(srcKeys));
    keySets.add(toKeySet(tgtKeys));
    KeyList mergedKeys = new KeyList().merge(keySets);

    // Rank the merged keys against both files.
    return SupportMergedKeys.rank(mergedKeys, srcFile, tgtFile);
}

/** Copies every key yielded by {@code keys.iterator()} into a new hash set. */
private static HashSet<Key> toKeySet(KeyList keys) {
    HashSet<Key> result = new HashSet<Key>();
    // Iterator<?> plus a cast works whether KeyList.iterator() is declared raw or generic.
    Iterator<?> it = keys.iterator();
    while (it.hasNext()) {
        result.add((Key) it.next());
    }
    return result;
}