use of legato.keys.def.Key in project legato by DOREMUS-ANR.
the class KeysClassifier method getBestKey.
/**
 * Computes the best key for a pair of source/target models.
 *
 * <p>The method restricts both models to the properties they have in common,
 * writes them as N-Triples files ({@code source.nt} and {@code target.nt})
 * into {@code dirCluster}, extracts the keys of each file through
 * {@code Sakey.extractKeys}, merges the two key sets and finally delegates
 * the ranking to {@code SupportMergedKeys.rank}.
 *
 * @param srcModel   the source model
 * @param tgtModel   the target model
 * @param dirCluster directory in which the temporary N-Triples files are created
 * @return whatever {@code SupportMergedKeys.rank} returns for the merged keys
 * @throws IOException if one of the underlying steps reports an I/O failure
 */
public static HashSet<String> getBestKey(Model srcModel, Model tgtModel, File dirCluster) throws IOException {
    legato = LEGATO.getInstance();

    // Disabled step, kept for reference: place all literals (in the resources'
    // CBD) at a distance of 1. Reasons given by the original author:
    //   + SAKey considers blank nodes as "Strings"
    //   + SILK gives different results when comparing property values whose distance > 1
    // srcModel = ModelManager.rewrite(srcModel);
    // tgtModel = ModelManager.rewrite(tgtModel);

    // Keep only the triples whose property appears in both datasets.
    List<Property> sharedProperties = getCommonProperties(srcModel, tgtModel);
    Model filteredSource = ModelManager.getFilteredTriples(srcModel, sharedProperties);
    Model filteredTarget = ModelManager.getFilteredTriples(tgtModel, sharedProperties);

    // Dump both models as N-Triples (the only format accepted by SAKey).
    FileManager.createRDFile(dirCluster, "source", filteredSource, "nt");
    FileManager.createRDFile(dirCluster, "target", filteredTarget, "nt");

    String clusterPrefix = dirCluster.getAbsolutePath() + File.separator;
    File srcFile = new File(clusterPrefix + "source.nt");
    File tgtFile = new File(clusterPrefix + "target.nt");

    // Extract the keys of each dataset.
    KeyList sourceKeys = Sakey.extractKeys(srcFile, new KeyList());
    KeyList targetKeys = Sakey.extractKeys(tgtFile, new KeyList());

    // Copy each key list into a set so that the two sets can be merged.
    HashSet<Key> sourceKeySet = new HashSet<>();
    for (Iterator it = sourceKeys.iterator(); it.hasNext();) {
        sourceKeySet.add((Key) it.next());
    }
    HashSet<Key> targetKeySet = new HashSet<>();
    for (Iterator it = targetKeys.iterator(); it.hasNext();) {
        targetKeySet.add((Key) it.next());
    }

    HashSet<HashSet<Key>> keySets = new HashSet<>();
    keySets.add(sourceKeySet);
    keySets.add(targetKeySet);
    KeyList mergedKeys = new KeyList().merge(keySets);

    // Rank the merged keys and hand back the winner.
    return SupportMergedKeys.rank(mergedKeys, srcFile, tgtFile);
}
use of legato.keys.def.Key in project legato by DOREMUS-ANR.
the class Sakey method extractKeys.
/**
 * Runs the SAKey jar on {@code file} and adds the keys it reports to {@code keys}.
 *
 * <p>The command {@code java -jar <legato path>/sakey.jar <file> 1} is started
 * with the LEGATO path as working directory. Its standard output is read
 * completely, and the text following the {@code "0-almost keys:"} marker is
 * handed to {@code getKeys}. Each returned entry is split on {@code ", "}
 * into the properties of one {@code Key}.
 *
 * @param file the file passed to SAKey as its input argument
 * @param keys the list that receives the extracted keys
 * @return the same {@code keys} instance, left unchanged when SAKey reports
 *         {@code "0-almost keys:[]"}
 * @throws IOException if the process cannot be started, its output cannot be
 *                     read, or the output has no usable
 *                     {@code "0-almost keys:"} section
 */
public static KeyList extractKeys(File file, KeyList keys) throws IOException {
    LEGATO legato = LEGATO.getInstance();
    String legatoPath = legato.getPath();
    ProcessBuilder pbSource = new ProcessBuilder("java", "-jar", legatoPath + File.separator + "sakey.jar", file.toString(), "1");
    pbSource.directory(new File(legatoPath + File.separator));
    Process pSource = pbSource.start();

    // Collect the whole standard output; the reader is closed even when reading fails.
    StringBuilder builder = new StringBuilder();
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(pSource.getInputStream()))) {
        String line;
        while ((line = reader.readLine()) != null) {
            builder.append(line);
            builder.append(System.getProperty("line.separator"));
        }
    }
    String output = builder.toString();

    // SAKey found no key at all: nothing to add.
    if (output.contains("0-almost keys:[]")) {
        return keys;
    }

    // Without the marker (e.g. the jar failed and printed nothing on stdout)
    // there is nothing to parse; report it instead of failing on an array index.
    String[] tab1 = output.split("0-almost keys:");
    if (tab1.length < 2 || tab1[1].length() < 3) {
        throw new IOException("SAKey output for " + file + " contains no usable \"0-almost keys:\" section");
    }

    // Drop the first character and the last two characters of the section.
    // NOTE(review): the trailing part includes the platform line separator, so
    // what is stripped differs between "\n" and "\r\n" platforms - confirm
    // against the SAKey output format and getKeys.
    String section = tab1[1].substring(1, tab1[1].length() - 2);
    for (String k : getKeys(section)) {
        Key key = new Key();
        for (String property : k.split(", ")) {
            key.addProperty(property);
        }
        keys.add(key);
    }
    return keys;
}
use of legato.keys.def.Key in project legato by DOREMUS-ANR.
the class SupportMergedKeys method rank.
/**
 * Ranks the merged keys and returns the first one of the final ordering.
 *
 * <p>Every merged key is flattened into the set of its properties. Support is
 * then computed against the source file and against the target file (through
 * {@code fileParsing} and {@code computeSupport}). For every key present in
 * both {@code keysSource} and {@code keysTarget}, the two stored values are
 * parsed as floats and multiplied; the products are sorted with a
 * {@code ValueComparator}.
 *
 * @param mKeys   the merged keys to rank
 * @param srcFile the source dataset file
 * @param tgtFile the target dataset file
 * @return the first key of the sorted products (the one with the highest
 *         score according to the original author), or {@code null} when no
 *         key is present on both sides
 * @throws IOException if one of the underlying steps reports an I/O failure
 */
public static HashSet<String> rank(KeyList mKeys, File srcFile, File tgtFile) throws IOException {
    LEGATO legato = LEGATO.getInstance();

    // Flatten each merged key into the set of its property names.
    HashSet<HashSet<String>> keys = new HashSet<>();
    for (Iterator keyIt = mKeys.iterator(); keyIt.hasNext();) {
        Key key = (Key) keyIt.next();
        HashSet<String> properties = new HashSet<>();
        for (Iterator propIt = key.iterator(); propIt.hasNext();) {
            properties.add((String) propIt.next());
        }
        keys.add(properties);
    }

    // Support computation, source side first, then target side. Class-level
    // state is updated between the two passes; this sequence must stay as is.
    HashMap<String, HashSet<String>> sourceResources = fileParsing(srcFile.toString());
    computeSupport(keys, sourceResources);
    NBs = 100;
    allInstances.clear();
    cib = true;
    HashMap<String, HashSet<String>> targetResources = fileParsing(tgtFile.toString());
    computeSupport(keys, targetResources);
    NBt = 100;

    // Sorted views of the per-dataset values.
    TreeMap<HashSet<String>, String> sortedSource =
            new TreeMap<HashSet<String>, String>(new ValueComparator<HashSet<String>>(keysSource));
    sortedSource.putAll(keysSource);
    TreeMap<HashSet<String>, String> sortedTarget =
            new TreeMap<HashSet<String>, String>(new ValueComparator<HashSet<String>>(keysTarget));
    sortedTarget.putAll(keysTarget);

    // Score every key found on both sides with the product of its two values.
    HashMap<HashSet<String>, String> mergedKeys = new HashMap<HashSet<String>, String>();
    for (Map.Entry<HashSet<String>, String> sourceEntry : sortedSource.entrySet()) {
        for (Map.Entry<HashSet<String>, String> targetEntry : sortedTarget.entrySet()) {
            if (!sourceEntry.getKey().equals(targetEntry.getKey())) {
                continue;
            }
            float sourceValue = Float.parseFloat(sourceEntry.getValue());
            float targetValue = Float.parseFloat(targetEntry.getValue());
            mergedKeys.put(sourceEntry.getKey(), String.valueOf(sourceValue * targetValue));
        }
    }

    TreeMap<HashSet<String>, String> sortedMerged =
            new TreeMap<HashSet<String>, String>(new ValueComparator<HashSet<String>>(mergedKeys));
    sortedMerged.putAll(mergedKeys);

    // First entry of the ordering, or null when nothing was scored.
    return sortedMerged.isEmpty() ? null : sortedMerged.firstEntry().getKey();
}
Aggregations