Search in sources :

Example 1 with Key

use of legato.keys.def.Key in project legato by DOREMUS-ANR.

the class KeysClassifier method getBestKey.

/**
 * Determines the best-ranked key for a pair of datasets.
 *
 * <p>Both models are first restricted to the properties they have in common, then
 * written to {@code source.nt} and {@code target.nt} inside {@code dirCluster}
 * (N-TRIPLES being the only format accepted by SAKey). Keys are extracted from each
 * file with {@link Sakey#extractKeys}, merged, and ranked by
 * {@link SupportMergedKeys#rank}.
 *
 * @param srcModel   source dataset
 * @param tgtModel   target dataset
 * @param dirCluster directory in which the temporary N-TRIPLES files are created
 * @return whatever {@code SupportMergedKeys.rank} returns for the merged keys
 * @throws IOException propagated from the file creation, key extraction or ranking steps
 */
public static HashSet<String> getBestKey(Model srcModel, Model tgtModel, File dirCluster) throws IOException {
    legato = LEGATO.getInstance();

    // Disabled step: rewriting both models (ModelManager.rewrite) so that every literal
    // of a resource CBD sits at distance 1. It was motivated by two observations:
    //   + SAKey considers blank nodes as "Strings"
    //   + SILK gives different results when comparing property values whose distance > 1

    // Keep only the triples whose property occurs in both datasets.
    List<Property> sharedProperties = getCommonProperties(srcModel, tgtModel);
    Model filteredSource = ModelManager.getFilteredTriples(srcModel, sharedProperties);
    Model filteredTarget = ModelManager.getFilteredTriples(tgtModel, sharedProperties);

    // Dump both filtered models as N-TRIPLES files for SAKey.
    FileManager.createRDFile(dirCluster, "source", filteredSource, "nt");
    FileManager.createRDFile(dirCluster, "target", filteredTarget, "nt");

    // Extract the keys of each dataset from its temporary file.
    KeyList sourceKeyList = new KeyList();
    KeyList targetKeyList = new KeyList();
    File srcFile = new File(dirCluster.getAbsolutePath() + File.separator + "source.nt");
    File tgtFile = new File(dirCluster.getAbsolutePath() + File.separator + "target.nt");
    sourceKeyList = Sakey.extractKeys(srcFile, sourceKeyList);
    targetKeyList = Sakey.extractKeys(tgtFile, targetKeyList);

    // Copy each key list into its own set, then merge the two sets.
    HashSet<Key> sourceKeySet = new HashSet<>();
    HashSet<Key> targetKeySet = new HashSet<>();
    for (Iterator it = sourceKeyList.iterator(); it.hasNext();) {
        sourceKeySet.add((Key) it.next());
    }
    for (Iterator it = targetKeyList.iterator(); it.hasNext();) {
        targetKeySet.add((Key) it.next());
    }
    HashSet<HashSet<Key>> keySets = new HashSet<>();
    keySets.add(sourceKeySet);
    keySets.add(targetKeySet);
    KeyList mergedKeys = new KeyList().merge(keySets);

    // Rank the merged keys and hand back the winner.
    return SupportMergedKeys.rank(mergedKeys, srcFile, tgtFile);
}
Also used : KeyList(legato.keys.def.KeyList) StmtIterator(org.apache.jena.rdf.model.StmtIterator) Iterator(java.util.Iterator) Property(org.apache.jena.rdf.model.Property) File(java.io.File) Key(legato.keys.def.Key) HashSet(java.util.HashSet)

Example 2 with Key

use of legato.keys.def.Key in project legato by DOREMUS-ANR.

the class Sakey method extractKeys.

/**
 * Runs the external SAKey tool on a file and appends the keys it reports to {@code keys}.
 *
 * <p>SAKey is launched as {@code java -jar <legato path>/sakey.jar <file> 1} with the
 * LEGATO path as working directory. Its standard output is captured, and the text that
 * follows the {@code "0-almost keys:"} marker is handed to {@code getKeys}; every
 * returned entry is split on {@code ", "} into the properties of one {@link Key}.
 *
 * @param file the file passed to SAKey (NOTE(review): presumably N-TRIPLES — confirm)
 * @param keys the list the extracted keys are added to; returned unchanged when SAKey
 *             reports {@code "0-almost keys:[]"}
 * @return the same {@code keys} instance
 * @throws IOException if the process cannot be started or read, or if its output does
 *                     not contain a usable {@code "0-almost keys:"} section
 */
public static KeyList extractKeys(File file, KeyList keys) throws IOException {
    LEGATO legato = LEGATO.getInstance();
    ProcessBuilder pbSource = new ProcessBuilder("java", "-jar", legato.getPath() + File.separator + "sakey.jar", file.toString(), "1");
    pbSource.directory(new File(legato.getPath() + File.separator));
    // Nothing reads the child's stderr here; forward it to this JVM's stderr so the child
    // cannot block on a full pipe and its diagnostics are not lost.
    pbSource.redirectError(ProcessBuilder.Redirect.INHERIT);
    Process pSource = pbSource.start();

    // Capture the whole standard output; the reader is closed even if reading fails.
    // NOTE(review): decoding uses the platform default charset, as before.
    StringBuilder builder = new StringBuilder();
    String lineSeparator = System.getProperty("line.separator");
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(pSource.getInputStream()))) {
        String line;
        while ((line = reader.readLine()) != null) {
            builder.append(line);
            builder.append(lineSeparator);
        }
    }
    String output = builder.toString();

    // SAKey found no key at all: nothing to add.
    if (output.contains("0-almost keys:[]")) {
        return keys;
    }

    String[] tab1 = output.split("0-almost keys:");
    if (tab1.length < 2) {
        // Previously surfaced as an ArrayIndexOutOfBoundsException.
        throw new IOException("SAKey output for " + file + " does not contain a \"0-almost keys:\" section");
    }
    String keySection = tab1[1];
    if (keySection.length() < 3) {
        // Previously surfaced as a StringIndexOutOfBoundsException.
        throw new IOException("SAKey output for " + file + " has a malformed \"0-almost keys:\" section");
    }

    // Drop the first character and the last two before parsing.
    // NOTE(review): the number of characters removed at the end is fixed at two while the
    // appended line separator is platform dependent — verify against getKeys.
    for (String k : getKeys(keySection.substring(1, keySection.length() - 2))) {
        Key key = new Key();
        for (String property : k.split(", ")) {
            key.addProperty(property);
        }
        keys.add(key);
    }
    return keys;
}
Also used : LEGATO(legato.LEGATO) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) File(java.io.File) Key(legato.keys.def.Key)

Example 3 with Key

use of legato.keys.def.Key in project legato by DOREMUS-ANR.

the class SupportMergedKeys method rank.

/**
 * Ranks the merged keys by combining their support in the source and target files.
 *
 * <p>Each merged key is flattened into the set of its property names. Support is then
 * computed on the parsed source file and on the parsed target file (shared static state
 * is reset in between). For every key present in both {@code keysSource} and
 * {@code keysTarget}, the two values are parsed as floats and multiplied; the products
 * are placed in a {@link TreeMap} ordered by {@code ValueComparator}.
 *
 * @param mKeys   the merged keys to rank
 * @param srcFile source dataset file, passed to {@code fileParsing}
 * @param tgtFile target dataset file, passed to {@code fileParsing}
 * @return the key of the first entry of the ranked map (intended to be the highest
 *         score), or {@code null} when no key is common to both sides
 * @throws IOException declared by the original signature; presumably raised while the
 *                     files are parsed
 */
public static HashSet<String> rank(KeyList mKeys, File srcFile, File tgtFile) throws IOException {
    // Result is unused; the call is kept in case it initializes the singleton.
    LEGATO.getInstance();

    // Flatten every merged key into the set of its property names.
    HashSet<HashSet<String>> candidateKeys = new HashSet<>();
    for (Iterator keyIt = mKeys.iterator(); keyIt.hasNext();) {
        Key mergedKey = (Key) keyIt.next();
        HashSet<String> propertyNames = new HashSet<>();
        for (Iterator propIt = mergedKey.iterator(); propIt.hasNext();) {
            propertyNames.add((String) propIt.next());
        }
        candidateKeys.add(propertyNames);
    }

    // Support on the source side.
    HashMap<String, HashSet<String>> sourceResources = fileParsing(srcFile.toString());
    computeSupport(candidateKeys, sourceResources);
    NBs = 100;

    // Reset the shared state before processing the target side.
    allInstances.clear();
    cib = true;

    // Support on the target side.
    HashMap<String, HashSet<String>> targetResources = fileParsing(tgtFile.toString());
    computeSupport(candidateKeys, targetResources);
    NBt = 100;

    // Order both support maps with ValueComparator.
    TreeMap<HashSet<String>, String> sortedSource =
            new TreeMap<HashSet<String>, String>(new ValueComparator<HashSet<String>>(keysSource));
    sortedSource.putAll(keysSource);
    TreeMap<HashSet<String>, String> sortedTarget =
            new TreeMap<HashSet<String>, String>(new ValueComparator<HashSet<String>>(keysTarget));
    sortedTarget.putAll(keysTarget);

    // A key found on both sides is scored with the product of its two values.
    HashMap<HashSet<String>, String> scoredKeys = new HashMap<HashSet<String>, String>();
    for (Map.Entry<HashSet<String>, String> sourceEntry : sortedSource.entrySet()) {
        for (Map.Entry<HashSet<String>, String> targetEntry : sortedTarget.entrySet()) {
            if (!sourceEntry.getKey().equals(targetEntry.getKey())) {
                continue;
            }
            float sourceScore = Float.parseFloat(sourceEntry.getValue());
            float targetScore = Float.parseFloat(targetEntry.getValue());
            scoredKeys.put(sourceEntry.getKey(), String.valueOf(sourceScore * targetScore));
        }
    }

    TreeMap<HashSet<String>, String> ranked =
            new TreeMap<HashSet<String>, String>(new ValueComparator<HashSet<String>>(scoredKeys));
    ranked.putAll(scoredKeys);

    // First entry of the ranked map, or null when nothing was scored.
    return ranked.isEmpty() ? null : ranked.firstEntry().getKey();
}
Also used : HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) ValueComparator(legato.utils.ValueComparator) LEGATO(legato.LEGATO) StmtIterator(org.apache.jena.rdf.model.StmtIterator) Iterator(java.util.Iterator) HashMap(java.util.HashMap) TreeMap(java.util.TreeMap) Map(java.util.Map) Key(legato.keys.def.Key) HashSet(java.util.HashSet)

Aggregations

Key (legato.keys.def.Key)3 File (java.io.File)2 HashSet (java.util.HashSet)2 Iterator (java.util.Iterator)2 LEGATO (legato.LEGATO)2 StmtIterator (org.apache.jena.rdf.model.StmtIterator)2 BufferedReader (java.io.BufferedReader)1 InputStreamReader (java.io.InputStreamReader)1 HashMap (java.util.HashMap)1 Map (java.util.Map)1 TreeMap (java.util.TreeMap)1 KeyList (legato.keys.def.KeyList)1 ValueComparator (legato.utils.ValueComparator)1 Property (org.apache.jena.rdf.model.Property)1