Search in sources :

Example 1 with AlgoDBSCAN

Use of ca.pfv.spmf.algorithms.clustering.dbscan.AlgoDBSCAN in the project legato by DOREMUS-ANR.

The method clean of the class PropertyHandler.

/**
 * Deletes "problematic" properties from the source and target models.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>Loads both models, rewrites them with {@code ModelManager.rewrite(model, false)} and
 *       merges the results.</li>
 *   <li>Computes a score (via {@code getScore}) for every distinct property of the merged model
 *       and writes the scores, one per line, through {@code FileManager.create("nom", ...)}.</li>
 *   <li>Runs DBSCAN (minPts = 1, epsilon = 5) on the file
 *       {@code legato.getPath() + File.separator + "nom.txt"} and keeps the cluster whose mean
 *       is the highest (and strictly greater than 0).</li>
 *   <li>Every property whose score appears in that cluster is dropped from the models rewritten
 *       with {@code ModelManager.rewrite(model, true)}; the filtered models are then registered
 *       as the new source and target of the {@code LEGATO} singleton.</li>
 * </ol>
 *
 * <p>If DBSCAN yields no cluster with a positive mean, no property is removed.
 *
 * @param srcPath path of the source model, passed to {@code ModelManager.loadModel}
 * @param tgtPath path of the target model, passed to {@code ModelManager.loadModel}
 * @throws IOException declared by the original signature; presumably raised by the file-based
 *         helpers or the clustering run (confirm against those APIs)
 */
public static void clean(String srcPath, String tgtPath) throws IOException {
    LEGATO legato = LEGATO.getInstance();
    Model srcModel = ModelManager.loadModel(srcPath);
    Model tgtModel = ModelManager.loadModel(tgtPath);
    Model s = ModelManager.rewrite(srcModel, false);
    Model t = ModelManager.rewrite(tgtModel, false);
    Model mergedModel = ModelFactory.createDefaultModel();
    mergedModel.add(s);
    mergedModel.add(t);

    // Score every distinct property of the merged model.
    List<Resource> properties = getDistinctProperties(mergedModel);
    System.out.println(legato.getPropList());
    HashMap<String, String> propScoreList = new HashMap<String, String>();
    properties.forEach((property) -> {
        propScoreList.put(property.toString(), String.valueOf(getScore(property, mergedModel)));
    });
    ValueComparator<String> comp = new ValueComparator<String>(propScoreList);
    TreeMap<String, String> mapTriee = new TreeMap<String, String>(comp);
    mapTriee.putAll(propScoreList);
    System.out.println(mapTriee);

    // Dump the scores (one per line, in map order) to the file consumed by DBSCAN.
    StringBuilder sb = new StringBuilder();
    for (String score : mapTriee.values()) {
        sb.append(Double.valueOf(score)).append("\n");
    }
    FileManager.create("nom", sb.toString().trim());

    int minPts = 1;
    double epsilon = 5d;
    AlgoDBSCAN algo = new AlgoDBSCAN();
    List<Cluster> clusters = algo.runAlgorithm(legato.getPath() + File.separator + "nom.txt", minPts, epsilon, "\n");
    algo.printStatistics();

    // Select the cluster with the highest mean; it stays null when there is no
    // cluster at all or when no cluster mean is strictly positive.
    double highMean = 0;
    double[] heterCluster = null;
    for (Cluster cluster : clusters) {
        double[] arr = new double[cluster.getVectors().size()];
        int i = 0;
        for (DoubleArray dataPoint : cluster.getVectors()) {
            arr[i++] = dataPoint.data[0];
        }
        double mean = new A(arr).getMean();
        if (highMean < mean) {
            highMean = mean;
            heterCluster = arr;
        }
    }

    // Collect the properties whose score belongs to the selected cluster.
    List<String> propList = new ArrayList<String>();
    if (heterCluster != null) {
        for (Entry<String, String> entry : mapTriee.entrySet()) {
            for (double clusterScore : heterCluster) {
                if (String.valueOf(clusterScore).equals(entry.getValue())) {
                    propList.add(entry.getKey());
                    // One match is enough; avoids adding the same property repeatedly.
                    break;
                }
            }
        }
    }
    System.out.println(propList);

    srcModel = ModelManager.rewrite(srcModel, true);
    System.out.println("source");
    tgtModel = ModelManager.rewrite(tgtModel, true);
    Model srcFinalModel = filterOutProperties(srcModel, propList);
    Model tgtFinalModel = filterOutProperties(tgtModel, propList);
    legato.setSource(FileManager.getCreatedRDFile("source", srcFinalModel));
    legato.setTarget(FileManager.getCreatedRDFile("target", tgtFinalModel));
    System.out.println("finish");
}

/**
 * Returns a new model holding every statement of {@code model} whose predicate
 * (compared through its {@code toString()} form) is not listed in {@code excluded}.
 */
private static Model filterOutProperties(Model model, List<String> excluded) {
    Model kept = ModelFactory.createDefaultModel();
    model.listStatements().toSet().forEach((stmt) -> {
        if (!excluded.contains(stmt.getPredicate().toString())) {
            kept.add(stmt);
        }
    });
    return kept;
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Entry(java.util.Map.Entry) LEGATO(legato.LEGATO) Iterator(java.util.Iterator) Property(org.apache.jena.rdf.model.Property) Resource(org.apache.jena.rdf.model.Resource) Cluster(ca.pfv.spmf.patterns.cluster.Cluster) TreeMap(java.util.TreeMap) AlgoDBSCAN(ca.pfv.spmf.algorithms.clustering.dbscan.AlgoDBSCAN) Model(org.apache.jena.rdf.model.Model) DoubleArray(ca.pfv.spmf.patterns.cluster.DoubleArray)

Aggregations

AlgoDBSCAN (ca.pfv.spmf.algorithms.clustering.dbscan.AlgoDBSCAN)1 Cluster (ca.pfv.spmf.patterns.cluster.Cluster)1 DoubleArray (ca.pfv.spmf.patterns.cluster.DoubleArray)1 ArrayList (java.util.ArrayList)1 HashMap (java.util.HashMap)1 Iterator (java.util.Iterator)1 Entry (java.util.Map.Entry)1 TreeMap (java.util.TreeMap)1 LEGATO (legato.LEGATO)1 Model (org.apache.jena.rdf.model.Model)1 Property (org.apache.jena.rdf.model.Property)1 Resource (org.apache.jena.rdf.model.Resource)1