Use of ca.pfv.spmf.algorithms.clustering.dbscan.AlgoDBSCAN in the project legato by DOREMUS-ANR.
Class PropertyHandler, method clean.
/**
 * Removes "problematic" properties from the source and target datasets.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>Load both models, rewrite them via {@code ModelManager.rewrite(model, false)} and merge
 *       the results into a single model.</li>
 *   <li>Score every distinct property of the merged model with {@code getScore} and sort the
 *       properties by score using {@code ValueComparator}.</li>
 *   <li>Write the scores (one per line) through {@code FileManager.create("nom", ...)} and
 *       cluster them with DBSCAN (minPts = 1, epsilon = 5).</li>
 *   <li>Pick the cluster with the highest mean score (only clusters whose mean is strictly
 *       greater than 0 are considered) and treat every property whose score belongs to that
 *       cluster as problematic.</li>
 *   <li>Rewrite both models via {@code ModelManager.rewrite(model, true)}, drop every statement
 *       whose predicate is problematic, and register the filtered models as the new
 *       source/target on the {@code LEGATO} singleton.</li>
 * </ol>
 *
 * <p>If DBSCAN yields no cluster with a positive mean, no property is removed.
 *
 * @param srcPath path of the source dataset, passed to {@code ModelManager.loadModel}
 * @param tgtPath path of the target dataset, passed to {@code ModelManager.loadModel}
 * @throws IOException if an I/O error is raised while loading, writing or clustering
 */
public static void clean(String srcPath, String tgtPath) throws IOException {
    LEGATO legato = LEGATO.getInstance();
    Model srcModel = ModelManager.loadModel(srcPath);
    Model tgtModel = ModelManager.loadModel(tgtPath);

    // Merge the rewritten source and target so that properties are scored over both datasets.
    Model rewrittenSrc = ModelManager.rewrite(srcModel, false);
    Model rewrittenTgt = ModelManager.rewrite(tgtModel, false);
    Model mergedModel = ModelFactory.createDefaultModel();
    mergedModel.add(rewrittenSrc);
    mergedModel.add(rewrittenTgt);

    List<Resource> properties = getDistinctProperties(mergedModel);
    System.out.println(legato.getPropList());

    // Property URI -> score (kept as text because ValueComparator is declared over Strings).
    HashMap<String, String> propScoreList = new HashMap<String, String>();
    for (Resource property : properties) {
        propScoreList.put(property.toString(), String.valueOf(getScore(property, mergedModel)));
    }

    ValueComparator<String> comp = new ValueComparator<String>(propScoreList);
    TreeMap<String, String> sortedScores = new TreeMap<String, String>(comp);
    sortedScores.putAll(propScoreList);
    System.out.println(sortedScores);

    // One score per line: this is the input file handed to DBSCAN below.
    StringBuilder sb = new StringBuilder();
    for (String score : sortedScores.values()) {
        sb.append(Double.parseDouble(score)).append('\n');
    }
    // NOTE(review): presumably creates "nom.txt" under legato.getPath(), which is the file
    // read back by runAlgorithm below - confirm against FileManager.create.
    FileManager.create("nom", sb.toString().trim());

    int minPts = 1;
    double epsilon = 5d;
    AlgoDBSCAN algo = new AlgoDBSCAN();
    List<Cluster> clusters = algo.runAlgorithm(legato.getPath() + File.separator + "nom.txt", minPts, epsilon, "\n");
    algo.printStatistics();

    // Keep the cluster with the highest mean; stays null when no cluster has a mean above 0.
    double highMean = 0;
    double[] heterCluster = null;
    for (Cluster cluster : clusters) {
        double[] arr = new double[cluster.getVectors().size()];
        int i = 0;
        for (DoubleArray dataPoint : cluster.getVectors()) {
            arr[i++] = dataPoint.data[0];
        }
        double mean = new A(arr).getMean();
        if (highMean < mean) {
            highMean = mean;
            heterCluster = arr;
        }
    }

    // Properties whose score falls in the selected cluster are the ones to delete.
    List<String> propList = new ArrayList<String>();
    if (heterCluster != null) {
        for (Entry<String, String> entry : sortedScores.entrySet()) {
            // Compare numerically: the textual form of the stored score is not guaranteed to be
            // the canonical double representation (e.g. "5" vs "5.0").
            if (clusterContainsScore(heterCluster, Double.parseDouble(entry.getValue()))) {
                propList.add(entry.getKey());
            }
        }
    }
    System.out.println(propList);

    srcModel = ModelManager.rewrite(srcModel, true);
    System.out.println("source");
    tgtModel = ModelManager.rewrite(tgtModel, true);

    Model srcFinalModel = copyWithoutProperties(srcModel, propList);
    Model tgtFinalModel = copyWithoutProperties(tgtModel, propList);

    legato.setSource(FileManager.getCreatedRDFile("source", srcFinalModel));
    legato.setTarget(FileManager.getCreatedRDFile("target", tgtFinalModel));
    System.out.println("finish");
}

/** Returns true when {@code score} is numerically equal to one of {@code clusterValues}. */
private static boolean clusterContainsScore(double[] clusterValues, double score) {
    for (double value : clusterValues) {
        if (Double.compare(value, score) == 0) {
            return true;
        }
    }
    return false;
}

/** Returns a new model holding every statement of {@code model} whose predicate is not in {@code excluded}. */
private static Model copyWithoutProperties(Model model, List<String> excluded) {
    Model filtered = ModelFactory.createDefaultModel();
    model.listStatements().toSet().forEach((stmt) -> {
        if (!excluded.contains(stmt.getPredicate().toString())) {
            filtered.add(stmt);
        }
    });
    return filtered;
}
Aggregations