Search in sources :

Example 11 with InvalidInputFileException

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.InvalidInputFileException in project clusterMaker2 by RBVI.

the class IteratorThread method calculateHierarichal.

/**
 * Runs one clustering round at the given threshold and appends one result line to {@code bw}.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Collect groups of node indices: at {@code TaskConfig.minThreshold} they come from
 * {@code Splitter.splitIntoConnectedComponents}, otherwise from the members of the incoming
 * {@code clusters}.</li>
 * <li>Collapse every group to one representative per {@code mergedNodes} entry and build a
 * {@link ConnectedComponent} whose pairwise cost is the sum of {@code edgeValue - threshold}
 * over all member pairs of the two merged sets.</li>
 * <li>Cluster every component through {@code ClusteringManager} and collect the resulting
 * clusters under fresh random keys.</li>
 * <li>Write {@code threshold \t fmeasure-or-"-" \t cluster;cluster;...;singleton;...} and report
 * the cluster size distribution to {@code TaskConfig.monitor}.</li>
 * </ol>
 *
 * @param threshold similarity threshold subtracted from every edge value
 * @param bw writer receiving the result line (not closed here)
 * @param es edge data handed to {@code Splitter} and {@code InOut.getEdgeValue}
 * @param proteins2integers maps a node index to its id string (note: index -> id, despite the name)
 * @param integers2proteins maps an id string to its node index (note: id -> index, despite the name)
 * @param clusterReference reference clustering for the F-measure, or {@code null} to write "-"
 * @param clusters clusters of the previous round; only read when
 *        {@code threshold != TaskConfig.minThreshold}
 * @param singletons receives the single member of every one-element cluster found in this round;
 *        all entries of this list (including ones already present) are written to the line
 * @param mergedNodes maps a node index to the list of node indices it is merged with
 * @return the clusters of this round with all one-element clusters removed
 * @throws IOException if writing to {@code bw} fails
 */
private Hashtable<String, Hashtable<String, Boolean>> calculateHierarichal(double threshold, BufferedWriter bw, Edges es, HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins, Hashtable<String, Hashtable<String, Boolean>> clusterReference, Hashtable<String, Hashtable<String, Boolean>> clusters, Vector<String> singletons, Hashtable<Integer, Vector<Integer>> mergedNodes) throws IOException, ArgsParseException, InvalidInputFileException, InvalidTypeException {
    // Step 1: the node groups that become connected components.
    Vector<Vector<Integer>> groups;
    if (threshold == TaskConfig.minThreshold) {
        groups = Splitter.splitIntoConnectedComponents(es, proteins2integers, (float) threshold, false);
    } else {
        groups = new Vector<Vector<Integer>>();
        for (Hashtable<String, Boolean> cluster : clusters.values()) {
            Vector<Integer> members = new Vector<Integer>();
            for (String element : cluster.keySet()) {
                members.add(integers2proteins.get(element));
            }
            groups.add(members);
        }
    }

    // Step 2: one ConnectedComponent per group. 'already' is shared across groups so that
    // every merged set is represented exactly once overall.
    Vector<ConnectedComponent> connectedComponents = new Vector<ConnectedComponent>();
    boolean[] already = new boolean[proteins2integers.size()];
    for (Vector<Integer> group : groups) {
        Vector<Integer> representants = new Vector<Integer>();
        for (Integer node : group) {
            if (!already[node]) {
                representants.add(node);
                for (Integer merged : mergedNodes.get(node)) {
                    already[merged] = true;
                }
            }
        }
        int count = representants.size();
        ICCEdges cc2d2 = TaskConfig.ccEdgesEnum.createCCEdges(count);
        String[] ids = new String[count];
        for (int i = 0; i < count; i++) {
            Vector<Integer> merged1 = mergedNodes.get(representants.get(i));
            // The object id of a representative is the comma-separated list of all merged ids.
            StringBuilder joined = new StringBuilder();
            for (int j = 0; j < merged1.size(); j++) {
                if (j > 0) {
                    joined.append(',');
                }
                joined.append(proteins2integers.get(merged1.get(j)));
            }
            ids[i] = joined.toString();
            for (int j = i + 1; j < count; j++) {
                Vector<Integer> merged2 = mergedNodes.get(representants.get(j));
                float costs = 0;
                for (Integer a : merged1) {
                    for (Integer b : merged2) {
                        costs += (float) (InOut.getEdgeValue(a, b, es) - threshold);
                    }
                }
                cc2d2.setEdgeCost(i, j, costs);
            }
        }
        connectedComponents.add(new ConnectedComponent(cc2d2, ids, null));
    }

    // Step 3: cluster every component and collect the results.
    // NOTE(review): results are read right after runClusteringForOneConnectedComponent returns
    // and BEFORE the semaphores below are awaited (same order as before this change). This
    // presumably relies on that call finishing the clustering before it returns - confirm
    // against ClusteringManager.
    clusters = new Hashtable<String, Hashtable<String, Boolean>>();
    ClusteringManager cm = new ClusteringManager(null);
    ArrayList<Semaphore> allSemaphores = new ArrayList<Semaphore>();
    Semaphore maxThreadSemaphore = new Semaphore(TaskConfig.maxNoThreads, true);
    Random keySource = new Random();
    for (ConnectedComponent cc : connectedComponents) {
        Semaphore semaphore = new Semaphore(1);
        allSemaphores.add(semaphore);
        cm.runClusteringForOneConnectedComponent(cc, null, semaphore, maxThreadSemaphore, System.currentTimeMillis());
        int[] elements2cluster = cc.getClusters();
        for (int j = 0; j < cc.getNumberOfClusters(); j++) {
            Hashtable<String, Boolean> cluster = new Hashtable<String, Boolean>();
            for (int k = 0; k < elements2cluster.length; k++) {
                if (elements2cluster[k] == j) {
                    for (String id : cc.getObjectID(k).split(",")) {
                        cluster.put(id, true);
                    }
                }
            }
            // Keys are arbitrary; retry on the (unlikely) collision so no cluster is overwritten.
            String key;
            do {
                key = keySource.nextDouble() + "";
            } while (clusters.containsKey(key));
            clusters.put(key, cluster);
        }
    }

    /* wait for all clustering tasks to finish */
    boolean interrupted = false;
    for (Semaphore s : allSemaphores) {
        try {
            s.acquire();
        } catch (InterruptedException e) {
            interrupted = true;
            TaskConfig.monitor.showMessage(TaskMonitor.Level.ERROR, e.getMessage());
        }
    }

    // Step 4: result line.
    bw.write(threshold + "\t");
    if (clusterReference != null) {
        double fmeasure = Fmeassure.fMeassure(clusterReference, clusters);
        bw.write(fmeasure + "\t");
        TaskConfig.monitor.setStatusMessage("fmeasure: " + fmeasure);
    } else {
        bw.write("-\t");
    }

    // Size the histogram from the data instead of a fixed 1,000,000 slots.
    int max = 1;
    for (Hashtable<String, Boolean> h : clusters.values()) {
        if (h.size() > max) {
            max = h.size();
        }
    }
    int[] distribution = new int[max + 1];

    // A ';' is written only between two entries that were actually written, so singletons
    // (deferred to the end of the line) and empty clusters no longer leave empty fields.
    boolean wroteEntry = false;
    for (Iterator<String> keys = clusters.keySet().iterator(); keys.hasNext(); ) {
        Hashtable<String, Boolean> h = clusters.get(keys.next());
        if (h.size() == 1) {
            singletons.add(h.keySet().iterator().next());
            keys.remove();
            continue;
        }
        boolean firstId = true;
        for (String id : h.keySet()) {
            if (firstId) {
                if (wroteEntry) {
                    bw.write(";");
                }
                bw.write(id);
                firstId = false;
                wroteEntry = true;
            } else {
                bw.write("," + id);
            }
        }
        distribution[h.size()]++;
    }
    for (String id : singletons) {
        if (wroteEntry) {
            bw.write(";");
        }
        bw.write(id);
        wroteEntry = true;
    }
    distribution[1] = singletons.size();

    StringBuilder sb = new StringBuilder("cluster distribution: ");
    for (int i = max; i >= 0; i--) {
        if (distribution[i] > 0) {
            sb.append(i + ":" + distribution[i] + ", ");
        }
    }
    TaskConfig.monitor.setStatusMessage(sb.toString());
    TaskConfig.monitor.setStatusMessage("");
    bw.newLine();

    if (interrupted) {
        // Restore the interrupt status for the caller; done last so the writes above are unaffected.
        Thread.currentThread().interrupt();
    }
    return clusters;
}
Also used : ArrayList(java.util.ArrayList) Semaphore(java.util.concurrent.Semaphore) ClusteringManager(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.ClusteringManager) Random(java.util.Random) Vector(java.util.Vector) Hashtable(java.util.Hashtable) ICCEdges(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.datastructure.ICCEdges) ConnectedComponent(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.datastructure.ConnectedComponent)

Example 12 with InvalidInputFileException

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.InvalidInputFileException in project clusterMaker2 by RBVI.

the class IteratorThread method calculateHierarichal2.

/**
 * Runs one clustering round at the given threshold, appends one result line to {@code bw} and
 * rebuilds {@code mergedNodes} from the clusters found.
 *
 * <p>Steps, in order:
 * <ol>
 * <li>Unless {@code threshold == TaskConfig.maxThreshold}, replace {@code mergedNodes} by a new
 * table derived from the incoming {@code clusters} (every member maps to the member list of its
 * cluster).</li>
 * <li>Split the graph with {@code Splitter.splitIntoConnectedComponents}, collapse every
 * component to one representative per {@code mergedNodes} entry and build a
 * {@link ConnectedComponent} whose pairwise cost is the sum of {@code edgeValue - threshold}
 * over all member pairs of the two merged sets.</li>
 * <li>Cluster every component through {@code ClusteringManager} and collect the resulting
 * clusters under fresh random keys.</li>
 * <li>Write {@code threshold \t fmeasure-or-"-" \t cluster;cluster;...}. When
 * {@code TaskConfig.goldstandardPath} is set, the F-measure is computed on a filtered copy that
 * only keeps ids present in {@code referenceHash}; the written and returned clusters are not
 * filtered.</li>
 * <li>Clear {@code mergedNodes} and refill it from the new clusters.</li>
 * </ol>
 *
 * <p>NOTE(review): when step 1 replaces {@code mergedNodes}, step 5 refills that replacement and
 * the table passed in by the caller is left untouched (unchanged from the previous
 * implementation) - confirm this is what callers expect.
 *
 * @param threshold similarity threshold subtracted from every edge value
 * @param bw writer receiving the result line (not closed here)
 * @param es edge data handed to {@code Splitter} and {@code InOut.getEdgeValue}
 * @param proteins2integers maps a node index to its id string (note: index -> id, despite the name)
 * @param integers2proteins maps an id string to its node index (note: id -> index, despite the name)
 * @param clusterReference reference clustering for the F-measure, or {@code null} to write "-"
 * @param clusters clusters of the previous round; only read when
 *        {@code threshold != TaskConfig.maxThreshold}
 * @param mergedNodes maps a node index to the list of node indices it is merged with
 * @param referenceHash ids to keep for the F-measure when a gold standard path is configured
 * @return the clusters of this round
 * @throws IOException if writing to {@code bw} fails
 */
private Hashtable<String, Hashtable<String, Boolean>> calculateHierarichal2(double threshold, BufferedWriter bw, Edges es, HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins, Hashtable<String, Hashtable<String, Boolean>> clusterReference, Hashtable<String, Hashtable<String, Boolean>> clusters, Hashtable<Integer, Vector<Integer>> mergedNodes, HashMap<String, String> referenceHash) throws IOException, InvalidInputFileException {
    // Step 1: below the maximal threshold the merged sets are the clusters of the previous round.
    if (threshold != TaskConfig.maxThreshold) {
        mergedNodes = new Hashtable<Integer, Vector<Integer>>();
        for (Hashtable<String, Boolean> previous : clusters.values()) {
            Vector<Integer> members = new Vector<Integer>();
            for (String id : previous.keySet()) {
                members.add(integers2proteins.get(id));
            }
            for (Integer member : members) {
                mergedNodes.put(member, members);
            }
        }
    }

    // Step 2: one ConnectedComponent per graph component. 'already' is shared across
    // components so that every merged set is represented exactly once overall.
    Vector<ConnectedComponent> connectedComponents = new Vector<ConnectedComponent>();
    Vector<Vector<Integer>> groups = Splitter.splitIntoConnectedComponents(es, proteins2integers, (float) threshold, false);
    boolean[] already = new boolean[proteins2integers.size()];
    for (Vector<Integer> group : groups) {
        Vector<Integer> representants = new Vector<Integer>();
        for (Integer node : group) {
            if (!already[node]) {
                representants.add(node);
                for (Integer merged : mergedNodes.get(node)) {
                    already[merged] = true;
                }
            }
        }
        int count = representants.size();
        ICCEdges cc2d2 = TaskConfig.ccEdgesEnum.createCCEdges(count);
        String[] ids = new String[count];
        for (int i = 0; i < count; i++) {
            Vector<Integer> merged1 = mergedNodes.get(representants.get(i));
            // The object id of a representative is the comma-separated list of all merged ids.
            StringBuilder joined = new StringBuilder();
            for (int j = 0; j < merged1.size(); j++) {
                if (j > 0) {
                    joined.append(',');
                }
                joined.append(proteins2integers.get(merged1.get(j)));
            }
            ids[i] = joined.toString();
            for (int j = i + 1; j < count; j++) {
                Vector<Integer> merged2 = mergedNodes.get(representants.get(j));
                float costs = 0;
                for (Integer a : merged1) {
                    for (Integer b : merged2) {
                        costs += (float) (InOut.getEdgeValue(a, b, es) - threshold);
                    }
                }
                cc2d2.setEdgeCost(i, j, costs);
            }
        }
        connectedComponents.add(new ConnectedComponent(cc2d2, ids, null));
    }

    // Step 3: cluster every component and collect the results.
    // NOTE(review): results are read right after runClusteringForOneConnectedComponent returns
    // and BEFORE the semaphores below are awaited (same order as before this change). This
    // presumably relies on that call finishing the clustering before it returns - confirm
    // against ClusteringManager.
    clusters = new Hashtable<String, Hashtable<String, Boolean>>();
    ClusteringManager cm = new ClusteringManager(null);
    ArrayList<Semaphore> allSemaphores = new ArrayList<Semaphore>();
    Semaphore maxThreadSemaphore = new Semaphore(TaskConfig.maxNoThreads, true);
    Random keySource = new Random();
    for (ConnectedComponent cc : connectedComponents) {
        Semaphore semaphore = new Semaphore(1);
        allSemaphores.add(semaphore);
        cm.runClusteringForOneConnectedComponent(cc, null, semaphore, maxThreadSemaphore, System.currentTimeMillis());
        int[] elements2cluster = cc.getClusters();
        for (int j = 0; j < cc.getNumberOfClusters(); j++) {
            Hashtable<String, Boolean> cluster = new Hashtable<String, Boolean>();
            for (int k = 0; k < elements2cluster.length; k++) {
                if (elements2cluster[k] == j) {
                    for (String id : cc.getObjectID(k).split(",")) {
                        cluster.put(id, true);
                    }
                }
            }
            // Keys are arbitrary; retry on the (unlikely) collision so no cluster is overwritten.
            String key;
            do {
                key = keySource.nextDouble() + "";
            } while (clusters.containsKey(key));
            clusters.put(key, cluster);
        }
    }

    /* wait for all clustering tasks to finish */
    boolean interrupted = false;
    for (Semaphore s : allSemaphores) {
        try {
            s.acquire();
        } catch (InterruptedException e) {
            interrupted = true;
            TaskConfig.monitor.showMessage(TaskMonitor.Level.ERROR, e.getMessage());
        }
    }

    // Step 4a: copy used only for the F-measure. The inner tables are copied as well, because
    // a shallow copy shares them with 'clusters' and filtering would then also strip ids from
    // the clusters that are written, returned and used to rebuild mergedNodes.
    Hashtable<String, Hashtable<String, Boolean>> clustersCopy;
    if (TaskConfig.goldstandardPath != null) {
        clustersCopy = new Hashtable<String, Hashtable<String, Boolean>>();
        for (String key : clusters.keySet()) {
            Hashtable<String, Boolean> original = clusters.get(key);
            Hashtable<String, Boolean> filtered = new Hashtable<String, Boolean>();
            for (String id : original.keySet()) {
                if (referenceHash.containsKey(id)) {
                    filtered.put(id, original.get(id));
                }
            }
            // Clusters without any reference member do not take part in the F-measure.
            if (!filtered.isEmpty()) {
                clustersCopy.put(key, filtered);
            }
        }
    } else {
        clustersCopy = new Hashtable<String, Hashtable<String, Boolean>>(clusters);
    }

    // Step 4b: result line.
    bw.write(threshold + "\t");
    if (clusterReference != null) {
        double fmeasure = Fmeassure.fMeassure(clusterReference, clustersCopy);
        bw.write(fmeasure + "\t");
        TaskConfig.monitor.setStatusMessage("fmeasure: " + fmeasure);
    } else {
        bw.write("-\t");
    }

    // Size the histogram from the data instead of a fixed 1,000,000 slots.
    int max = 1;
    for (Hashtable<String, Boolean> h : clusters.values()) {
        if (h.size() > max) {
            max = h.size();
        }
    }
    int[] distribution = new int[max + 1];

    // A ';' is written only between two clusters that were actually written, so an empty
    // cluster never produces a dangling separator.
    boolean wroteEntry = false;
    for (Hashtable<String, Boolean> h : clusters.values()) {
        boolean firstId = true;
        for (String id : h.keySet()) {
            if (firstId) {
                if (wroteEntry) {
                    bw.write(";");
                }
                bw.write(id);
                firstId = false;
                wroteEntry = true;
            } else {
                bw.write("," + id);
            }
        }
        distribution[h.size()]++;
    }

    StringBuilder sb = new StringBuilder("cluster distribution: ");
    for (int i = max; i >= 0; i--) {
        if (distribution[i] > 0) {
            sb.append(i + ":" + distribution[i] + ", ");
        }
    }
    TaskConfig.monitor.setStatusMessage(sb.toString());
    TaskConfig.monitor.setStatusMessage("");
    bw.newLine();

    // Step 5: every member of a new cluster maps to the member list of that cluster.
    mergedNodes.clear();
    for (Hashtable<String, Boolean> currentCluster : clusters.values()) {
        Vector<Integer> currentClusterVector = new Vector<Integer>();
        for (String id : currentCluster.keySet()) {
            currentClusterVector.add(integers2proteins.get(id));
        }
        for (Integer integer : currentClusterVector) {
            mergedNodes.put(integer, currentClusterVector);
        }
    }

    if (interrupted) {
        // Restore the interrupt status for the caller; done last so the writes above are unaffected.
        Thread.currentThread().interrupt();
    }
    return clusters;
}
Also used : ArrayList(java.util.ArrayList) Semaphore(java.util.concurrent.Semaphore) ClusteringManager(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.ClusteringManager) Random(java.util.Random) Iterator(java.util.Iterator) Vector(java.util.Vector) Hashtable(java.util.Hashtable) ICCEdges(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.datastructure.ICCEdges) ConnectedComponent(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.datastructure.ConnectedComponent)

Aggregations

ConnectedComponent (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.datastructure.ConnectedComponent)5 ArrayList (java.util.ArrayList)5 IParameters (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.layout.IParameters)4 ICCEdges (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.datastructure.ICCEdges)3 ClusteringManager (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.ClusteringManager)3 Semaphore (java.util.concurrent.Semaphore)3 CostMatrixReader (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.datastructure.CostMatrixReader)2 FORCEnDParameters (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.layout.forcend.FORCEnDParameters)2 InvalidInputFileException (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.InvalidInputFileException)2 ClusterFile (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.io.ClusterFile)2 InfoFile (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.taskmanaging.io.InfoFile)2 File (java.io.File)2 IOException (java.io.IOException)2 Hashtable (java.util.Hashtable)2 Random (java.util.Random)2 Vector (java.util.Vector)2 GreedyClusterer (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.greedy.GreedyClusterer)1 IteratorThread (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.iterativeclustering.IteratorThread)1 ILayoutInitialiser (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.layout.ILayoutInitialiser)1 IParameterTraining 
(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.layclust.layout.parameter_training.IParameterTraining)1