Search in sources :

Example 1 with BlastFile

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile in project clusterMaker2 by RBVI.

Method createHSPCluster of the class Creator.

/**
 * Groups the BLAST hits of every protein into coverage clusters and writes
 * cluster-annotated copies of the BLAST and FASTA input files.
 *
 * <p>For each hit whose source and target differ, a boolean coverage mask is
 * built for the source (query range) and the target (subject range). The hit
 * joins the first existing cluster of that protein whose coverage has a
 * similarity above 0.8 (as computed by {@code calculateSimilarity}); otherwise
 * it opens a new cluster. Afterwards {@code Config.blastFile + "HSP"} and
 * {@code Config.fastaFile + "HSP"} are written.
 *
 * <p>Any exception raised while writing the two files is printed and terminates
 * the JVM with exit code 1.
 *
 * @param bf                parsed BLAST hits
 * @param proteins2integers lookup from integer index to protein id (only read)
 * @param integers2proteins lookup from protein id to integer index (only read)
 * @param proteinLengths    sequence length per integer index
 */
private void createHSPCluster(BlastFile bf, HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins, int[] proteinLengths) {
    // One (initially empty) cluster record per known protein id.
    HashMap<String, SourceHSPs> sourceHSPs = new HashMap<String, SourceHSPs>();
    for (String id : integers2proteins.keySet()) {
        int idInt = integers2proteins.get(id);
        sourceHSPs.put(id, new SourceHSPs(new ArrayList<boolean[]>(), proteinLengths[idInt], new ArrayList<Integer>(), id));
    }
    for (int i = 0; i < bf.size; i++) {
        // Self hits carry no clustering information.
        if (bf.getSource(i) == bf.getTarget(i)) {
            continue;
        }
        SourceHSPs sourceHSP = sourceHSPs.get(proteins2integers.get(bf.getSource(i)));
        SourceHSPs targetHSP = sourceHSPs.get(proteins2integers.get(bf.getTarget(i)));
        boolean[] sourceCoverage = new boolean[proteinLengths[bf.getSource(i)]];
        boolean[] targetCoverage = new boolean[proteinLengths[bf.getTarget(i)]];
        // NOTE(review): Arrays.fill treats the end index as exclusive; confirm this matches
        // the coordinate convention of the stored start/end positions.
        Arrays.fill(sourceCoverage, bf.getStartQuery(i), bf.getEndQuery(i), true);
        Arrays.fill(targetCoverage, bf.getStartSubject(i), bf.getEndSubject(i), true);
        assignHspCluster(sourceHSP, sourceCoverage, i);
        assignHspCluster(targetHSP, targetCoverage, i);
    }
    try {
        writeHspBlastFile(sourceHSPs);
        writeHspFastaFile(sourceHSPs);
    } catch (Exception e) {
        e.printStackTrace();
        System.exit(1);
    }
}

/**
 * Registers BLAST line {@code line} with {@code hsp} and assigns it to the first
 * cluster whose coverage is more than 0.8 similar, or to a newly opened cluster.
 */
private void assignHspCluster(SourceHSPs hsp, boolean[] coverage, int line) {
    ArrayList<boolean[]> knownCoverages = hsp.getCoverages();
    hsp.addLine(line);
    if (knownCoverages.isEmpty()) {
        knownCoverages.add(coverage);
        hsp.addCluster(0);
        // NOTE(review): the first cluster records line 0 whereas later clusters record the
        // actual line index; kept as in the original implementation - confirm this is intended.
        hsp.addClusterLine(0);
        return;
    }
    for (int j = 0; j < knownCoverages.size(); j++) {
        float sim = calculateSimilarity(knownCoverages.get(j), coverage);
        if (sim > 0.8) {
            hsp.addCluster(j);
            return;
        }
    }
    // No sufficiently similar cluster exists: open a new one.
    knownCoverages.add(coverage);
    hsp.setClusternr(hsp.getClusternr() + 1);
    hsp.addClusterLine(line);
    hsp.addCluster(hsp.getClusternr());
}

/**
 * Copies {@code Config.blastFile} to {@code Config.blastFile + "HSP"}, appending
 * {@code _HSP<cluster>} to the ids in the first two columns of every non-self hit.
 */
private void writeHspBlastFile(HashMap<String, SourceHSPs> sourceHSPs) throws IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(Config.blastFile));
            BufferedWriter bw = new BufferedWriter(new FileWriter(Config.blastFile + "HSP"))) {
        String line;
        // k counts non-blank lines; it is the key under which lines were registered via addLine.
        int k = 0;
        while ((line = br.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            String[] tabs = line.split("\t");
            if (!tabs[0].equals(tabs[1])) {
                SourceHSPs source = sourceHSPs.get(tabs[0]);
                SourceHSPs target = sourceHSPs.get(tabs[1]);
                int sourceIndex = source.getLines().indexOf(k);
                int targetIndex = target.getLines().indexOf(k);
                bw.write(tabs[0] + "_HSP" + source.getCluster(sourceIndex) + "\t" + tabs[1] + "_HSP" + target.getCluster(targetIndex) + "\t");
                // NOTE(review): a line with only two columns gets no line terminator (as before).
                for (int j = 2; j < tabs.length; j++) {
                    bw.write(tabs[j]);
                    if (j < tabs.length - 1) {
                        bw.write("\t");
                    } else {
                        bw.newLine();
                    }
                }
            }
            k++;
        }
    }
}

/**
 * Copies {@code Config.fastaFile} to {@code Config.fastaFile + "HSP"}, emitting each
 * sequence once per cluster index {@code 0..getClusternr()} under the id {@code <id>_HSP<j>}.
 */
private void writeHspFastaFile(HashMap<String, SourceHSPs> sourceHSPs) throws IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(Config.fastaFile));
            BufferedWriter bw = new BufferedWriter(new FileWriter(Config.fastaFile + "HSP"))) {
        StringBuilder sequence = new StringBuilder();
        String id = "";
        String line;
        while ((line = br.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            if (line.startsWith(">")) {
                // A new header closes the previous record, if it had any sequence data.
                if (sequence.length() > 0) {
                    writeHspFastaRecords(bw, id, sequence.toString(), sourceHSPs.get(id));
                    sequence.setLength(0);
                }
                id = line.substring(1);
            } else {
                sequence.append(line);
            }
        }
        // The last record is not followed by a header and is always written.
        writeHspFastaRecords(bw, id, sequence.toString(), sourceHSPs.get(id));
    }
}

/** Writes one FASTA record per cluster of {@code source}, all sharing the same sequence. */
private void writeHspFastaRecords(BufferedWriter bw, String id, String sequence, SourceHSPs source) throws IOException {
    for (int j = 0; j <= source.getClusternr(); j++) {
        bw.write(">" + id + "_HSP" + j);
        bw.newLine();
        bw.write(sequence);
        bw.newLine();
    }
}
Also used : HashMap(java.util.HashMap) SourceHSPs(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.SourceHSPs) FileWriter(java.io.FileWriter) IOException(java.io.IOException) FileNotFoundException(java.io.FileNotFoundException) BufferedWriter(java.io.BufferedWriter) Iterator(java.util.Iterator) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader)

Example 2 with BlastFile

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile in project clusterMaker2 by RBVI.

Method run of the class Creator.

/**
 * Runs the BLAST-based cost matrix creation: reads the FASTA file, reads the
 * BLAST file and writes the similarity file. Progress messages are printed to
 * the console only when {@code Config.gui} is set.
 *
 * <p>Nothing is done when {@code Config.source} is not {@code Config.BLAST}.
 *
 * @param proteins2integers lookup passed on to the FASTA reader and the similarity writer
 * @param integers2proteins lookup passed on to the FASTA and BLAST readers
 * @throws IOException if one of the readers fails
 */
public void run(HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins) throws IOException {
    if (Config.source != Config.BLAST) {
        // Other sources are not handled here; the expression-matrix path is disabled.
        return;
    }

    if (Config.gui) {
        Console.println("Read Fasta file ... ");
    }
    int[] sequenceLengths = InOut.readFastaFile(Config.fastaFile, proteins2integers, integers2proteins);

    if (Config.gui) {
        Console.println();
        Console.println("Read Blast file ... ");
    }
    BlastFile blastHits = InOut.readBlastFileWithArray(Config.blastFile, integers2proteins, sequenceLengths);

    if (Config.gui) {
        Console.println();
        Console.println("Create similarity file ...");
    }
    createSimilarityFileFromArray(Config.similarityFile, blastHits, proteins2integers, sequenceLengths, Config.costModel);

    if (Config.gui) {
        Console.println();
    }

    // Drop the hit table and request a collection before cost matrices are split and written.
    if (Config.splitAndWriteCostMatrices) {
        blastHits = null;
        System.gc();
    }
}
Also used : BlastFile(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile)

Example 3 with BlastFile

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile in project clusterMaker2 by RBVI.

Method readBlastFileWithArray of the class InOut.

/**
 * Reads a tab-separated BLAST result file into a {@link BlastFile}.
 *
 * <p>The file is first line-counted to size the {@code BlastFile}, then parsed
 * record by record. Columns used (0-based): 0 source id, 1 target id, 3 divisor
 * for the score, 6/7 query start/end, 8/9 subject start/end, 10 e-value,
 * 11 raw score. E-values above {@code Config.blastCutoff} are stored with
 * e-value 0 and score 0; all others are stored as {@code -log10(evalue)}.
 *
 * <p>Reading stops at the first record that cannot be parsed or stored (e.g. too
 * few columns, non-numeric field, unknown protein id); the records read up to
 * that point are returned and the problem is reported on {@code System.err}.
 *
 * @param blastFile         path of the BLAST file
 * @param integers2proteins lookup from protein id to integer index
 * @param proteinlengths    currently unused; kept for interface compatibility
 * @return the parsed hits
 * @throws IOException if the file cannot be opened or read
 */
public static BlastFile readBlastFileWithArray(String blastFile, HashMap<String, Integer> integers2proteins, int[] proteinlengths) throws IOException {
    File f = new File(blastFile);
    if (Config.gui) {
        Console.restartBar(0, 100);
        Console.setBarText("start counting lines of BLAST file");
    }
    int lineCount = countLines(f);
    if (Config.gui) {
        Console.println("" + lineCount);
        Console.println();
    }
    BlastFile bf = new BlastFile(lineCount);
    if (Config.gui) {
        Console.println("\t start reading BLAST file ...");
        Console.restartBar(0, 100);
        Console.setBarText("reading blast file");
    }
    double normalizeFactorFromBlastCutoff = Math.log10(Config.blastCutoff);
    try (BufferedReader br = new BufferedReader(new FileReader(blastFile))) {
        int i = 0;
        double percentOld = 0;
        String line;
        while ((line = br.readLine()) != null) {
            // Refresh the progress bar at most every 10000 records and only after >1% progress.
            if (i % 10000 == 0 && i > 0) {
                double percent = Math.rint(((double) i / lineCount) * 10000) / 100;
                if (percent > percentOld + 1) {
                    percentOld = percent;
                    if (Config.gui) {
                        Console.setBarValue((int) Math.rint(percent));
                        Console.setBarTextPlusRestTime("reading BLAST file  " + percent + " %");
                    }
                }
            }
            try {
                String[] columns = line.split(TAB);
                String source = columns[0];
                int sourceInt = integers2proteins.get(source);
                String target = columns[1];
                int targetInt = integers2proteins.get(target);
                int startQuery = Integer.parseInt(columns[6]);
                int endQuery = Integer.parseInt(columns[7]);
                int startSubject = Integer.parseInt(columns[8]);
                int endSubject = Integer.parseInt(columns[9]);
                double evalue = Double.parseDouble(columns[10]);
                double score = Double.parseDouble(columns[11]) / Double.parseDouble(columns[3]);
                // Clamp so that log10 never sees zero.
                if (evalue < Double.MIN_VALUE) {
                    evalue = Double.MIN_VALUE;
                }
                if (evalue > Config.blastCutoff) {
                    score = 0;
                    evalue = 0;
                } else {
                    evalue = -Math.log10(evalue);
                }
                // NOTE(review): the shift is also applied to hits that were just zeroed by the
                // cutoff; kept as in the original implementation - confirm this is intended.
                if (Config.blastCutoff > 1) {
                    evalue += normalizeFactorFromBlastCutoff;
                }
                bf.setAll(i, startQuery, endQuery, startSubject, endSubject, sourceInt, targetInt, evalue, score);
            } catch (RuntimeException e) {
                // Previously swallowed silently; keep the partial result but make the stop visible.
                System.err.println("Stopped reading BLAST file " + blastFile + " at record " + i + ": " + e);
                break;
            }
            i++;
        }
    }
    if (Config.gui) {
        Console.println();
    }
    return bf;
}
Also used : BlastFile(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) File(java.io.File) BlastFile(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile) IOException(java.io.IOException)

Aggregations

BlastFile (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile): 2; BufferedReader (java.io.BufferedReader): 2; FileReader (java.io.FileReader): 2; IOException (java.io.IOException): 2; SourceHSPs (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.SourceHSPs): 1; BufferedWriter (java.io.BufferedWriter): 1; File (java.io.File): 1; FileNotFoundException (java.io.FileNotFoundException): 1; FileWriter (java.io.FileWriter): 1; HashMap (java.util.HashMap): 1; Iterator (java.util.Iterator): 1