use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile in project clusterMaker2 by RBVI.
the class Creator method createHSPCluster.
/**
 * Groups the BLAST hits of every protein into "HSP clusters" by the region of the
 * protein they cover, then writes HSP-annotated copies of the BLAST and FASTA files.
 *
 * <p>For each non-self hit, a boolean coverage mask is built for the source and the
 * target protein. A hit joins the first existing cluster of that protein whose mask has
 * a similarity above 0.8 (as computed by {@code calculateSimilarity}); otherwise it
 * opens a new cluster. Afterwards {@code Config.blastFile + "HSP"} and
 * {@code Config.fastaFile + "HSP"} are written, with every id suffixed by
 * {@code _HSP<cluster>}.
 *
 * <p>Any exception while writing the two files is printed and terminates the JVM with
 * exit status 1 (behaviour kept from the original implementation).
 *
 * @param bf                parsed BLAST hits; indexed by record number
 * @param proteins2integers maps the integer index of a protein to its id (note: the name
 *                          reads the other way round than the map direction)
 * @param integers2proteins maps a protein id to its integer index
 * @param proteinLengths    protein lengths, indexed by the integer protein index
 */
private void createHSPCluster(BlastFile bf, HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins, int[] proteinLengths) {
    // One bookkeeping record per protein id. The maps are only read here, so no
    // defensive copies are needed.
    HashMap<String, SourceHSPs> sourceHSPs = new HashMap<String, SourceHSPs>();
    for (String id : integers2proteins.keySet()) {
        int idInt = integers2proteins.get(id);
        sourceHSPs.put(id, new SourceHSPs(new ArrayList<boolean[]>(), proteinLengths[idInt], new ArrayList<Integer>(), id));
    }

    for (int i = 0; i < bf.size; i++) {
        int sourceInt = bf.getSource(i);
        int targetInt = bf.getTarget(i);
        if (sourceInt == targetInt) {
            continue; // self hits carry no clustering information
        }
        SourceHSPs sourceHSP = sourceHSPs.get(proteins2integers.get(sourceInt));
        SourceHSPs targetHSP = sourceHSPs.get(proteins2integers.get(targetInt));

        // NOTE(review): Arrays.fill treats the end index as exclusive and the start as
        // 0-based; confirm this matches the coordinate convention stored in BlastFile.
        boolean[] sourceCoverage = new boolean[proteinLengths[sourceInt]];
        boolean[] targetCoverage = new boolean[proteinLengths[targetInt]];
        Arrays.fill(sourceCoverage, bf.getStartQuery(i), bf.getEndQuery(i), true);
        Arrays.fill(targetCoverage, bf.getStartSubject(i), bf.getEndSubject(i), true);

        assignCoverageCluster(sourceHSP, sourceCoverage, i);
        assignCoverageCluster(targetHSP, targetCoverage, i);
    }

    try {
        writeHspBlastFile(sourceHSPs);
        writeHspFastaFile(sourceHSPs);
    } catch (Exception e) {
        // NOTE(review): exiting the JVM is kept for backward compatibility; consider
        // propagating the failure instead if this runs inside a host application.
        e.printStackTrace();
        System.exit(1);
    }
}

/** Records BLAST record {@code line} on {@code hsp} and assigns it to the first matching coverage cluster, or to a new one. */
private void assignCoverageCluster(SourceHSPs hsp, boolean[] coverage, int line) {
    ArrayList<boolean[]> knownCoverages = hsp.getCoverages();
    hsp.addLine(line);
    if (knownCoverages.isEmpty()) {
        knownCoverages.add(coverage);
        hsp.addCluster(0);
        // NOTE(review): the first cluster registers line 0 while later clusters register
        // the current record number; kept as in the original, confirm this is intended.
        hsp.addClusterLine(0);
        return;
    }
    for (int j = 0; j < knownCoverages.size(); j++) {
        float sim = calculateSimilarity(knownCoverages.get(j), coverage);
        if (sim > 0.8) {
            hsp.addCluster(j);
            return;
        }
    }
    // No sufficiently similar cluster exists: open a new one.
    knownCoverages.add(coverage);
    hsp.setClusternr(hsp.getClusternr() + 1);
    hsp.addClusterLine(line);
    hsp.addCluster(hsp.getClusternr());
}

/** Copies {@code Config.blastFile} to {@code Config.blastFile + "HSP"}, suffixing both ids of every non-self hit with its HSP cluster. */
private void writeHspBlastFile(HashMap<String, SourceHSPs> sourceHSPs) throws IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(Config.blastFile));
         BufferedWriter bw = new BufferedWriter(new FileWriter(Config.blastFile + "HSP"))) {
        String line;
        int k = 0; // record number; counts every non-blank line, self hits included
        while ((line = br.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            String[] tabs = line.split("\t");
            if (tabs[0].equals(tabs[1])) {
                k++;
                continue;
            }
            SourceHSPs source = sourceHSPs.get(tabs[0]);
            SourceHSPs target = sourceHSPs.get(tabs[1]);
            int sourceIndex = source.getLines().indexOf(k);
            int targetIndex = target.getLines().indexOf(k);
            bw.write(tabs[0] + "_HSP" + source.getCluster(sourceIndex) + "\t" + tabs[1] + "_HSP" + target.getCluster(targetIndex));
            for (int j = 2; j < tabs.length; j++) {
                bw.write("\t");
                bw.write(tabs[j]);
            }
            // Always terminate the record, even if the line had only the two id columns.
            bw.newLine();
            k++;
        }
    }
}

/** Copies {@code Config.fastaFile} to {@code Config.fastaFile + "HSP"}, emitting each sequence once per HSP cluster of its protein. */
private void writeHspFastaFile(HashMap<String, SourceHSPs> sourceHSPs) throws IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(Config.fastaFile));
         BufferedWriter bw = new BufferedWriter(new FileWriter(Config.fastaFile + "HSP"))) {
        StringBuilder sequence = new StringBuilder();
        String id = "";
        String line;
        while ((line = br.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            if (line.startsWith(">")) {
                // A new header closes the previous entry, if it had any sequence data.
                if (sequence.length() > 0) {
                    writeHspFastaCopies(bw, sourceHSPs.get(id), id, sequence.toString());
                    sequence.setLength(0);
                }
                id = line.substring(1);
            } else {
                sequence.append(line);
            }
        }
        // The last entry has no following header to trigger its output.
        writeHspFastaCopies(bw, sourceHSPs.get(id), id, sequence.toString());
    }
}

/** Writes one FASTA entry {@code >id_HSPj} per cluster number {@code j} in {@code 0..source.getClusternr()}. */
private void writeHspFastaCopies(BufferedWriter bw, SourceHSPs source, String id, String sequence) throws IOException {
    for (int j = 0; j <= source.getClusternr(); j++) {
        bw.write(">" + id + "_HSP" + j);
        bw.newLine();
        bw.write(sequence);
        bw.newLine();
    }
}
use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile in project clusterMaker2 by RBVI.
the class Creator method run.
/**
 * Runs the BLAST-based pipeline: reads the FASTA file, reads the BLAST file and
 * creates the similarity file. Progress messages are printed through {@code Console}
 * whenever {@code Config.gui} is set.
 *
 * <p>Nothing is done when {@code Config.source} is not {@code Config.BLAST}.
 *
 * @param proteins2integers map from integer protein index to protein id; passed to the
 *                          FASTA reader and to the similarity-file creation
 * @param integers2proteins map from protein id to integer protein index; passed to the
 *                          FASTA reader and to the BLAST reader
 * @throws IOException declared for the file-reading helpers called here
 */
public void run(HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins) throws IOException {
    // Only BLAST input is handled; there is no active code path for other sources.
    if (Config.source != Config.BLAST) {
        return;
    }

    if (Config.gui) {
        Console.println("Read Fasta file ... ");
    }
    int[] lengths = InOut.readFastaFile(Config.fastaFile, proteins2integers, integers2proteins);

    if (Config.gui) {
        Console.println();
        Console.println("Read Blast file ... ");
    }
    BlastFile blast = InOut.readBlastFileWithArray(Config.blastFile, integers2proteins, lengths);

    if (Config.gui) {
        Console.println();
        Console.println("Create similarity file ...");
    }
    createSimilarityFileFromArray(Config.similarityFile, blast, proteins2integers, lengths, Config.costModel);

    if (Config.gui) {
        Console.println();
    }

    // Drop the parsed BLAST data and request a collection before cost matrices are split.
    if (Config.splitAndWriteCostMatrices) {
        blast = null;
        System.gc();
    }
}
use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.BlastFile in project clusterMaker2 by RBVI.
the class InOut method readBlastFileWithArray.
/**
 * Reads a tab-separated BLAST result file into a {@link BlastFile}.
 *
 * <p>The file is first counted with {@code countLines} to size the result, then read
 * record by record. From every line the two ids (columns 0 and 1) are translated to
 * integer indices via {@code integers2proteins}; columns 6-9 give the query/subject
 * start and end, column 10 the e-value, and the score is column 11 divided by
 * column 3. (Presumably the standard 12-column BLAST tabular layout; confirm against
 * the files actually supplied.)
 *
 * <p>E-values below {@code Double.MIN_VALUE} are raised to it; e-values above
 * {@code Config.blastCutoff} get score and e-value 0, all others are replaced by
 * {@code -log10(evalue)}. If the cutoff is greater than 1, {@code log10(cutoff)} is
 * added afterwards.
 *
 * <p>Blank lines are skipped. If a record cannot be parsed (too few columns, a
 * non-numeric field, an id missing from {@code integers2proteins}, or more records
 * than counted), reading stops at that record, a warning is printed to
 * {@code System.err}, and the records read so far are returned.
 *
 * @param blastFile         path of the BLAST file
 * @param integers2proteins map from protein id to integer protein index
 * @param proteinlengths    protein lengths; currently unused by this method
 * @return the parsed BLAST records
 * @throws IOException if the file cannot be opened or read
 */
public static BlastFile readBlastFileWithArray(String blastFile, HashMap<String, Integer> integers2proteins, int[] proteinlengths) throws IOException {
    File f = new File(blastFile);
    if (Config.gui) {
        Console.restartBar(0, 100);
        Console.setBarText("start counting lines of BLAST file");
    }
    int lineCount = countLines(f);
    if (Config.gui) {
        Console.println("" + lineCount);
        Console.println();
    }
    BlastFile bf = new BlastFile(lineCount);

    if (Config.gui) {
        Console.println("\t start reading BLAST file ...");
        Console.restartBar(0, 100);
        Console.setBarText("reading blast file");
    }

    int i = 0;      // index of the next record to store
    int lineNo = 0; // physical line number, for diagnostics only
    double percent = 0;
    double percentOld = 0;
    double normalizeFactorFromBlastCutoff = Math.log10(Config.blastCutoff);

    try (BufferedReader br = new BufferedReader(new FileReader(blastFile))) {
        String line;
        while ((line = br.readLine()) != null) {
            lineNo++;
            if (line.trim().isEmpty()) {
                continue;
            }
            // Update the progress bar every 10000 records, and only when it moved by more than 1 %.
            if (i % 10000 == 0 && i > 0) {
                percent = Math.rint(((double) i / lineCount) * 10000) / 100;
                if (percent > percentOld + 1) {
                    percentOld = percent;
                    if (Config.gui) {
                        Console.setBarValue((int) Math.rint(percent));
                        Console.setBarTextPlusRestTime("reading BLAST file " + percent + " %");
                    }
                }
            }
            try {
                String[] columns = line.split(TAB);
                String source = columns[0];
                int sourceInt = integers2proteins.get(source);
                String target = columns[1];
                int targetInt = integers2proteins.get(target);
                int startQuery = Integer.parseInt(columns[6]);
                int endQuery = Integer.parseInt(columns[7]);
                int startSubject = Integer.parseInt(columns[8]);
                int endSubject = Integer.parseInt(columns[9]);
                double evalue = Double.parseDouble(columns[10]);
                double score = Double.parseDouble(columns[11]) / Double.parseDouble(columns[3]);

                // Keep log10 finite for e-values reported as 0.
                if (evalue < Double.MIN_VALUE) {
                    evalue = Double.MIN_VALUE;
                }
                if (evalue > Config.blastCutoff) {
                    score = 0;
                    evalue = 0;
                } else {
                    evalue = -Math.log10(evalue);
                }
                // NOTE(review): the shift is also applied to hits zeroed by the cutoff above;
                // kept as in the original, confirm this is intended.
                if (Config.blastCutoff > 1) {
                    evalue += normalizeFactorFromBlastCutoff;
                }
                bf.setAll(i, startQuery, endQuery, startSubject, endSubject, sourceInt, targetInt, evalue, score);
            } catch (RuntimeException e) {
                // Best-effort behaviour is kept (return what was read), but no longer silently.
                System.err.println("Stopped reading BLAST file " + blastFile + " at line " + lineNo + ": " + e);
                break;
            }
            i++;
        }
    }

    if (Config.gui) {
        Console.println();
    }
    return bf;
}
Aggregations