Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.networkClusterers.TransClust.de.costmatrixcreation.dataTypes.SourceHSPs in project clusterMaker2 by RBVI.
Class Creator, method createHSPCluster.
/**
 * Groups the HSPs of every protein into coverage clusters and writes
 * "HSP"-suffixed copies of the configured BLAST and FASTA files.
 *
 * <p>Steps:
 * <ol>
 *   <li>One {@link SourceHSPs} is created per key of {@code integers2proteins},
 *       sized with the matching entry of {@code proteinLengths}.</li>
 *   <li>Every row of {@code bf} whose source and target differ is assigned,
 *       once for its source and once for its target, to a coverage cluster
 *       (see {@link #assignToCoverageCluster}).</li>
 *   <li>{@code Config.blastFile} is copied to {@code Config.blastFile + "HSP"}
 *       with the first two columns rewritten to {@code <id>_HSP<cluster>}.</li>
 *   <li>{@code Config.fastaFile} is copied to {@code Config.fastaFile + "HSP"},
 *       each sequence repeated once per cluster of its protein.</li>
 * </ol>
 *
 * <p>Neither map is modified. Any exception raised while the two files are
 * rewritten is printed and the JVM is terminated with exit status 1.
 *
 * @param bf                parsed BLAST rows; {@code bf.size} rows are processed
 * @param proteins2integers map from integer protein index to protein id
 *                          (note: the name reads the other way round than the type)
 * @param integers2proteins map from protein id to integer protein index
 * @param proteinLengths    sequence length per integer protein index
 */
private void createHSPCluster(BlastFile bf, HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins, int[] proteinLengths) {
    HashMap<String, SourceHSPs> sourceHSPs = buildHspClusters(bf, proteins2integers, integers2proteins, proteinLengths);
    try {
        writeHspBlastFile(sourceHSPs);
        writeHspFastaFile(sourceHSPs);
    } catch (Exception e) {
        // Failure policy kept from the original implementation: report and abort.
        e.printStackTrace();
        System.exit(1);
    }
}

/** Builds the per-protein cluster bookkeeping from the rows of {@code bf}. */
private HashMap<String, SourceHSPs> buildHspClusters(BlastFile bf, HashMap<Integer, String> proteins2integers, HashMap<String, Integer> integers2proteins, int[] proteinLengths) {
    HashMap<String, SourceHSPs> sourceHSPs = new HashMap<String, SourceHSPs>();
    for (String id : integers2proteins.keySet()) {
        int idInt = integers2proteins.get(id);
        sourceHSPs.put(id, new SourceHSPs(new ArrayList<boolean[]>(), proteinLengths[idInt], new ArrayList<Integer>(), id));
    }

    for (int i = 0; i < bf.size; i++) {
        // Self hits carry no information about HSP clusters.
        if (bf.getSource(i) == bf.getTarget(i)) {
            continue;
        }
        SourceHSPs sourceHSP = sourceHSPs.get(proteins2integers.get(bf.getSource(i)));
        SourceHSPs targetHSP = sourceHSPs.get(proteins2integers.get(bf.getTarget(i)));

        // Mark the aligned range [start, end) on each of the two proteins.
        boolean[] sourceCoverage = new boolean[proteinLengths[bf.getSource(i)]];
        boolean[] targetCoverage = new boolean[proteinLengths[bf.getTarget(i)]];
        Arrays.fill(sourceCoverage, bf.getStartQuery(i), bf.getEndQuery(i), true);
        Arrays.fill(targetCoverage, bf.getStartSubject(i), bf.getEndSubject(i), true);

        assignToCoverageCluster(sourceHSP, sourceCoverage, i);
        assignToCoverageCluster(targetHSP, targetCoverage, i);
    }
    return sourceHSPs;
}

/**
 * Records BLAST row {@code line} on {@code hsps} and assigns it to the first
 * stored coverage whose similarity to {@code coverage} exceeds 0.8; if none
 * qualifies, {@code coverage} is stored as the representative of a new cluster.
 */
private void assignToCoverageCluster(SourceHSPs hsps, boolean[] coverage, int line) {
    ArrayList<boolean[]> knownCoverages = hsps.getCoverages();
    hsps.addLine(line);

    if (knownCoverages.isEmpty()) {
        knownCoverages.add(coverage);
        hsps.addCluster(0);
        // NOTE(review): later clusters register the row index (see below) while
        // the first one registers 0; kept as in the original - confirm intent.
        hsps.addClusterLine(0);
        return;
    }

    for (int j = 0; j < knownCoverages.size(); j++) {
        float sim = calculateSimilarity(knownCoverages.get(j), coverage);
        if (sim > 0.8) {
            hsps.addCluster(j);
            return;
        }
    }

    knownCoverages.add(coverage);
    hsps.setClusternr(hsps.getClusternr() + 1);
    hsps.addClusterLine(line);
    hsps.addCluster(hsps.getClusternr());
}

/**
 * Copies {@code Config.blastFile} to {@code Config.blastFile + "HSP"}, appending
 * {@code _HSP<cluster>} to the ids in the first two columns. Self hits and blank
 * lines are not copied. Both streams are closed even if an exception is thrown.
 */
private void writeHspBlastFile(HashMap<String, SourceHSPs> sourceHSPs) throws java.io.IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(Config.blastFile));
         BufferedWriter bw = new BufferedWriter(new FileWriter(Config.blastFile + "HSP"))) {
        String line;
        // Index of the current non-blank line; it is looked up in the row
        // indices recorded on each SourceHSPs, so self hits must be counted too.
        int k = 0;
        while ((line = br.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            String[] tabs = line.split("\t");
            if (!tabs[0].equals(tabs[1])) {
                SourceHSPs source = sourceHSPs.get(tabs[0]);
                SourceHSPs target = sourceHSPs.get(tabs[1]);
                int sourceIndex = source.getLines().indexOf(k);
                int targetIndex = target.getLines().indexOf(k);

                StringBuilder out = new StringBuilder();
                out.append(tabs[0]).append("_HSP").append(source.getCluster(sourceIndex)).append("\t");
                out.append(tabs[1]).append("_HSP").append(target.getCluster(targetIndex)).append("\t");
                for (int j = 2; j < tabs.length; j++) {
                    out.append(tabs[j]);
                    if (j < tabs.length - 1) {
                        out.append("\t");
                    }
                }
                bw.write(out.toString());
                // Always terminate the row, even when it has only two columns.
                bw.newLine();
            }
            k++;
        }
    }
}

/**
 * Copies {@code Config.fastaFile} to {@code Config.fastaFile + "HSP"}, writing
 * every sequence once per cluster of its protein. Both streams are closed even
 * if an exception is thrown.
 */
private void writeHspFastaFile(HashMap<String, SourceHSPs> sourceHSPs) throws java.io.IOException {
    try (BufferedReader br = new BufferedReader(new FileReader(Config.fastaFile));
         BufferedWriter bw = new BufferedWriter(new FileWriter(Config.fastaFile + "HSP"))) {
        StringBuilder sequence = new StringBuilder();
        String id = "";
        boolean sawContent = false;
        String line;
        while ((line = br.readLine()) != null) {
            if (line.trim().equals("")) {
                continue;
            }
            sawContent = true;
            if (line.startsWith(">")) {
                // A new header ends the previous record, if it had any sequence.
                if (sequence.length() > 0) {
                    writeHspFastaRecords(bw, sourceHSPs.get(id), id, sequence.toString());
                    sequence.setLength(0);
                }
                id = line.substring(1);
            } else {
                sequence.append(line);
            }
        }
        // Flush the last record; an input without any content yields an empty output.
        if (sawContent) {
            writeHspFastaRecords(bw, sourceHSPs.get(id), id, sequence.toString());
        }
    }
}

/** Writes {@code sequence} once for each cluster number 0..{@code source.getClusternr()}. */
private void writeHspFastaRecords(BufferedWriter bw, SourceHSPs source, String id, String sequence) throws java.io.IOException {
    for (int j = 0; j <= source.getClusternr(); j++) {
        bw.write(">" + id + "_HSP" + j);
        bw.newLine();
        bw.write(sequence);
        bw.newLine();
    }
}
Aggregations