use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class EvenCoverageProbeGenerator method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
Vector<Probe> newProbes = new Vector<Probe>();
for (int c = 0; c < chromosomes.length; c++) {
// Time for an update
updateGenerationProgress("Processed " + c + " chromosomes", c, chromosomes.length);
// We'll merge together the reads for all of the selected DataStores and
// compute a single set of probes which covers all of them.
ReadsWithCounts[] v = new ReadsWithCounts[selectedStores.length];
for (int s = 0; s < selectedStores.length; s++) {
v[s] = selectedStores[s].getReadsForChromosome(chromosomes[c]);
}
ReadsWithCounts reads = new ReadsWithCounts(v);
v = null;
if (reads.totalCount() == 0) {
// System.err.println("Skipping strand "+strandsToTry[strand]+" on chr "+chromosomes[c]);
continue;
}
int strandForNewProbes = Location.UNKNOWN;
int start = SequenceRead.start(reads.reads[0]);
int end = SequenceRead.end(reads.reads[0]);
int count = reads.counts[0];
for (int r = 1; r < reads.reads.length; r++) {
// See if we need to quit
if (cancel) {
generationCancelled();
return;
}
if (count > 0 && ((maxSize > 0 && (SequenceRead.end(reads.reads[r]) - start) + 1 > maxSize)) || (count + reads.counts[r] > targetReadCount)) {
// Make a probe out of what we have and start a new one with the
// read we're currently looking at
Probe p = new Probe(chromosomes[c], start, end, strandForNewProbes);
newProbes.add(p);
start = end + 1;
end = Math.max(end + 2, SequenceRead.end(reads.reads[r]));
count = SequenceRead.end(reads.counts[r]);
} else {
count += reads.counts[r];
if (SequenceRead.end(reads.reads[r]) > end) {
end = SequenceRead.end(reads.reads[r]);
}
}
}
if (count > 0) {
Probe p = new Probe(chromosomes[c], start, end, strandForNewProbes);
newProbes.add(p);
}
}
Probe[] finalList = newProbes.toArray(new Probe[0]);
newProbes.clear();
ProbeSet finalSet = new ProbeSet(getDescription(), finalList);
generationComplete(finalSet);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class FeatureProbeGenerator method removeDuplicates.
/**
* Removes the duplicates.
*
* @param original the original
* @return the probe[]
*/
private Probe[] removeDuplicates(Probe[] original) {
if (original == null || original.length == 0)
return original;
Arrays.sort(original);
Vector<Probe> keepers = new Vector<Probe>();
Probe lastProbe = original[0];
keepers.add(original[0]);
for (int i = 1; i < original.length; i++) {
if (original[i].start() == lastProbe.start() && original[i].end() == lastProbe.end() && original[i].chromosome().equals(lastProbe.chromosome())) {
// This is a duplicate
continue;
}
keepers.add(original[i]);
lastProbe = original[i];
}
return keepers.toArray(new Probe[0]);
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class MacsPeakCaller method getReadsFromDataStoreCollection.
private long[] getReadsFromDataStoreCollection(Probe probe, DataStore[] stores, int strandOffset) {
Probe p = probe;
if (strandOffset > 0) {
p = new Probe(probe.chromosome(), Math.max(probe.start() - strandOffset, 1), Math.min(probe.end() + strandOffset, probe.chromosome().length()));
}
long[][] allProbeReads = new long[stores.length][];
for (int d = 0; d < stores.length; d++) {
allProbeReads[d] = stores[d].getReadsForProbe(p);
}
int totalProbeReadCount = 0;
for (int i = 0; i < allProbeReads.length; i++) {
totalProbeReadCount += allProbeReads[i].length;
}
long[] mergedChrReads = new long[totalProbeReadCount];
int index = 0;
for (int i = 0; i < allProbeReads.length; i++) {
for (int j = 0; j < allProbeReads[i].length; j++) {
if (strandOffset == 0 || SequenceRead.strand(allProbeReads[i][j]) == Location.UNKNOWN) {
mergedChrReads[index] = allProbeReads[i][j];
} else if (SequenceRead.strand(allProbeReads[i][j]) == Location.FORWARD) {
mergedChrReads[index] = SequenceRead.packPosition(Math.min(SequenceRead.start(allProbeReads[i][j]) + strandOffset, p.chromosome().length()), Math.min(SequenceRead.end(allProbeReads[i][j]) + strandOffset, p.chromosome().length()), SequenceRead.strand(allProbeReads[i][j]));
} else if (SequenceRead.strand(allProbeReads[i][j]) == Location.REVERSE) {
mergedChrReads[index] = SequenceRead.packPosition(Math.max(SequenceRead.start(allProbeReads[i][j]) - strandOffset, 1), Math.max(SequenceRead.end(allProbeReads[i][j]) - strandOffset, 1), SequenceRead.strand(allProbeReads[i][j]));
}
index++;
}
allProbeReads[i] = null;
}
return mergedChrReads;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class RandomProbeGenerator method designPerChromosome.
private ProbeSet designPerChromosome() {
Chromosome[] chromosomes = collection.genome().getAllChromosomes();
long[] offsets = new long[chromosomes.length];
offsets[0] = chromosomes[0].length();
for (int i = 1; i < chromosomes.length; i++) {
offsets[i] = offsets[i - 1] + chromosomes[i].length();
}
// Now make up the probes
Vector<Probe> newProbes = new Vector<Probe>();
Random rand = new Random();
while (newProbes.size() < numberToGenerate) {
if (cancel) {
return null;
}
if (newProbes.size() % 1000 == 0) {
updateGenerationProgress("Designed " + newProbes.size() + " probes", newProbes.size(), numberToGenerate);
}
// Select a random position in the genome
long random = (long) (collection.genome().getTotalGenomeLength() * rand.nextDouble());
Chromosome chromosome = null;
int start = 0;
int end = 0;
// Find out which chromosome this comes from
for (int o = 0; o < offsets.length; o++) {
if (random < offsets[o]) {
chromosome = chromosomes[o];
if (o > 0) {
start = (int) (random - offsets[o - 1]) + 1;
} else {
start = (int) random + 1;
}
end = start + (windowSize - 1);
break;
}
}
if (end > chromosome.length()) {
// Try again
continue;
}
// Make a probe
newProbes.add(new Probe(chromosome, start, end));
}
Probe[] finalList = newProbes.toArray(new Probe[0]);
ProbeSet finalSet = new ProbeSet(getDescription(), finalList);
return finalSet;
}
use of uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe in project SeqMonk by s-andrews.
the class RandomProbeGenerator method designPerProbe.
private ProbeSet designPerProbe() {
Probe[] probes = collection.probeSet().getActiveList().getAllProbes();
// TODO: We could remove all probes which are smaller than the windows we're going to
// generate?
long[] offsets = new long[probes.length];
offsets[0] = probes[0].length();
for (int i = 1; i < probes.length; i++) {
offsets[i] = offsets[i - 1] + probes[i].length();
}
long totalLength = offsets[offsets.length - 1];
// Now make up the probes
Vector<Probe> newProbes = new Vector<Probe>();
Random rand = new Random();
RANDLOOP: while (newProbes.size() < numberToGenerate) {
if (cancel) {
return null;
}
if (newProbes.size() % 1000 == 0) {
updateGenerationProgress("Designed " + newProbes.size() + " probes", newProbes.size(), numberToGenerate);
}
// Select a random position in the genome
long random = (long) (totalLength * rand.nextDouble());
Chromosome chromosome = null;
int start = 0;
int end = 0;
// Find out which chromosome this comes from
for (int o = 0; o < offsets.length; o++) {
if (random < offsets[o]) {
chromosome = probes[o].chromosome();
if (o > 0) {
start = (int) (random - offsets[o - 1]) + probes[o].start();
} else {
start = (int) random + probes[o].start();
}
end = start + (windowSize - 1);
// Check that this is valid
if (end > probes[o].end()) {
// Try again
continue RANDLOOP;
}
break;
}
}
// Make a probe
newProbes.add(new Probe(chromosome, start, end));
}
Probe[] finalList = newProbes.toArray(new Probe[0]);
ProbeSet finalSet = new ProbeSet(getDescription(), finalList);
return finalSet;
}
Aggregations