use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SmoothingSubtractionQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
if (!isReady()) {
progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
}
Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
Vector<DataStore> quantitatedStores = new Vector<DataStore>();
DataSet[] sets = application.dataCollection().getAllDataSets();
for (int s = 0; s < sets.length; s++) {
if (sets[s].isQuantitated()) {
quantitatedStores.add(sets[s]);
}
}
DataGroup[] groups = application.dataCollection().getAllDataGroups();
for (int g = 0; g < groups.length; g++) {
if (groups[g].isQuantitated()) {
quantitatedStores.add(groups[g]);
}
}
DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
for (int c = 0; c < chromosomes.length; c++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(c, chromosomes.length);
Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
float[][] newValues = new float[data.length][allProbes.length];
try {
for (int p = 0; p < allProbes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
// Find the min and max indices we're going to use.
int minIndex = p;
int maxIndex = p;
if (correctionAction == ADJACENT) {
minIndex = p - (distance / 2);
maxIndex = minIndex + (distance - 1);
if (minIndex < 0)
minIndex = 0;
if (maxIndex > allProbes.length - 1)
maxIndex = allProbes.length - 1;
} else if (correctionAction == WINDOW) {
for (int i = p; i >= 0; i--) {
if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
break;
}
minIndex = i;
}
for (int i = p; i < allProbes.length; i++) {
if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
break;
}
maxIndex = i;
}
}
// Now go through all of the datasets working out the new value for this range
float[] tempValues = new float[(maxIndex - minIndex) + 1];
for (int d = 0; d < data.length; d++) {
for (int i = minIndex; i <= maxIndex; i++) {
tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
}
newValues[d][p] = SimpleStats.mean(tempValues);
}
}
// Now assign the values for the probes on this chromosome
for (int d = 0; d < data.length; d++) {
for (int p = 0; p < allProbes.length; p++) {
data[d].setValueForProbe(allProbes[p], data[d].getValueForProbe(allProbes[p]) - newValues[d][p]);
}
}
} catch (SeqMonkException e) {
progressExceptionReceived(e);
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class HiCCisTransQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
Probe[] probes = application.dataCollection().probeSet().getAllProbes();
for (int p = 0; p < probes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(p, probes.length);
for (int d = 0; d < data.length; d++) {
int cisCount = 0;
int transCount = 0;
HiCHitCollection hiCHits = data[d].getHiCReadsForProbe(probes[p]);
String[] chromosomeNames = hiCHits.getChromosomeNamesWithHits();
for (int c = 0; c < chromosomeNames.length; c++) {
long[] sourceReads = hiCHits.getSourcePositionsForChromosome(chromosomeNames[c]);
long[] hitReads = hiCHits.getHitPositionsForChromosome(chromosomeNames[c]);
for (int r = 0; r < sourceReads.length; r++) {
// Check if we can ignore this one
if (removeDuplicates) {
if (r > 0 && sourceReads[r] == sourceReads[r - 1] && hitReads[r] == hitReads[r - 1])
continue;
}
if (!chromosomeNames[c].equals(probes[p].chromosome().name())) {
++transCount;
} else {
if (includeFarCis) {
int distance = SequenceRead.fragmentLength(sourceReads[r], hitReads[r]);
if (distance > farCisDistance) {
++transCount;
} else {
// System.err.println("Distance was "+distance);
++cisCount;
}
} else {
++cisCount;
}
}
}
}
float percentage = ((transCount * 100f) / (cisCount + transCount));
if (cisCount + transCount == 0) {
percentage = 0;
}
// TODO: This is icky since the inheritance between HiCDataStore and DataStore
// isn't properly sorted out.
((DataStore) data[d]).setValueForProbe(probes[p], percentage);
}
}
if (correctPerChromosome) {
Chromosome[] chrs = application.dataCollection().genome().getAllChromosomes();
for (int c = 0; c < chrs.length; c++) {
Probe[] thisChrProbes = application.dataCollection().probeSet().getProbesForChromosome(chrs[c]);
float[] thisChrValues = new float[thisChrProbes.length];
for (int d = 0; d < data.length; d++) {
DataStore ds = (DataStore) data[d];
for (int p = 0; p < thisChrProbes.length; p++) {
try {
thisChrValues[p] = ds.getValueForProbe(thisChrProbes[p]);
} catch (SeqMonkException e) {
}
}
float median = SimpleStats.median(thisChrValues);
for (int p = 0; p < thisChrProbes.length; p++) {
try {
ds.setValueForProbe(thisChrProbes[p], ds.getValueForProbe(thisChrProbes[p]) - median);
} catch (SeqMonkException e) {
}
}
}
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SmoothingQuantitation method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
if (!isReady()) {
progressExceptionReceived(new SeqMonkException("Options weren't set correctly"));
}
Chromosome[] chromosomes = application.dataCollection().genome().getAllChromosomes();
Vector<DataStore> quantitatedStores = new Vector<DataStore>();
DataSet[] sets = application.dataCollection().getAllDataSets();
for (int s = 0; s < sets.length; s++) {
if (sets[s].isQuantitated()) {
quantitatedStores.add(sets[s]);
}
}
DataGroup[] groups = application.dataCollection().getAllDataGroups();
for (int g = 0; g < groups.length; g++) {
if (groups[g].isQuantitated()) {
quantitatedStores.add(groups[g]);
}
}
DataStore[] data = quantitatedStores.toArray(new DataStore[0]);
for (int c = 0; c < chromosomes.length; c++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
progressUpdated(c, chromosomes.length);
Probe[] allProbes = application.dataCollection().probeSet().getProbesForChromosome(chromosomes[c]);
float[][] newValues = new float[data.length][allProbes.length];
try {
for (int p = 0; p < allProbes.length; p++) {
// See if we need to quit
if (cancel) {
progressCancelled();
return;
}
// Find the min and max indices we're going to use.
int minIndex = p;
int maxIndex = p;
if (correctionAction == ADJACENT) {
minIndex = p - (distance / 2);
maxIndex = minIndex + (distance - 1);
if (minIndex < 0)
minIndex = 0;
if (maxIndex > allProbes.length - 1)
maxIndex = allProbes.length - 1;
} else if (correctionAction == WINDOW) {
for (int i = p; i >= 0; i--) {
if (allProbes[i].end() < allProbes[p].start() - (distance / 2)) {
break;
}
minIndex = i;
}
for (int i = p; i < allProbes.length; i++) {
if (allProbes[i].start() > allProbes[p].end() + (distance / 2)) {
break;
}
maxIndex = i;
}
}
// Now go through all of the datasets working out the new value for this range
float[] tempValues = new float[(maxIndex - minIndex) + 1];
for (int d = 0; d < data.length; d++) {
for (int i = minIndex; i <= maxIndex; i++) {
tempValues[i - minIndex] = data[d].getValueForProbe(allProbes[i]);
}
newValues[d][p] = SimpleStats.mean(tempValues);
}
}
// Now assign the values for the probes on this chromosome
for (int d = 0; d < data.length; d++) {
for (int p = 0; p < allProbes.length; p++) {
data[d].setValueForProbe(allProbes[p], newValues[d][p]);
}
}
} catch (SeqMonkException e) {
progressExceptionReceived(e);
}
}
quantitatonComplete();
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class GffFileParser method run.
/* (non-Javadoc)
* @see java.lang.Runnable#run()
*/
public void run() {
File[] probeFiles = getFiles();
DataSet[] newData = new DataSet[probeFiles.length];
int extendBy = prefs.extendReads();
try {
for (int f = 0; f < probeFiles.length; f++) {
BufferedReader br;
if (probeFiles[f].getName().toLowerCase().endsWith(".gz")) {
br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(probeFiles[f]))));
} else {
br = new BufferedReader(new FileReader(probeFiles[f]));
}
String line;
if (prefs.isHiC()) {
newData[f] = new PairedDataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates(), prefs.hiCDistance(), prefs.hiCIgnoreTrans());
} else {
newData[f] = new DataSet(probeFiles[f].getName(), probeFiles[f].getCanonicalPath(), prefs.removeDuplicates());
}
int lineCount = 0;
// Now process the file
while ((line = br.readLine()) != null) {
if (cancel) {
br.close();
progressCancelled();
return;
}
// Ignore blank lines
if (line.trim().length() == 0)
continue;
++lineCount;
if (lineCount % 100000 == 0) {
progressUpdated("Read " + lineCount + " lines from " + probeFiles[f].getName(), f, probeFiles.length);
}
String[] sections = line.split("\t");
// Check to see if we've got enough data to work with
if (sections.length < 5) {
progressWarningReceived(new SeqMonkException("Not enough data from line '" + line + "'"));
// Skip this line...
continue;
}
int strand;
int start;
int end;
try {
start = Integer.parseInt(sections[3]);
end = Integer.parseInt(sections[4]);
// End must always be later than start
if (start > end) {
int temp = start;
start = end;
end = temp;
}
if (sections.length >= 7) {
if (sections[6].equals("+")) {
strand = Location.FORWARD;
} else if (sections[6].equals("-")) {
strand = Location.REVERSE;
} else if (sections[6].equals(".")) {
strand = Location.UNKNOWN;
} else {
progressWarningReceived(new SeqMonkException("Unknown strand character '" + sections[6] + "' marked as unknown strand"));
strand = Location.UNKNOWN;
}
} else {
strand = Location.UNKNOWN;
}
if (extendBy > 0) {
if (strand == Location.FORWARD) {
end += extendBy;
} else if (strand == Location.REVERSE) {
start -= extendBy;
}
}
} catch (NumberFormatException e) {
progressWarningReceived(new SeqMonkException("Location " + sections[3] + "-" + sections[4] + " was not an integer"));
continue;
}
ChromosomeWithOffset c;
try {
c = dataCollection().genome().getChromosome(sections[0]);
} catch (IllegalArgumentException sme) {
progressWarningReceived(sme);
continue;
}
start = c.position(start);
end = c.position(end);
// We also don't allow readings which are beyond the end of the chromosome
if (end > c.chromosome().length()) {
int overrun = end - c.chromosome().length();
progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
continue;
}
// We can now make the new reading
try {
long read = SequenceRead.packPosition(start, end, strand);
newData[f].addData(c.chromosome(), read);
} catch (SeqMonkException e) {
progressWarningReceived(e);
continue;
}
}
// We're finished with the file.
br.close();
// Cache the data in the new dataset
progressUpdated("Caching data from " + probeFiles[f].getName(), f, probeFiles.length);
newData[f].finalise();
}
processingFinished(newData);
} catch (Exception ex) {
progressExceptionReceived(ex);
}
}
use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.
the class SeqMonkParser method parseGroups.
/**
* Parses the list of sample groups.
*
* @param sections The tab split values from the initial groups line
* @throws SeqMonkException
* @throws IOException Signals that an I/O exception has occurred.
*/
private void parseGroups(String[] sections) throws SeqMonkException, IOException {
if (sections.length != 2) {
throw new SeqMonkException("Data Groups line didn't contain 2 sections");
}
if (!sections[0].equals("Data Groups")) {
throw new SeqMonkException("Couldn't find expected data groups line");
}
int n = Integer.parseInt(sections[1]);
dataGroups = new DataGroup[n];
for (int i = 0; i < n; i++) {
String[] group = br.readLine().split("\\t");
DataSet[] groupMembers = new DataSet[group.length - 1];
if (thisDataVersion < 4) {
DataSet[] allSets = application.dataCollection().getAllDataSets();
// In the bad old days we used to refer to datasets by their name
for (int j = 1; j < group.length; j++) {
boolean seen = false;
for (int d = 0; d < allSets.length; d++) {
if (allSets[d].name().equals(group[j])) {
if (seen) {
// System.out.println("Seen this before - abort abort");
throw new SeqMonkException("Name " + group[j] + " is ambiguous in group " + group[0]);
}
// System.out.println("Seen for the first time");
groupMembers[j - 1] = allSets[d];
seen = true;
}
}
}
} else {
for (int j = 1; j < group.length; j++) {
// The more modern and safer way to make up a group is by its index
groupMembers[j - 1] = application.dataCollection().getDataSet(Integer.parseInt(group[j]));
if (groupMembers[j - 1] == null) {
throw new SeqMonkException("Couldn't find dataset at position " + group[j]);
}
}
}
DataGroup g = new DataGroup(group[0], groupMembers);
application.dataCollection().addDataGroup(g);
dataGroups[i] = g;
}
}
Aggregations