Search in sources :

Example 66 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class MonteCarloFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
protected void generateProbeList() {
    Probe[] probes = toList.getAllProbes();
    ArrayList<Probe> allProbes = new ArrayList<Probe>();
    Probe[] fromProbes = startingList.getAllProbes();
    try {
        for (int i = 0; i < fromProbes.length; i++) {
            allProbes.add(fromProbes[i]);
        }
        int passedCount = 0;
        float targetValue = getValue(probes);
        for (int iteration = 0; iteration < iterationCount; iteration++) {
            if (iteration % 100 == 0) {
                progressUpdated("Performed " + iteration + " iterations", iteration, iterationCount);
            }
            if (cancel) {
                progressCancelled();
                return;
            }
            Probe[] theseProbes = makeRandomProbes(allProbes, probes.length);
            float value = getValue(theseProbes);
            if (value >= targetValue) {
                ++passedCount;
            }
        }
        float pValue = ((float) passedCount) / iterationCount;
        ProbeList newList = new ProbeList(toList, "", "", "P-value");
        for (int i = 0; i < probes.length; i++) {
            newList.addProbe(probes[i], pValue);
        }
        filterFinished(newList);
    } catch (SeqMonkException sme) {
        progressExceptionReceived(sme);
        return;
    }
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) ArrayList(java.util.ArrayList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 67 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class GenomeDownloader method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // First we need to download the file from the repository
    try {
        // System.out.println("Downloading "+prefs.getGenomeDownloadLocation()+species+"/"+assembly+".zip");
        URL url = new URL((new String(prefs.getGenomeDownloadLocation() + species + "/" + assembly + ".zip")).replaceAll(" ", "%20"));
        URLConnection connection = url.openConnection();
        connection.setUseCaches(allowCaching);
        InputStream is = connection.getInputStream();
        DataInputStream d = new DataInputStream(new BufferedInputStream(is));
        // System.out.println("Output file is "+prefs.getGenomeBase()+"/"+assembly+".zip");
        File f = new File(prefs.getGenomeBase() + "/" + assembly + ".zip");
        DataOutputStream o;
        try {
            o = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(f)));
        } catch (FileNotFoundException fnfe) {
            throw new SeqMonkException("Could't write into your genomes directory.  Please check your file preferences.");
        }
        byte[] b = new byte[1024];
        int totalBytes = 0;
        int i;
        while ((i = d.read(b)) > 0) {
            // System.out.println("Read "+totalBytes+" bytes");
            o.write(b, 0, i);
            totalBytes += i;
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressUpdated("Downloaded " + totalBytes / 1048576 + "Mb", totalBytes, size);
            }
        }
        d.close();
        o.close();
        File outFile = new File(prefs.getGenomeBase() + "/" + species + "/" + assembly);
        outFile.mkdirs();
        // Now we can uncompress the downloaded file to create the genome
        ZipFile zipFile = new ZipFile(f);
        Enumeration<? extends ZipEntry> e = zipFile.entries();
        int count = 0;
        while (e.hasMoreElements()) {
            count++;
            Enumeration<ProgressListener> en = listeners.elements();
            while (en.hasMoreElements()) {
                en.nextElement().progressUpdated("Unzipped " + count + " files", count, zipFile.size());
            }
            ZipEntry ze = e.nextElement();
            if (ze.isDirectory())
                continue;
            // System.out.println("Found entry "+ze.getName()+" with size "+ze.getSize());
            BufferedOutputStream out;
            try {
                out = new BufferedOutputStream(new FileOutputStream(prefs.getGenomeBase() + "/" + ze.getName()));
            } catch (FileNotFoundException fnfe) {
                throw new SeqMonkException("Couldn't write into the genomes directory.  Please check your file permissions or change your genomes folder.");
            }
            BufferedInputStream in = new BufferedInputStream(zipFile.getInputStream(ze));
            while ((i = in.read(b)) > 0) {
                out.write(b, 0, i);
            }
            in.close();
            out.close();
        }
        // Get rid of the zip file so we don't waste disk space.
        zipFile.close();
        f.delete();
        // Finally we need to clear out any cache files which exist from a previous
        // installation of this genome so the new one will be cached the next time
        // it's loaded.
        File cacheDir = new File(outFile.getAbsoluteFile() + "/cache");
        if (cacheDir.exists()) {
            File[] cacheFiles = cacheDir.listFiles();
            for (int cf = 0; cf < cacheFiles.length; cf++) {
                if (cacheFiles[cf].isFile() && cacheFiles[cf].getName().indexOf("cache") >= 0) {
                    if (!cacheFiles[cf].delete()) {
                        System.out.println("Failed to delete cache file " + cacheFiles[cf].getAbsolutePath());
                    }
                }
            }
        }
    } catch (Exception ex) {
        ProgressListener[] en = listeners.toArray(new ProgressListener[0]);
        for (int i = en.length - 1; i >= 0; i--) {
            en[i].progressExceptionReceived(ex);
        }
        return;
    }
    // Tell everyone we're finished
    /*
		 * Something odd happens here on my linux system.  If I notify the listeners
		 * in the usual order then I'm told that there are two listeners, but the loop
		 * through these listeners (either via Enumeration or array) only notifies one
		 * (SeqMonkApplication) and the progress dialog is never told.
		 * 
		 * If I notify them in reverse order then it works as expected, but I can't see
		 * why telling the application first should stop further processing.
		 * 
		 * On my windows system I don't get this problem.
		 * 
		 */
    ProgressListener[] en = listeners.toArray(new ProgressListener[0]);
    for (int i = en.length - 1; i >= 0; i--) {
        en[i].progressComplete("genome_downloaded", null);
    }
}
Also used : DataInputStream(java.io.DataInputStream) BufferedInputStream(java.io.BufferedInputStream) InputStream(java.io.InputStream) DataOutputStream(java.io.DataOutputStream) ZipEntry(java.util.zip.ZipEntry) FileNotFoundException(java.io.FileNotFoundException) DataInputStream(java.io.DataInputStream) URL(java.net.URL) URLConnection(java.net.URLConnection) FileNotFoundException(java.io.FileNotFoundException) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) ProgressListener(uk.ac.babraham.SeqMonk.DataTypes.ProgressListener) ZipFile(java.util.zip.ZipFile) BufferedInputStream(java.io.BufferedInputStream) FileOutputStream(java.io.FileOutputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) File(java.io.File) ZipFile(java.util.zip.ZipFile) BufferedOutputStream(java.io.BufferedOutputStream)

Example 68 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class SplicingEfficiencyPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    boolean correctReadLength = optionsPanel.correctForReadLength();
    boolean logTransform = optionsPanel.logTransform();
    boolean applyTranscriptLengthCorrection = optionsPanel.applyTranscriptLengthCorrection();
    // We need to make a lookup for chromosomes from their names for later on.
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    Hashtable<String, Chromosome> chrLookup = new Hashtable<String, Chromosome>();
    for (int c = 0; c < chrs.length; c++) {
        chrLookup.put(chrs[c].name(), chrs[c]);
    }
    // We're going to cache the gene to transcript mappings so we don't have to re-do them
    // later on.
    Hashtable<String, Vector<Feature>> transcriptLookup = new Hashtable<String, Vector<Feature>>();
    Vector<Feature> usedGeneFeatures = new Vector<Feature>();
    // We'll record the number of failures so we can report it
    int noTranscriptCount = 0;
    int noIntronCount = 0;
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Making features for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] geneFeatures = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedGeneFeatureType());
        Arrays.sort(geneFeatures);
        Feature[] transcriptFeatures = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedTranscriptFeatureType());
        Arrays.sort(transcriptFeatures);
        // We need to figure out whether we can match based on name, or whether we need to do it based on location.
        boolean matchOnNames = true;
        for (int t = 0; t < transcriptFeatures.length; t++) {
            if (!transcriptFeatures[t].name().matches("-\\d\\d\\d$")) {
                matchOnNames = false;
                break;
            }
        }
        if (!matchOnNames) {
            progressWarningReceived(new SeqMonkException("Non-Ensembl transcript names found - matching based on position"));
        }
        if (matchOnNames) {
            for (int i = 0; i < transcriptFeatures.length; i++) {
                String name = transcriptFeatures[i].name();
                name = name.replaceAll("-\\d\\d\\d$", "");
                if (!transcriptLookup.containsKey(name)) {
                    transcriptLookup.put(name, new Vector<Feature>());
                }
                transcriptLookup.get(name).add(transcriptFeatures[i]);
            }
        } else {
            // We need to go through the genes and transcripts in parallel and match them up based on containment
            // and direction.
            int lastGeneIndex = 0;
            for (int t = 0; t < transcriptFeatures.length; t++) {
                for (int g = lastGeneIndex; g < geneFeatures.length; g++) {
                    if (transcriptFeatures[t].location().start() > geneFeatures[g].location().end()) {
                        lastGeneIndex = g;
                        continue;
                    }
                    // If the gene is beyond the end of the transcript we can stop looking
                    if (geneFeatures[g].location().start() > transcriptFeatures[t].location().end()) {
                        break;
                    }
                    // If we're on the same strand and contained within the gene then we have a match
                    if (geneFeatures[g].location().strand() == transcriptFeatures[t].location().strand() && transcriptFeatures[t].location().start() >= geneFeatures[g].location().start() && transcriptFeatures[t].location().end() <= geneFeatures[g].location().end()) {
                        String name = geneFeatures[g].name();
                        if (!transcriptLookup.containsKey(name)) {
                            transcriptLookup.put(name, new Vector<Feature>());
                        }
                        transcriptLookup.get(name).add(transcriptFeatures[t]);
                    }
                }
            }
        }
        for (int f = 0; f < geneFeatures.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            if (!transcriptLookup.containsKey(geneFeatures[f].name())) {
                ++noTranscriptCount;
                // No good making features for genes with no transcript.
                continue;
            }
            Feature[] localTranscripts = transcriptLookup.get(geneFeatures[f].name()).toArray(new Feature[0]);
            boolean[] exonPositions = new boolean[geneFeatures[f].location().length()];
            for (int t = 0; t < localTranscripts.length; t++) {
                if (!SequenceRead.overlaps(geneFeatures[f].location().packedPosition(), localTranscripts[t].location().packedPosition())) {
                    // It's not a match for this gene
                    continue;
                }
                if (!(localTranscripts[t].location() instanceof SplitLocation)) {
                    for (int p = localTranscripts[t].location().start() - geneFeatures[f].location().start(); p <= localTranscripts[t].location().end() - geneFeatures[f].location().start(); p++) {
                        if (p < 0)
                            continue;
                        if (p >= exonPositions.length)
                            continue;
                        exonPositions[p] = true;
                    }
                    continue;
                }
                Location[] exons = ((SplitLocation) localTranscripts[t].location()).subLocations();
                for (int e = 0; e < exons.length; e++) {
                    for (int p = exons[e].start() - geneFeatures[f].location().start(); p <= exons[e].end() - geneFeatures[f].location().start(); p++) {
                        if (p < 0)
                            continue;
                        if (p >= exonPositions.length)
                            continue;
                        exonPositions[p] = true;
                    }
                }
            }
            int exonPositionCount = 0;
            int intronPositionCount = 0;
            for (int p = 0; p < exonPositions.length; p++) {
                if (exonPositions[p])
                    exonPositionCount++;
                else
                    intronPositionCount++;
            }
            if (exonPositionCount == 0) {
                ++noTranscriptCount;
                continue;
            }
            if (intronPositionCount == 0) {
                ++noIntronCount;
                continue;
            }
            probes.add(new Probe(chrs[c], geneFeatures[f].location().start(), geneFeatures[f].location().end(), geneFeatures[f].location().strand(), geneFeatures[f].name()));
            usedGeneFeatures.add(geneFeatures[f]);
        }
    }
    if (noTranscriptCount > 0) {
        progressWarningReceived(new SeqMonkException("" + noTranscriptCount + " genes had no associated transcripts"));
    }
    if (noIntronCount > 0) {
        progressWarningReceived(new SeqMonkException("" + noIntronCount + " genes had no intron only areas"));
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Gene features over " + optionsPanel.getSelectedGeneFeatureType(), allProbes));
    // Having made probes we now need to quantitate them.  We'll get the full set of reads for the full gene
    // and then work out a combined set of exons.  We can then quantitate the bases which fall into introns
    // and exons separately and then work out a ratio from there.
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    Feature[] geneFeatures = usedGeneFeatures.toArray(new Feature[0]);
    for (int g = 0; g < geneFeatures.length; g++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        if (g % 100 == 0) {
            progressUpdated("Quantitating features", g, geneFeatures.length);
        }
        int[] readLengths = new int[data.length];
        if (correctReadLength) {
            for (int d = 0; d < data.length; d++) {
                readLengths[d] = data[d].getMaxReadLength();
            }
        }
        // Find the set of transcripts which relate to this gene.
        Feature[] transcripts = transcriptLookup.get(geneFeatures[g].name()).toArray(new Feature[0]);
        Vector<Feature> validTranscripts = new Vector<Feature>();
        for (int t = 0; t < transcripts.length; t++) {
            if (SequenceRead.overlaps(geneFeatures[g].location().packedPosition(), transcripts[t].location().packedPosition())) {
                validTranscripts.add(transcripts[t]);
            }
        }
        transcripts = validTranscripts.toArray(new Feature[0]);
        // before so if we do then something has gone wrong.
        if (transcripts.length == 0) {
            throw new IllegalStateException("No transcripts for gene " + geneFeatures[g] + " on chr " + geneFeatures[g].chromosomeName());
        }
        Vector<Location> allExonsVector = new Vector<Location>();
        for (int t = 0; t < transcripts.length; t++) {
            if (transcripts[t].location() instanceof SplitLocation) {
                Location[] sublocs = ((SplitLocation) transcripts[t].location()).subLocations();
                for (int i = 0; i < sublocs.length; i++) allExonsVector.add(sublocs[i]);
            } else {
                allExonsVector.add(transcripts[t].location());
            }
        }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // System.err.println("Cpa6 had "+allExonsVector.size()+" total exons");
        // }
        Collections.sort(allExonsVector);
        // Now go through and make a merged set of exons to remove any redundancy
        Vector<Location> mergedExonsVector = new Vector<Location>();
        Location lastLocation = null;
        Enumeration<Location> en = allExonsVector.elements();
        while (en.hasMoreElements()) {
            Location l = en.nextElement();
            if (lastLocation == null) {
                // if (geneFeatures[f].name().equals("Cpa6")) {
                // System.err.println("Setting as first location");
                // }
                lastLocation = l;
                continue;
            }
            // Check if it's the same as the last, which is likely
            if (l.start() == lastLocation.start() && l.end() == lastLocation.end()) {
                continue;
            }
            // Check if they overlap and can be merged
            if (l.start() <= lastLocation.end() && l.end() >= lastLocation.start()) {
                // if (geneFeatures[f].name().equals("Cpa6")) {
                // System.err.println("Overlaps with last location");
                // }
                // It overlaps with the last location so merge them
                lastLocation = new Location(Math.min(l.start(), lastLocation.start()), Math.max(l.end(), lastLocation.end()), geneFeatures[g].location().strand());
                // }
                continue;
            }
            // Start a new location
            // if (geneFeatures[f].name().equals("Cpa6")) {
            // System.err.println("Doesn't overlap - adding last location and creating new one");
            // }
            mergedExonsVector.add(lastLocation);
            lastLocation = l;
        }
        if (lastLocation != null) {
            mergedExonsVector.add(lastLocation);
        }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // System.err.println("Cpa6 had "+mergedExonsVector.size()+" merged exons");
        // }
        // Now we can start the quantitation.
        int intronLength = geneFeatures[g].location().length();
        int exonLength = 0;
        Location[] subLocs = mergedExonsVector.toArray(new Location[0]);
        for (int l = 0; l < subLocs.length; l++) {
            exonLength += subLocs[l].length();
        }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // System.err.println("Cpa6 total intron length="+intronLength+" exon length="+exonLength);
        // }
        intronLength -= exonLength;
        if (intronLength <= 0) {
            progressWarningReceived(new IllegalStateException("Intron length of " + intronLength + " for gene " + geneFeatures[g]));
            for (int d = 0; d < data.length; d++) {
                data[d].setValueForProbe(allProbes[g], Float.NaN);
            }
            continue;
        }
        if (exonLength <= 0) {
            progressWarningReceived(new IllegalStateException("Exon length of " + exonLength + " for gene " + geneFeatures[g]));
            for (int d = 0; d < data.length; d++) {
                data[d].setValueForProbe(allProbes[g], Float.NaN);
            }
            continue;
        }
        for (int d = 0; d < data.length; d++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            int totalIntronCount = 0;
            int totalExonCount = 0;
            long[] reads = data[d].getReadsForProbe(new Probe(chrLookup.get(geneFeatures[g].chromosomeName()), geneFeatures[g].location().start(), geneFeatures[g].location().end()));
            for (int r = 0; r < reads.length; r++) {
                if (!readFilter.useRead(geneFeatures[g].location(), reads[r])) {
                    continue;
                }
                int overlap = (Math.min(geneFeatures[g].location().end(), SequenceRead.end(reads[r])) - Math.max(geneFeatures[g].location().start(), SequenceRead.start(reads[r]))) + 1;
                totalIntronCount += overlap;
                // Now we see if we overlap with any of the exons
                for (int s = 0; s < subLocs.length; s++) {
                    if (subLocs[s].start() <= SequenceRead.end(reads[r]) && subLocs[s].end() >= SequenceRead.start(reads[r])) {
                        int exonOverlap = (Math.min(subLocs[s].end(), SequenceRead.end(reads[r])) - Math.max(subLocs[s].start(), SequenceRead.start(reads[r]))) + 1;
                        if (exonOverlap > 0) {
                            totalExonCount += exonOverlap;
                            totalIntronCount -= exonOverlap;
                        }
                    }
                }
            }
            if (totalIntronCount < 0) {
                progressWarningReceived(new SeqMonkException("Intron count of " + totalIntronCount + " for " + geneFeatures[g].name()));
                continue;
            }
            // reads which this count could comprise, rounding down to a whole number.
            if (correctReadLength) {
                totalIntronCount /= readLengths[d];
                totalExonCount /= readLengths[d];
            }
            float intronValue = totalIntronCount;
            float exonValue = totalExonCount;
            // If we're log transforming then we need to set zero values to 0.9
            if (logTransform && intronValue == 0) {
                intronValue = 0.9f;
            }
            if (logTransform && exonValue == 0) {
                exonValue = 0.9f;
            }
            // been asked to.
            if (applyTranscriptLengthCorrection) {
                intronValue /= (intronLength / 1000f);
                exonValue /= (exonLength / 1000f);
            }
            // We also correct by the total read count, or length
            if (correctReadLength) {
                intronValue /= (data[d].getTotalReadCount() / 1000000f);
                exonValue /= (data[d].getTotalReadCount() / 1000000f);
            } else {
                intronValue /= (data[d].getTotalReadLength() / 1000000f);
                exonValue /= (data[d].getTotalReadLength() / 1000000f);
            }
            // Finally we do the log transform if we've been asked to
            if (logTransform) {
                if (intronValue == 0) {
                    intronValue = 0.0001f;
                }
                if (exonValue == 0) {
                    exonValue = 0.0001f;
                }
                intronValue = (float) Math.log(intronValue) / log2;
                exonValue = (float) Math.log(exonValue) / log2;
            }
            // Now we check what value they actually wanted to record
            switch(optionsPanel.quantitationType()) {
                case (EXONS):
                    {
                        data[d].setValueForProbe(allProbes[g], exonValue);
                        break;
                    }
                case (INTRONS):
                    {
                        data[d].setValueForProbe(allProbes[g], intronValue);
                        break;
                    }
                case (RATIO):
                    {
                        if (logTransform) {
                            data[d].setValueForProbe(allProbes[g], exonValue - intronValue);
                        } else {
                            if (intronValue == 0) {
                                intronValue = 0.0001f;
                            }
                            data[d].setValueForProbe(allProbes[g], exonValue / intronValue);
                        }
                        break;
                    }
                default:
                    throw new IllegalStateException("Unknonwn quantitation type " + optionsPanel.quantitationType());
            }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // try {
        // System.err.println("Stored value was "+data[d].getValueForProbe(allProbes[currentIndex]));
        // } catch (SeqMonkException e) {
        // e.printStackTrace();
        // }
        // }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Splicing efficiency quantitation");
    if (correctReadLength) {
        quantitationDescription.append(" counting reads");
    } else {
        quantitationDescription.append(" counting bases");
    }
    if (optionsPanel.logTransform()) {
        quantitationDescription.append(" log transformed");
    }
    if (applyTranscriptLengthCorrection) {
        quantitationDescription.append(" correcting for feature length");
    }
    // TODO: Add more description
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) Hashtable(java.util.Hashtable) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 69 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class ValuesFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    // System.out.println("Data store size="+stores.length+" lower="+lowerLimit+" upper="+upperLimit+" type="+limitType+" chosen="+chosenNumber);
    Probe[] probes = startingList.getAllProbes();
    ProbeList newList = new ProbeList(startingList, "Filtered Probes", "", null);
    for (int p = 0; p < probes.length; p++) {
        progressUpdated(p, probes.length);
        if (cancel) {
            cancel = false;
            progressCancelled();
            return;
        }
        int count = 0;
        for (int s = 0; s < stores.length; s++) {
            float d = 0;
            if (!stores[s].hasValueForProbe(probes[p]))
                continue;
            try {
                d = stores[s].getValueForProbe(probes[p]);
            } catch (SeqMonkException e) {
                e.printStackTrace();
                continue;
            }
            // NaN values always fail the filter.
            if (Float.isNaN(d))
                continue;
            // Now we have the value we need to know if it passes the test
            if (upperLimit != null)
                if (d > upperLimit)
                    continue;
            if (lowerLimit != null)
                if (d < lowerLimit)
                    continue;
            // This one passes, we can add it to the count
            ++count;
        }
        // probe to the probe set.
        switch(limitType) {
            case EXACTLY:
                if (count == chosenNumber)
                    newList.addProbe(probes[p], null);
                break;
            case AT_LEAST:
                if (count >= chosenNumber)
                    newList.addProbe(probes[p], null);
                break;
            case NO_MORE_THAN:
                if (count <= chosenNumber)
                    newList.addProbe(probes[p], null);
                break;
        }
    }
    newList.setName("Value between " + lowerLimit + "-" + upperLimit);
    filterFinished(newList);
}
Also used : ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 70 with SeqMonkException

use of uk.ac.babraham.SeqMonk.SeqMonkException in project SeqMonk by s-andrews.

the class WindowedReplicateStatsFilter method generateProbeList.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.Filters.ProbeFilter#generateProbeList()
	 */
@Override
protected void generateProbeList() {
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<ProbeGroupTTestValue> newListProbesVector = new Vector<ProbeGroupTTestValue>();
    for (int c = 0; c < chromosomes.length; c++) {
        progressUpdated("Processing windows on Chr" + chromosomes[c].name(), c, chromosomes.length);
        Probe[] probes = startingList.getProbesForChromosome(chromosomes[c]);
        ProbeGroupGenerator gen = null;
        if (windowType == DISTANCE_WINDOW) {
            gen = new ProbeWindowGenerator(probes, windowSize);
        } else if (windowType == CONSECUTIVE_WINDOW) {
            gen = new ConsecutiveProbeGenerator(probes, windowSize);
        } else if (windowType == FEATURE_WINDOW) {
            gen = new FeatureProbeGroupGenerator(probes, collection.genome().annotationCollection().getFeaturesForType(optionsPanel.featureTypeBox.getSelectedItem().toString()));
        }
        while (true) {
            if (cancel) {
                cancel = false;
                progressCancelled();
                return;
            }
            Probe[] theseProbes = gen.nextSet();
            if (theseProbes == null) {
                // System.err.println("List of probes was null");
                break;
            }
            // We need at least 3 probes in a set to calculate a p-value
            if (theseProbes.length < 3) {
                // System.err.println("Only "+theseProbes.length+" probes in the set");
                continue;
            }
            double[][] values = new double[stores.length][];
            for (int j = 0; j < stores.length; j++) {
                if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
                    values[j] = new double[theseProbes.length * ((ReplicateSet) stores[j]).dataStores().length];
                } else {
                    values[j] = new double[theseProbes.length];
                }
            }
            for (int j = 0; j < stores.length; j++) {
                int index = 0;
                for (int i = 0; i < theseProbes.length; i++) {
                    try {
                        if (splitReplicateSets & stores[j] instanceof ReplicateSet) {
                            DataStore[] localStores = ((ReplicateSet) stores[j]).dataStores();
                            for (int l = 0; l < localStores.length; l++) {
                                values[j][index] = localStores[l].getValueForProbe(theseProbes[i]);
                                index++;
                            }
                        } else {
                            values[j][index] = stores[j].getValueForProbe(theseProbes[i]);
                            index++;
                        }
                    } catch (SeqMonkException e) {
                    }
                }
                if (index != values[j].length) {
                    throw new IllegalStateException("Didn't fill all values total=" + values[j].length + " index=" + index);
                }
            }
            double pValue = 0;
            try {
                if (stores.length == 1) {
                    pValue = TTest.calculatePValue(values[0], 0);
                } else if (stores.length == 2) {
                    pValue = TTest.calculatePValue(values[0], values[1]);
                } else {
                    pValue = AnovaTest.calculatePValue(values);
                }
            } catch (SeqMonkException e) {
                throw new IllegalStateException(e);
            }
            newListProbesVector.add(new ProbeGroupTTestValue(theseProbes, pValue));
        }
    }
    ProbeGroupTTestValue[] newListProbes = newListProbesVector.toArray(new ProbeGroupTTestValue[0]);
    // Do the multi-testing correction if necessary
    if (multiTest) {
        BenjHochFDR.calculateQValues(newListProbes);
    }
    ProbeList newList;
    // We need to handle duplicate hits internally since probe lists can't do
    // this themselves any more.
    Hashtable<Probe, Float> newListTemp = new Hashtable<Probe, Float>();
    if (multiTest) {
        newList = new ProbeList(startingList, "", "", "Q-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].q <= cutoff) {
                Probe[] passedProbes = newListProbes[i].probes;
                for (int p = 0; p < passedProbes.length; p++) {
                    if (newListTemp.containsKey(passedProbes[p])) {
                        // We always give a probe the lowest possible q-value
                        if (newListTemp.get(passedProbes[p]) <= newListProbes[i].q) {
                            continue;
                        }
                    }
                    newListTemp.put(passedProbes[p], (float) newListProbes[i].q);
                }
            }
        }
    } else {
        newList = new ProbeList(startingList, "", "", "P-value");
        for (int i = 0; i < newListProbes.length; i++) {
            if (newListProbes[i].p <= cutoff) {
                Probe[] passedProbes = newListProbes[i].probes;
                for (int p = 0; p < passedProbes.length; p++) {
                    if (newListTemp.containsKey(passedProbes[p])) {
                        // We always give a probe the lowest possible p-value
                        if (newListTemp.get(passedProbes[p]) <= newListProbes[i].p) {
                            continue;
                        }
                    }
                    newListTemp.put(passedProbes[p], (float) newListProbes[i].p);
                }
            }
        }
    }
    // Add the cached hits to the new list
    Enumeration<Probe> en = newListTemp.keys();
    while (en.hasMoreElements()) {
        Probe p = en.nextElement();
        newList.addProbe(p, newListTemp.get(p));
    }
    filterFinished(newList);
}
Also used : ReplicateSet(uk.ac.babraham.SeqMonk.DataTypes.ReplicateSet) ProbeWindowGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeWindowGenerator) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) DataStore(uk.ac.babraham.SeqMonk.DataTypes.DataStore) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) ProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ProbeGroupGenerator) FeatureProbeGroupGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.FeatureProbeGroupGenerator) ProbeList(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList) Hashtable(java.util.Hashtable) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) ProbeGroupTTestValue(uk.ac.babraham.SeqMonk.Analysis.Statistics.ProbeGroupTTestValue) ConsecutiveProbeGenerator(uk.ac.babraham.SeqMonk.Filters.ProbeGroupGenerator.ConsecutiveProbeGenerator)

Aggregations

SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)91 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)49 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)30 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)22 Vector (java.util.Vector)21 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)20 File (java.io.File)19 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)17 BufferedReader (java.io.BufferedReader)16 FileReader (java.io.FileReader)16 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)14 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)13 FileInputStream (java.io.FileInputStream)11 IOException (java.io.IOException)11 InputStreamReader (java.io.InputStreamReader)11 GZIPInputStream (java.util.zip.GZIPInputStream)11 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)8 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)8 FileNotFoundException (java.io.FileNotFoundException)7 SequenceReadWithChromosome (uk.ac.babraham.SeqMonk.DataTypes.Sequence.SequenceReadWithChromosome)7