Search in sources :

Example 11 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class RNASeqPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    boolean mergeTranscripts = optionsPanel.mergeTranscripts();
    boolean pairedEnd = optionsPanel.pairedEnd();
    boolean logTransform = optionsPanel.logTransform();
    boolean applyTranscriptLengthCorrection = optionsPanel.applyTranscriptLengthCorrection();
    boolean rawCounts = optionsPanel.rawCounts();
    boolean noValueForZeroCounts = optionsPanel.noValueForZeroCounts();
    boolean correctDNAContamination = optionsPanel.correctForDNAContamination();
    boolean correctDuplication = optionsPanel.correctForDNADuplication();
    if (rawCounts) {
        logTransform = false;
        applyTranscriptLengthCorrection = false;
        noValueForZeroCounts = false;
    }
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        // System.err.println("Processing chr "+chrs[c].name());
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Making features for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        Arrays.sort(features);
        FeatureGroup[] mergedTranscripts = mergeTranscripts(features, mergeTranscripts);
        for (int f = 0; f < mergedTranscripts.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            probes.add(new Probe(chrs[c], mergedTranscripts[f].start(), mergedTranscripts[f].end(), mergedTranscripts[f].strand(), mergedTranscripts[f].name()));
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    Arrays.sort(allProbes);
    if (collection().probeSet() == null) {
        collection().setProbeSet(new ProbeSet("Transcript features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    } else {
        Probe[] existingProbes = collection().probeSet().getAllProbes();
        Arrays.sort(existingProbes);
        if (allProbes.length != existingProbes.length) {
            collection().setProbeSet(new ProbeSet("Transcript features over " + optionsPanel.getSelectedFeatureType(), allProbes));
        } else {
            // Check the positions against the new ones
            boolean areTheyTheSame = true;
            for (int p = 0; p < allProbes.length; p++) {
                if (allProbes[p].packedPosition() != existingProbes[p].packedPosition()) {
                    areTheyTheSame = false;
                    break;
                }
            }
            if (areTheyTheSame) {
                allProbes = existingProbes;
            } else {
                collection().setProbeSet(new ProbeSet("Transcript features over " + optionsPanel.getSelectedFeatureType(), allProbes));
            }
        }
    }
    // If we're correcting for DNA contamination we need to work out the average density of
    // reads in intergenic regions
    float[] dnaDensityPerKb = new float[data.length];
    int[] correctedTotalCounts = new int[data.length];
    if (correctDNAContamination) {
        // We need to make interstitial probes to the set we already have, ignoring those at the end of chromosomes
        Vector<Probe> intergenicProbes = new Vector<Probe>();
        Chromosome lastChr = allProbes[0].chromosome();
        for (int p = 1; p < allProbes.length; p++) {
            if (allProbes[p].chromosome() != lastChr) {
                lastChr = allProbes[p].chromosome();
                continue;
            }
            // See if there's a gap back to the last probe
            if (allProbes[p].start() > allProbes[p - 1].end()) {
                if (allProbes[p].start() - allProbes[p - 1].end() < 1000) {
                    // Don't bother with really short probes
                    continue;
                }
                intergenicProbes.add(new Probe(lastChr, allProbes[p - 1].end() + 1, allProbes[p].start() - 1));
            }
        }
        Probe[] allIntergenicProbes = intergenicProbes.toArray(new Probe[0]);
        for (int d = 0; d < data.length; d++) {
            progressUpdated("Quantitating DNA contamination", 1, 2);
            float[] densities = new float[allIntergenicProbes.length];
            for (int p = 0; p < allIntergenicProbes.length; p++) {
                densities[p] = data[d].getReadsForProbe(allIntergenicProbes[p]).length / (allIntergenicProbes[p].length() / 1000f);
            }
            dnaDensityPerKb[d] = SimpleStats.median(densities);
        }
        // Work out adjusted total counts having subtracted the DNA contamination
        for (int d = 0; d < data.length; d++) {
            int predictedContamination = (int) (dnaDensityPerKb[d] * (SeqMonkApplication.getInstance().dataCollection().genome().getTotalGenomeLength() / 1000));
            int correctedTotalReadCount = data[d].getTotalReadCount() - predictedContamination;
            correctedTotalCounts[d] = correctedTotalReadCount;
        }
        // Halve the density if they're doing a directional quantitation
        if (optionsPanel.isDirectional()) {
            for (int i = 0; i < dnaDensityPerKb.length; i++) {
                dnaDensityPerKb[i] /= 2;
            }
        }
        // Halve the density if the libraries are paired end
        if (pairedEnd) {
            for (int i = 0; i < dnaDensityPerKb.length; i++) {
                dnaDensityPerKb[i] /= 2;
            }
        }
    }
    // If we're correcting for duplication we need to work out the modal count depth in
    // intergenic regions
    int[] modalDuplicationLevels = new int[data.length];
    if (correctDuplication) {
        for (int d = 0; d < data.length; d++) {
            progressUpdated("Quantitating DNA duplication", 1, 2);
            // We're not going to look at depths which are > 200.  If it's that duplicated
            // then there's no point continuing anyway.
            int[] depthCount = new int[200];
            for (int p = 0; p < allProbes.length; p++) {
                long[] reads = data[d].getReadsForProbe(allProbes[p]);
                int currentCount = 0;
                for (int r = 1; r < reads.length; r++) {
                    if (reads[r] == reads[r - 1]) {
                        ++currentCount;
                    } else {
                        if (currentCount > 0 && currentCount < 200) {
                            ++depthCount[currentCount];
                        }
                        currentCount = 1;
                    }
                }
            }
            // Find the modal depth in intergenic regions. This is the best estimate
            // of duplication
            // Since unique reads turn up all over the place even in duplicated
            // data we say that if unique reads are higher than the sum of 2-10 there
            // is no duplication
            int twoTenSum = 0;
            for (int i = 2; i <= 10; i++) {
                twoTenSum += depthCount[i];
            }
            if (depthCount[1] > twoTenSum) {
                modalDuplicationLevels[d] = 1;
            } else {
                int highestDepth = 0;
                int bestDupGuess = 1;
                for (int i = 2; i < depthCount.length; i++) {
                    // System.err.println("For depth "+i+" count was "+depthCount[i]);
                    if (depthCount[i] > highestDepth) {
                        bestDupGuess = i;
                        highestDepth = depthCount[i];
                    }
                }
                modalDuplicationLevels[d] = bestDupGuess;
            }
        }
    }
    // Having made probes we now need to quantitate them.  We'll fetch the
    // probes overlapping each sub-feature and then aggregate these together
    // to get the final quantitation.
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    int currentIndex = 0;
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        Arrays.sort(features);
        FeatureGroup[] mergedTranscripts = mergeTranscripts(features, mergeTranscripts);
        int[] readLengths = new int[data.length];
        for (int d = 0; d < data.length; d++) {
            readLengths[d] = data[d].getMaxReadLength();
            // actual length.
            if (pairedEnd) {
                readLengths[d] *= 2;
            }
        }
        for (int f = 0; f < mergedTranscripts.length; f++) {
            Location[] subLocations = mergedTranscripts[f].getSubLocations();
            int totalLength = 0;
            // Find the total length of all of the exons
            for (int s = 0; s < subLocations.length; s++) {
                totalLength += subLocations[s].length();
            }
            for (int d = 0; d < data.length; d++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                long totalCount = 0;
                for (int s = 0; s < subLocations.length; s++) {
                    long[] reads = data[d].getReadsForProbe(new Probe(chrs[c], subLocations[s].start(), subLocations[s].end()));
                    for (int r = 0; r < reads.length; r++) {
                        if (!readFilter.useRead(subLocations[s], reads[r])) {
                            continue;
                        }
                        int overlap = (Math.min(subLocations[s].end(), SequenceRead.end(reads[r])) - Math.max(subLocations[s].start(), SequenceRead.start(reads[r]))) + 1;
                        totalCount += overlap;
                    }
                }
                // Now we correct the count by the total length of reads in the data and by
                // the length of the split parts of the probe, and assign this to the probe.
                // As we're correcting for read length then we work out the whole number of
                // reads which this count could comprise, rounding down to a whole number.
                totalCount /= readLengths[d];
                // We can now subtract the DNA contamination prediction.
                if (correctDNAContamination) {
                    int predictedContamination = (int) ((totalLength / 1000f) * dnaDensityPerKb[d]);
                    totalCount -= predictedContamination;
                    // Makes no sense to have negative counts
                    if (totalCount < 0)
                        totalCount = 0;
                }
                // ..and we can divide by the duplication level if we know it.
                if (correctDuplication) {
                    totalCount /= modalDuplicationLevels[d];
                }
                // System.err.println("Total read count for "+mergedTranscripts[f].name+" is "+totalCount);
                float value = totalCount;
                if (value == 0 && noValueForZeroCounts) {
                    value = Float.NaN;
                }
                // If we're log transforming then we need to set zero values to 0.9
                if (logTransform && value == 0 && !noValueForZeroCounts) {
                    value = 0.9f;
                }
                // been asked to.
                if (applyTranscriptLengthCorrection) {
                    value /= (totalLength / 1000f);
                }
                // We also correct by the total read count
                if (!rawCounts) {
                    // System.err.println("True total is "+data[d].getTotalReadCount()+" corrected total is "+correctedTotalCounts[d]);
                    // If these libraries are paired end then the total number of
                    // reads is also effectively halved.
                    float totalReadCount;
                    // calculated this already, but otherwise we'll take the total count (total length/read length)
                    if (correctDNAContamination) {
                        totalReadCount = correctedTotalCounts[d];
                    } else {
                        totalReadCount = data[d].getTotalReadLength() / readLengths[d];
                    }
                    // If we're correcting for duplication we divide by the duplication level.
                    if (correctDuplication) {
                        totalReadCount /= modalDuplicationLevels[d];
                    }
                    // Finally we work out millions of reads (single end) or fragments (paired end)
                    if (pairedEnd) {
                        totalReadCount /= 2000000f;
                    } else {
                        totalReadCount /= 1000000f;
                    }
                    // Lastly we divide the value by the total millions of reads to get the globally corrected count.
                    value /= totalReadCount;
                }
                // Finally we do the log transform if we've been asked to
                if (logTransform) {
                    value = (float) Math.log(value) / log2;
                }
                data[d].setValueForProbe(allProbes[currentIndex], value);
            }
            currentIndex++;
        }
    }
    collection().probeSet().setCurrentQuantitation(getQuantitationDescription(mergeTranscripts, applyTranscriptLengthCorrection, correctDNAContamination, logTransform, rawCounts));
    // If we estimated any parameters let's report them.
    if (correctDNAContamination || correctDuplication) {
        float[] dna = null;
        if (correctDNAContamination) {
            dna = dnaDensityPerKb;
        }
        int[] dup = null;
        if (correctDuplication) {
            dup = modalDuplicationLevels;
        }
        RNASeqParametersModel model = new RNASeqParametersModel(data, dna, dup);
        ReportTableDialog report = new ReportTableDialog(SeqMonkApplication.getInstance(), new Report(null, null) {

            @Override
            public void run() {
            }

            @Override
            public String name() {
                return "RNA-Seq parameter";
            }

            @Override
            public boolean isReady() {
                return true;
            }

            @Override
            public JPanel getOptionsPanel() {
                return null;
            }

            @Override
            public void generateReport() {
            }
        }, model);
    }
    quantitatonComplete();
}
Also used : JPanel(javax.swing.JPanel) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) ReportTableDialog(uk.ac.babraham.SeqMonk.Displays.Report.ReportTableDialog) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector) Report(uk.ac.babraham.SeqMonk.Reports.Report) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Example 12 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class GeneTrapQuantitationPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    boolean logTransform = optionsPanel.logTransform();
    boolean rawCounts = optionsPanel.rawCountsBox.isSelected();
    if (rawCounts) {
        logTransform = false;
    }
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        // System.err.println("Processing chr "+chrs[c].name());
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Making features for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        Arrays.sort(features);
        FeatureGroup[] mergedTranscripts = mergeTranscripts(features);
        for (int f = 0; f < mergedTranscripts.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            probes.add(new Probe(chrs[c], mergedTranscripts[f].start(), mergedTranscripts[f].end(), mergedTranscripts[f].strand(), mergedTranscripts[f].name));
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Transcript features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Having made probes we now need to quantitate them.  We'll fetch the
    // probes overlapping each sub-feature and then aggregate these together
    // to get the final quantitation.
    int currentIndex = 0;
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Quantitating features on chr" + chrs[c].name(), chrs.length + c, chrs.length * 2);
        Feature[] features = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedFeatureType());
        Arrays.sort(features);
        FeatureGroup[] mergedTranscripts = mergeTranscripts(features);
        for (int f = 0; f < mergedTranscripts.length; f++) {
            Location[] subLocations = mergedTranscripts[f].getSubLocations();
            for (int d = 0; d < data.length; d++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                int totalCount = 0;
                long[] reads = data[d].getReadsForProbe(new Probe(chrs[c], mergedTranscripts[f].start(), mergedTranscripts[f].end()));
                READ: for (int r = 0; r < reads.length; r++) {
                    if (SequenceRead.strand(reads[r]) == mergedTranscripts[f].strand()) {
                        // TODO: Should we check if we're within the CDS?
                        ++totalCount;
                        continue;
                    }
                    // We'll still count this if it overlaps with an exon
                    for (int s = 0; s < subLocations.length; s++) {
                        if (SequenceRead.overlaps(reads[r], subLocations[s].packedPosition())) {
                            ++totalCount;
                            continue READ;
                        }
                    }
                }
                float value = totalCount;
                // If we're log transforming then we need to set zero values to 0.9
                if (logTransform && value == 0) {
                    value = 0.9f;
                }
                // We also correct by the total read count
                if (!rawCounts) {
                    value /= (data[d].getTotalReadCount() / 1000000f);
                }
                // Finally we do the log transform if we've been asked to
                if (logTransform) {
                    value = (float) Math.log(value) / log2;
                }
                data[d].setValueForProbe(allProbes[currentIndex], value);
            }
            currentIndex++;
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Gene trap quantitation on transcripts of type ");
    quantitationDescription.append(optionsPanel.getSelectedFeatureType());
    if (logTransform) {
        quantitationDescription.append(". Log transformed");
    }
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) Vector(java.util.Vector) LongVector(uk.ac.babraham.SeqMonk.Utilities.LongVector) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 13 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class SplicingEfficiencyPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    boolean correctReadLength = optionsPanel.correctForReadLength();
    boolean logTransform = optionsPanel.logTransform();
    boolean applyTranscriptLengthCorrection = optionsPanel.applyTranscriptLengthCorrection();
    // We need to make a lookup for chromosomes from their names for later on.
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    Hashtable<String, Chromosome> chrLookup = new Hashtable<String, Chromosome>();
    for (int c = 0; c < chrs.length; c++) {
        chrLookup.put(chrs[c].name(), chrs[c]);
    }
    // We're going to cache the gene to transcript mappings so we don't have to re-do them
    // later on.
    Hashtable<String, Vector<Feature>> transcriptLookup = new Hashtable<String, Vector<Feature>>();
    Vector<Feature> usedGeneFeatures = new Vector<Feature>();
    // We'll record the number of failures so we can report it
    int noTranscriptCount = 0;
    int noIntronCount = 0;
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        progressUpdated("Making features for chr" + chrs[c].name(), c, chrs.length * 2);
        Feature[] geneFeatures = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedGeneFeatureType());
        Arrays.sort(geneFeatures);
        Feature[] transcriptFeatures = collection().genome().annotationCollection().getFeaturesForType(chrs[c], optionsPanel.getSelectedTranscriptFeatureType());
        Arrays.sort(transcriptFeatures);
        // We need to figure out whether we can match based on name, or whether we need to do it based on location.
        boolean matchOnNames = true;
        for (int t = 0; t < transcriptFeatures.length; t++) {
            if (!transcriptFeatures[t].name().matches("-\\d\\d\\d$")) {
                matchOnNames = false;
                break;
            }
        }
        if (!matchOnNames) {
            progressWarningReceived(new SeqMonkException("Non-Ensembl transcript names found - matching based on position"));
        }
        if (matchOnNames) {
            for (int i = 0; i < transcriptFeatures.length; i++) {
                String name = transcriptFeatures[i].name();
                name = name.replaceAll("-\\d\\d\\d$", "");
                if (!transcriptLookup.containsKey(name)) {
                    transcriptLookup.put(name, new Vector<Feature>());
                }
                transcriptLookup.get(name).add(transcriptFeatures[i]);
            }
        } else {
            // We need to go through the genes and transcripts in parallel and match them up based on containment
            // and direction.
            int lastGeneIndex = 0;
            for (int t = 0; t < transcriptFeatures.length; t++) {
                for (int g = lastGeneIndex; g < geneFeatures.length; g++) {
                    if (transcriptFeatures[t].location().start() > geneFeatures[g].location().end()) {
                        lastGeneIndex = g;
                        continue;
                    }
                    // If the gene is beyond the end of the transcript we can stop looking
                    if (geneFeatures[g].location().start() > transcriptFeatures[t].location().end()) {
                        break;
                    }
                    // If we're on the same strand and contained within the gene then we have a match
                    if (geneFeatures[g].location().strand() == transcriptFeatures[t].location().strand() && transcriptFeatures[t].location().start() >= geneFeatures[g].location().start() && transcriptFeatures[t].location().end() <= geneFeatures[g].location().end()) {
                        String name = geneFeatures[g].name();
                        if (!transcriptLookup.containsKey(name)) {
                            transcriptLookup.put(name, new Vector<Feature>());
                        }
                        transcriptLookup.get(name).add(transcriptFeatures[t]);
                    }
                }
            }
        }
        for (int f = 0; f < geneFeatures.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            if (!transcriptLookup.containsKey(geneFeatures[f].name())) {
                ++noTranscriptCount;
                // No good making features for genes with no transcript.
                continue;
            }
            Feature[] localTranscripts = transcriptLookup.get(geneFeatures[f].name()).toArray(new Feature[0]);
            boolean[] exonPositions = new boolean[geneFeatures[f].location().length()];
            for (int t = 0; t < localTranscripts.length; t++) {
                if (!SequenceRead.overlaps(geneFeatures[f].location().packedPosition(), localTranscripts[t].location().packedPosition())) {
                    // It's not a match for this gene
                    continue;
                }
                if (!(localTranscripts[t].location() instanceof SplitLocation)) {
                    for (int p = localTranscripts[t].location().start() - geneFeatures[f].location().start(); p <= localTranscripts[t].location().end() - geneFeatures[f].location().start(); p++) {
                        if (p < 0)
                            continue;
                        if (p >= exonPositions.length)
                            continue;
                        exonPositions[p] = true;
                    }
                    continue;
                }
                Location[] exons = ((SplitLocation) localTranscripts[t].location()).subLocations();
                for (int e = 0; e < exons.length; e++) {
                    for (int p = exons[e].start() - geneFeatures[f].location().start(); p <= exons[e].end() - geneFeatures[f].location().start(); p++) {
                        if (p < 0)
                            continue;
                        if (p >= exonPositions.length)
                            continue;
                        exonPositions[p] = true;
                    }
                }
            }
            int exonPositionCount = 0;
            int intronPositionCount = 0;
            for (int p = 0; p < exonPositions.length; p++) {
                if (exonPositions[p])
                    exonPositionCount++;
                else
                    intronPositionCount++;
            }
            if (exonPositionCount == 0) {
                ++noTranscriptCount;
                continue;
            }
            if (intronPositionCount == 0) {
                ++noIntronCount;
                continue;
            }
            probes.add(new Probe(chrs[c], geneFeatures[f].location().start(), geneFeatures[f].location().end(), geneFeatures[f].location().strand(), geneFeatures[f].name()));
            usedGeneFeatures.add(geneFeatures[f]);
        }
    }
    if (noTranscriptCount > 0) {
        progressWarningReceived(new SeqMonkException("" + noTranscriptCount + " genes had no associated transcripts"));
    }
    if (noIntronCount > 0) {
        progressWarningReceived(new SeqMonkException("" + noIntronCount + " genes had no intron only areas"));
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Gene features over " + optionsPanel.getSelectedGeneFeatureType(), allProbes));
    // Having made probes we now need to quantitate them.  We'll get the full set of reads for the full gene
    // and then work out a combined set of exons.  We can then quantitate the bases which fall into introns
    // and exons separately and then work out a ratio from there.
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    Feature[] geneFeatures = usedGeneFeatures.toArray(new Feature[0]);
    for (int g = 0; g < geneFeatures.length; g++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        if (g % 100 == 0) {
            progressUpdated("Quantitating features", g, geneFeatures.length);
        }
        int[] readLengths = new int[data.length];
        if (correctReadLength) {
            for (int d = 0; d < data.length; d++) {
                readLengths[d] = data[d].getMaxReadLength();
            }
        }
        // Find the set of transcripts which relate to this gene.
        Feature[] transcripts = transcriptLookup.get(geneFeatures[g].name()).toArray(new Feature[0]);
        Vector<Feature> validTranscripts = new Vector<Feature>();
        for (int t = 0; t < transcripts.length; t++) {
            if (SequenceRead.overlaps(geneFeatures[g].location().packedPosition(), transcripts[t].location().packedPosition())) {
                validTranscripts.add(transcripts[t]);
            }
        }
        transcripts = validTranscripts.toArray(new Feature[0]);
        // before so if we do then something has gone wrong.
        if (transcripts.length == 0) {
            throw new IllegalStateException("No transcripts for gene " + geneFeatures[g] + " on chr " + geneFeatures[g].chromosomeName());
        }
        Vector<Location> allExonsVector = new Vector<Location>();
        for (int t = 0; t < transcripts.length; t++) {
            if (transcripts[t].location() instanceof SplitLocation) {
                Location[] sublocs = ((SplitLocation) transcripts[t].location()).subLocations();
                for (int i = 0; i < sublocs.length; i++) allExonsVector.add(sublocs[i]);
            } else {
                allExonsVector.add(transcripts[t].location());
            }
        }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // System.err.println("Cpa6 had "+allExonsVector.size()+" total exons");
        // }
        Collections.sort(allExonsVector);
        // Now go through and make a merged set of exons to remove any redundancy
        Vector<Location> mergedExonsVector = new Vector<Location>();
        Location lastLocation = null;
        Enumeration<Location> en = allExonsVector.elements();
        while (en.hasMoreElements()) {
            Location l = en.nextElement();
            if (lastLocation == null) {
                // if (geneFeatures[f].name().equals("Cpa6")) {
                // System.err.println("Setting as first location");
                // }
                lastLocation = l;
                continue;
            }
            // Check if it's the same as the last, which is likely
            if (l.start() == lastLocation.start() && l.end() == lastLocation.end()) {
                continue;
            }
            // Check if they overlap and can be merged
            if (l.start() <= lastLocation.end() && l.end() >= lastLocation.start()) {
                // if (geneFeatures[f].name().equals("Cpa6")) {
                // System.err.println("Overlaps with last location");
                // }
                // It overlaps with the last location so merge them
                lastLocation = new Location(Math.min(l.start(), lastLocation.start()), Math.max(l.end(), lastLocation.end()), geneFeatures[g].location().strand());
                // }
                continue;
            }
            // Start a new location
            // if (geneFeatures[f].name().equals("Cpa6")) {
            // System.err.println("Doesn't overlap - adding last location and creating new one");
            // }
            mergedExonsVector.add(lastLocation);
            lastLocation = l;
        }
        if (lastLocation != null) {
            mergedExonsVector.add(lastLocation);
        }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // System.err.println("Cpa6 had "+mergedExonsVector.size()+" merged exons");
        // }
        // Now we can start the quantitation.
        int intronLength = geneFeatures[g].location().length();
        int exonLength = 0;
        Location[] subLocs = mergedExonsVector.toArray(new Location[0]);
        for (int l = 0; l < subLocs.length; l++) {
            exonLength += subLocs[l].length();
        }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // System.err.println("Cpa6 total intron length="+intronLength+" exon length="+exonLength);
        // }
        intronLength -= exonLength;
        if (intronLength <= 0) {
            progressWarningReceived(new IllegalStateException("Intron length of " + intronLength + " for gene " + geneFeatures[g]));
            for (int d = 0; d < data.length; d++) {
                data[d].setValueForProbe(allProbes[g], Float.NaN);
            }
            continue;
        }
        if (exonLength <= 0) {
            progressWarningReceived(new IllegalStateException("Exon length of " + exonLength + " for gene " + geneFeatures[g]));
            for (int d = 0; d < data.length; d++) {
                data[d].setValueForProbe(allProbes[g], Float.NaN);
            }
            continue;
        }
        for (int d = 0; d < data.length; d++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            int totalIntronCount = 0;
            int totalExonCount = 0;
            long[] reads = data[d].getReadsForProbe(new Probe(chrLookup.get(geneFeatures[g].chromosomeName()), geneFeatures[g].location().start(), geneFeatures[g].location().end()));
            for (int r = 0; r < reads.length; r++) {
                if (!readFilter.useRead(geneFeatures[g].location(), reads[r])) {
                    continue;
                }
                int overlap = (Math.min(geneFeatures[g].location().end(), SequenceRead.end(reads[r])) - Math.max(geneFeatures[g].location().start(), SequenceRead.start(reads[r]))) + 1;
                totalIntronCount += overlap;
                // Now we see if we overlap with any of the exons
                for (int s = 0; s < subLocs.length; s++) {
                    if (subLocs[s].start() <= SequenceRead.end(reads[r]) && subLocs[s].end() >= SequenceRead.start(reads[r])) {
                        int exonOverlap = (Math.min(subLocs[s].end(), SequenceRead.end(reads[r])) - Math.max(subLocs[s].start(), SequenceRead.start(reads[r]))) + 1;
                        if (exonOverlap > 0) {
                            totalExonCount += exonOverlap;
                            totalIntronCount -= exonOverlap;
                        }
                    }
                }
            }
            if (totalIntronCount < 0) {
                progressWarningReceived(new SeqMonkException("Intron count of " + totalIntronCount + " for " + geneFeatures[g].name()));
                continue;
            }
            // reads which this count could comprise, rounding down to a whole number.
            if (correctReadLength) {
                totalIntronCount /= readLengths[d];
                totalExonCount /= readLengths[d];
            }
            float intronValue = totalIntronCount;
            float exonValue = totalExonCount;
            // If we're log transforming then we need to set zero values to 0.9
            if (logTransform && intronValue == 0) {
                intronValue = 0.9f;
            }
            if (logTransform && exonValue == 0) {
                exonValue = 0.9f;
            }
            // been asked to.
            if (applyTranscriptLengthCorrection) {
                intronValue /= (intronLength / 1000f);
                exonValue /= (exonLength / 1000f);
            }
            // We also correct by the total read count, or length
            if (correctReadLength) {
                intronValue /= (data[d].getTotalReadCount() / 1000000f);
                exonValue /= (data[d].getTotalReadCount() / 1000000f);
            } else {
                intronValue /= (data[d].getTotalReadLength() / 1000000f);
                exonValue /= (data[d].getTotalReadLength() / 1000000f);
            }
            // Finally we do the log transform if we've been asked to
            if (logTransform) {
                if (intronValue == 0) {
                    intronValue = 0.0001f;
                }
                if (exonValue == 0) {
                    exonValue = 0.0001f;
                }
                intronValue = (float) Math.log(intronValue) / log2;
                exonValue = (float) Math.log(exonValue) / log2;
            }
            // Now we check what value they actually wanted to record
            switch(optionsPanel.quantitationType()) {
                case (EXONS):
                    {
                        data[d].setValueForProbe(allProbes[g], exonValue);
                        break;
                    }
                case (INTRONS):
                    {
                        data[d].setValueForProbe(allProbes[g], intronValue);
                        break;
                    }
                case (RATIO):
                    {
                        if (logTransform) {
                            data[d].setValueForProbe(allProbes[g], exonValue - intronValue);
                        } else {
                            if (intronValue == 0) {
                                intronValue = 0.0001f;
                            }
                            data[d].setValueForProbe(allProbes[g], exonValue / intronValue);
                        }
                        break;
                    }
                default:
                    throw new IllegalStateException("Unknonwn quantitation type " + optionsPanel.quantitationType());
            }
        // if (geneFeatures[f].name().equals("Cpa6")) {
        // try {
        // System.err.println("Stored value was "+data[d].getValueForProbe(allProbes[currentIndex]));
        // } catch (SeqMonkException e) {
        // e.printStackTrace();
        // }
        // }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Splicing efficiency quantitation");
    if (correctReadLength) {
        quantitationDescription.append(" counting reads");
    } else {
        quantitationDescription.append(" counting bases");
    }
    if (optionsPanel.logTransform()) {
        quantitationDescription.append(" log transformed");
    }
    if (applyTranscriptLengthCorrection) {
        quantitationDescription.append(" correcting for feature length");
    }
    // TODO: Add more description
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) Hashtable(java.util.Hashtable) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 14 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class IntronRegressionPipeline method startPipeline.

protected void startPipeline() {
    // We first need to generate probes over all of the features listed in
    // the feature types.  The probes should cover the whole area of the
    // feature regardless of where it splices.
    Vector<Probe> probes = new Vector<Probe>();
    int minDensity = optionsPanel.minDensity();
    int minLength = optionsPanel.minLength();
    double maxPValue = optionsPanel.maxPValue();
    int binSize = optionsPanel.measurementBinSize();
    QuantitationStrandType readFilter = optionsPanel.readFilter();
    Chromosome[] chrs = collection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        Vector<Probe> probesForThisChromosome = new Vector<Probe>();
        progressUpdated("Making probes", c, chrs.length);
        Feature[] features = getValidFeatures(chrs[c]);
        for (int f = 0; f < features.length; f++) {
            if (cancel) {
                progressCancelled();
                return;
            }
            // There are no introns here
            if (!(features[f].location() instanceof SplitLocation))
                continue;
            Location[] subLocations = ((SplitLocation) features[f].location()).subLocations();
            // TODO: Reverse the subLocations if its a reverse feature
            for (int intron = 1; intron < subLocations.length; intron++) {
                int start = subLocations[intron - 1].end();
                int end = subLocations[intron].start();
                if ((end - start) + 1 < minLength) {
                    // This intron is too short.
                    continue;
                }
                // TODO: We could throw away any probes which didn't have enough reads in any feature
                Probe p = new Probe(chrs[c], start, end, features[f].location().strand(), features[f].name() + "_" + intron);
                probesForThisChromosome.add(p);
            }
        }
        // Now we can deduplicate the probes for this chromosome and add them to the main collection
        Probe[] dupProbes = probesForThisChromosome.toArray(new Probe[0]);
        Arrays.sort(dupProbes);
        for (int p = 0; p < dupProbes.length; p++) {
            if (p > 0 && dupProbes[p].packedPosition() == dupProbes[p - 1].packedPosition())
                continue;
            probes.add(dupProbes[p]);
        }
    }
    Probe[] allProbes = probes.toArray(new Probe[0]);
    collection().setProbeSet(new ProbeSet("Features over " + optionsPanel.getSelectedFeatureType(), allProbes));
    // Now we go back through the probes and quantitate them
    for (int p = 0; p < allProbes.length; p++) {
        if (cancel) {
            progressCancelled();
            return;
        }
        if (p % 1000 == 0) {
            progressUpdated("Quantitated " + p + " out of " + allProbes.length + " probes", p, allProbes.length);
        }
        for (int d = 0; d < data.length; d++) {
            long[] reads = data[d].getReadsForProbe(allProbes[p]);
            int[] countsPerSite = new int[allProbes[p].length()];
            int usableCounts = 0;
            for (int r = 0; r < reads.length; r++) {
                if (readFilter.useRead(allProbes[p], reads[r])) {
                    ++usableCounts;
                    for (int pos = Math.max(0, SequenceRead.start(reads[r]) - allProbes[p].start()); pos <= Math.min(countsPerSite.length - 1, SequenceRead.end(reads[r]) - allProbes[p].start()); pos++) {
                        ++countsPerSite[pos];
                    }
                }
            }
            if (usableCounts / (allProbes[p].length() / 1000d) >= minDensity) {
                // We're going to do a linear regression rather than a correlation
                // We're analysing in bins so we'll work out the bin counts and
                // add them dynamically to the regression.
                SimpleRegression regression = new SimpleRegression();
                int binCount = 0;
                for (int i = 0; i < countsPerSite.length; i++) {
                    if (i > 0 && i % binSize == 0) {
                        regression.addData(i, binCount);
                        binCount = 0;
                    }
                    binCount += countsPerSite[i];
                }
                float slope = (float) (regression.getSlope() * 1000000);
                double pValue = regression.getSignificance();
                if (allProbes[p].strand() == Location.REVERSE) {
                    slope = 0 - slope;
                }
                if (pValue <= maxPValue) {
                    data[d].setValueForProbe(allProbes[p], slope);
                } else {
                    data[d].setValueForProbe(allProbes[p], Float.NaN);
                }
            } else {
                data[d].setValueForProbe(allProbes[p], Float.NaN);
            }
        }
    }
    StringBuffer quantitationDescription = new StringBuffer();
    quantitationDescription.append("Intron regression pipeline quantitation ");
    quantitationDescription.append(". Directionality was ");
    quantitationDescription.append(optionsPanel.libraryTypeBox.getSelectedItem());
    quantitationDescription.append(". Min intron length was ");
    quantitationDescription.append(minLength);
    quantitationDescription.append(". Min read density was ");
    quantitationDescription.append(minDensity);
    quantitationDescription.append(". Max slope p-value was ");
    quantitationDescription.append(maxPValue);
    collection().probeSet().setCurrentQuantitation(quantitationDescription.toString());
    quantitatonComplete();
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) ProbeSet(uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet) SimpleRegression(org.apache.commons.math3.stat.regression.SimpleRegression) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation) QuantitationStrandType(uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType) Vector(java.util.Vector) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location) SplitLocation(uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)

Example 15 with Location

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Location in project SeqMonk by s-andrews.

the class GenericAnnotationParser method parseAnnotation.

/* (non-Javadoc)
	 * @see uk.ac.babraham.SeqMonk.AnnotationParsers.AnnotationParser#parseAnnotation(java.io.File, uk.ac.babraham.SeqMonk.DataTypes.Genome.Genome)
	 */
public AnnotationSet[] parseAnnotation(File file, Genome genome) throws Exception {
    this.file = file;
    if (options == null) {
        options = new JDialog(SeqMonkApplication.getInstance());
        options.setModal(true);
        options.setContentPane(new GenericAnnotationParserOptions(options));
        options.setSize(700, 400);
        options.setLocationRelativeTo(null);
    }
    // We have to set cancel to true as a default so we don't try to
    // proceed with processing if the user closes the options using
    // the X on the window.
    options.setTitle("Format for " + file.getName() + "...");
    cancel = true;
    options.setVisible(true);
    if (cancel) {
        progressCancelled();
        return null;
    }
    // We keep track of how many types have been added
    // to catch if someone sets the wrong field and makes
    // loads of different feature types.
    HashSet<String> addedTypes = new HashSet<String>();
    BufferedReader br;
    if (file.getName().toLowerCase().endsWith(".gz")) {
        br = new BufferedReader(new InputStreamReader(new GZIPInputStream(new FileInputStream(file))));
    } else {
        br = new BufferedReader(new FileReader(file));
    }
    String line;
    // First skip the header lines
    for (int i = 0; i < startRowValue; i++) {
        line = br.readLine();
        if (line == null) {
            throw new Exception("Ran out of file before skipping all of the header lines");
        }
    }
    int maxIndexValue = 0;
    if (chrColValue > maxIndexValue)
        maxIndexValue = chrColValue;
    if (startColValue > maxIndexValue)
        maxIndexValue = startColValue;
    if (endColValue > maxIndexValue)
        maxIndexValue = endColValue;
    if (strandColValue > maxIndexValue)
        maxIndexValue = strandColValue;
    if (typeColValue > maxIndexValue)
        maxIndexValue = typeColValue;
    if (nameColValue > maxIndexValue)
        maxIndexValue = nameColValue;
    if (descriptionColValue > maxIndexValue)
        maxIndexValue = descriptionColValue;
    Vector<AnnotationSet> annotationSets = new Vector<AnnotationSet>();
    AnnotationSet currentAnnotation = new AnnotationSet(genome, file.getName());
    annotationSets.add(currentAnnotation);
    int lineCount = 0;
    // Now process the rest of the file
    while ((line = br.readLine()) != null) {
        ++lineCount;
        if (cancel) {
            progressCancelled();
            return null;
        }
        if (lineCount % 1000 == 0) {
            progressUpdated("Read " + lineCount + " lines from " + file.getName(), 0, 1);
        }
        if (lineCount > 1000000 && lineCount % 1000000 == 0) {
            progressUpdated("Caching...", 0, 1);
            currentAnnotation.finalise();
            currentAnnotation = new AnnotationSet(genome, file.getName() + "[" + annotationSets.size() + "]");
            annotationSets.add(currentAnnotation);
        }
        String[] sections = line.split(delimitersValue, -1);
        // Check to see if we've got enough data to work with
        if (maxIndexValue >= sections.length) {
            progressWarningReceived(new SeqMonkException("Not enough data (" + sections.length + ") to get a probe name on line '" + line + "'"));
            // Skip this line...
            continue;
        }
        int strand;
        int start;
        int end;
        try {
            start = Integer.parseInt(sections[startColValue]);
            end = Integer.parseInt(sections[endColValue]);
            // End must always be later than start
            if (end < start) {
                int temp = start;
                start = end;
                end = temp;
            }
            if (useStrand) {
                if (sections[strandColValue].equals("+") || sections[strandColValue].equals("1") || sections[strandColValue].equals("FF") || sections[strandColValue].equals("F")) {
                    strand = Location.FORWARD;
                } else if (sections[strandColValue].equals("-") || sections[strandColValue].equals("-1") || sections[strandColValue].equals("RF") || sections[strandColValue].equals("R")) {
                    strand = Location.REVERSE;
                } else {
                    progressWarningReceived(new SeqMonkException("Unknown strand character '" + sections[strandColValue] + "' marked as unknown strand"));
                    strand = Location.UNKNOWN;
                }
            } else {
                strand = Location.UNKNOWN;
            }
        } catch (NumberFormatException e) {
            progressWarningReceived(new SeqMonkException("Location " + sections[startColValue] + "-" + sections[endColValue] + " was not an integer"));
            continue;
        }
        ChromosomeWithOffset c;
        try {
            c = genome.getChromosome(sections[chrColValue]);
        } catch (IllegalArgumentException sme) {
            progressWarningReceived(sme);
            continue;
        }
        end = c.position(end);
        start = c.position(start);
        // We also don't allow readings which are beyond the end of the chromosome
        if (end > c.chromosome().length()) {
            int overrun = end - c.chromosome().length();
            progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + c.chromosome().name() + " (" + c.chromosome().length() + ")"));
            continue;
        }
        if (start < 1) {
            progressWarningReceived(new SeqMonkException("Reading start position " + start + " was less than 1"));
            continue;
        }
        // Now we can add the type, name and description
        String type;
        // If there's a column containing the type then use that
        if (typeColValue >= 0) {
            type = sections[typeColValue];
        } else // If not then use the manually specified type if they set it
        if (featureType != null && featureType.length() > 0) {
            type = featureType;
        } else // If all else fails use the file name as the type
        {
            type = file.getName();
        }
        if (!addedTypes.contains(type)) {
            addedTypes.add(type);
            if (addedTypes.size() > 100) {
                throw new SeqMonkException("More than 100 different types of feature added - you don't want to do this!");
            }
        }
        String name = null;
        if (nameColValue >= 0) {
            name = sections[nameColValue];
        }
        String description = null;
        if (descriptionColValue >= 0) {
            description = sections[descriptionColValue];
        }
        // We can now make the new annotation
        Feature feature = new Feature(type, c.chromosome().name());
        if (name != null) {
            feature.addAttribute("name", name);
        }
        if (description != null)
            feature.addAttribute("description", description);
        feature.setLocation(new Location(start, end, strand));
        currentAnnotation.addFeature(feature);
    // System.out.println("Added probe "+newProbe.name()+" on "+newProbe.chromosome()+" at pos "+newProbe.position());
    }
    // We're finished with the file.
    br.close();
    options.dispose();
    options = null;
    return annotationSets.toArray(new AnnotationSet[0]);
}
Also used : ChromosomeWithOffset(uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset) InputStreamReader(java.io.InputStreamReader) AnnotationSet(uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet) Feature(uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature) FileInputStream(java.io.FileInputStream) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) GZIPInputStream(java.util.zip.GZIPInputStream) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Vector(java.util.Vector) JDialog(javax.swing.JDialog) HashSet(java.util.HashSet) Location(uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)

Aggregations

Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)15 Vector (java.util.Vector)14 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)13 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)12 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)9 Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)7 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)5 LongVector (uk.ac.babraham.SeqMonk.Utilities.LongVector)5 Hashtable (java.util.Hashtable)3 AnnotationSet (uk.ac.babraham.SeqMonk.DataTypes.Genome.AnnotationSet)3 QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType)3 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)3 BufferedReader (java.io.BufferedReader)2 FileInputStream (java.io.FileInputStream)2 FileReader (java.io.FileReader)2 InputStreamReader (java.io.InputStreamReader)2 GZIPInputStream (java.util.zip.GZIPInputStream)2 ChromosomeWithOffset (uk.ac.babraham.SeqMonk.Utilities.ChromosomeWithOffset)2 Enumeration (java.util.Enumeration)1 HashSet (java.util.HashSet)1