Search in sources :

Example 71 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class GenomeParser method parseChromosome.

/**
 * Parses the chromosome.
 *
 * @param br the br
 * @return the chromosome
 * @throws SeqMonkException the seq monk exception
 * @throws IOException Signals that an I/O exception has occurred.
 */
private Chromosome parseChromosome(BufferedReader br, SingleGenome genome) throws SeqMonkException, IOException {
    String line;
    while ((line = br.readLine()) != null) {
        if (line.startsWith("AC")) {
            String[] sections = line.split(":");
            if (sections.length != 6) {
                // It's not a chromosome file.  We probably just want to
                // skip it and move onto the next entry
                progressWarningReceived(new SeqMonkException("AC line didn't have 6 sections '" + line + "'"));
                skipToEntryEnd(br);
                continue;
            }
            if (line.indexOf("supercontig") >= 0) {
                // It's not a chromosome file.  We probably just want to
                // skip it and move onto the next entry
                skipToEntryEnd(br);
                continue;
            }
            // This will return the existing chromosome of this
            // name if it exists already, but will create a new
            // one if it doesn't.
            Chromosome c = genome.addChromosome(sections[2]);
            c.setLength(Integer.parseInt(sections[4]));
            // Since the positions of all features are given relative
            // to the current sequence we need to add the current
            // start position to all locations as an offset.
            currentOffset = Integer.parseInt(sections[3]) - 1;
            return c;
        }
        if (line.startsWith("//")) {
            throw new SeqMonkException("Couldn't find AC line");
        }
    }
    return null;
}
Also used : Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException)

Example 72 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class GenomeParser method parseChrListFile.

private void parseChrListFile(SingleGenome genome, File baseLocation) throws Exception {
    File chrListFile = new File(baseLocation.getAbsolutePath() + "/chr_list");
    if (chrListFile.exists()) {
        BufferedReader br = new BufferedReader(new FileReader(chrListFile));
        String line;
        while ((line = br.readLine()) != null) {
            String[] sections = line.split("\t");
            Chromosome c = genome.addChromosome(sections[0]);
            c.setLength(Integer.parseInt(sections[1]));
        }
        br.close();
        // If we've loaded the chromosome list we also need to check at this point
        // whether there are any aliases defined since these might be used by the
        // dat or gff files.  We will end up re-adding these aliases a bit later
        // which is unfortunate but won't slow things down much so it's not too bad.
        File aliasesFile = new File(baseLocation.getAbsoluteFile() + "/aliases.txt");
        if (aliasesFile.exists()) {
            try {
                readAliases(aliasesFile, genome);
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
        }
    }
}
Also used : BufferedReader(java.io.BufferedReader) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) FileReader(java.io.FileReader) IOException(java.io.IOException) File(java.io.File)

Example 73 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class ActiveProbeListParser method processNormalDataStore.

private DataSet processNormalDataStore(ProbeList activeList) {
    int extendBy = prefs.extendReads();
    boolean reverse = prefs.reverseReads();
    boolean modifyStrand = false;
    int forcedStrand = 0;
    if (!prefs.strandOptionBox.getSelectedItem().equals("From probes")) {
        modifyStrand = true;
        if (prefs.strandOptionBox.getSelectedItem().equals("Forward")) {
            forcedStrand = Location.FORWARD;
        } else if (prefs.strandOptionBox.getSelectedItem().equals("Reverse")) {
            forcedStrand = Location.REVERSE;
        } else if (prefs.strandOptionBox.getSelectedItem().equals("Unknown")) {
            forcedStrand = Location.UNKNOWN;
        } else {
            throw new IllegalArgumentException("Unknown forced strand option " + prefs.strandOptionBox.getSelectedItem());
        }
    }
    DataSet newData = new DataSet(activeList.name(), "Reimported from " + activeList.name(), prefs.removeDuplicates());
    // Now process the data
    Chromosome[] chrs = dataCollection().genome().getAllChromosomes();
    for (int c = 0; c < chrs.length; c++) {
        progressUpdated("Processing " + activeList.name() + " chr " + chrs[c].name(), c, chrs.length);
        Probe[] probes = activeList.getProbesForChromosome(chrs[c]);
        for (int r = 0; r < probes.length; r++) {
            if (cancel) {
                progressCancelled();
                return null;
            }
            long read;
            int start = probes[r].start();
            int end = probes[r].end();
            int strand = probes[r].strand();
            if (reverse) {
                if (strand == Location.FORWARD) {
                    strand = Location.REVERSE;
                } else if (strand == Location.REVERSE) {
                    strand = Location.FORWARD;
                }
            }
            if (extendBy != 0) {
                // We now allow negative extensions to shorten reads
                if (strand == Location.FORWARD || strand == Location.UNKNOWN) {
                    end += extendBy;
                    if (end < start)
                        end = start;
                } else if (strand == Location.REVERSE) {
                    start -= extendBy;
                    if (start > end)
                        start = end;
                }
            }
            // We don't allow reads before the start of the chromosome
            if (start < 1) {
                int overrun = (0 - start) + 1;
                progressWarningReceived(new SeqMonkException("Reading position " + start + " was " + overrun + "bp before the start of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                continue;
            }
            // We also don't allow readings which are beyond the end of the chromosome
            if (end > chrs[c].length()) {
                int overrun = end - chrs[c].length();
                progressWarningReceived(new SeqMonkException("Reading position " + end + " was " + overrun + "bp beyond the end of chr" + chrs[c].name() + " (" + chrs[c].length() + ")"));
                continue;
            }
            // Force the strand to what they specified if they want this.
            if (modifyStrand) {
                strand = forcedStrand;
            }
            // We can now make the new reading
            try {
                read = SequenceRead.packPosition(start, end, strand);
                newData.addData(chrs[c], read);
            } catch (SeqMonkException e) {
                progressWarningReceived(e);
                continue;
            }
        }
    }
    return newData;
}
Also used : DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) SeqMonkException(uk.ac.babraham.SeqMonk.SeqMonkException) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 74 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class HiCOtherEndExtractor method run.

/* (non-Javadoc)
	 * @see java.lang.Runnable#run()
	 */
public void run() {
    // We need to get the baits and other options from the preferences
    Probe[] baits = prefs.getBaits();
    boolean mergeIntoOne = prefs.mergeIntoOne();
    boolean excludeSelf = prefs.excludeSelf();
    DataSet[] newData;
    if (mergeIntoOne) {
        newData = new DataSet[visibleHiCDataSets.length];
    } else {
        newData = new DataSet[visibleHiCDataSets.length * baits.length];
    }
    try {
        for (int d = 0; d < visibleHiCDataSets.length; d++) {
            if (baits.length == 1) {
                System.err.println("Only one bait");
                newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[0].chromosome().name() + ":" + baits[0].start() + "-" + baits[0].end(), DataSet.DUPLICATES_REMOVE_NO);
            } else if (mergeIntoOne) {
                System.err.println("Multiple baits, but merging");
                newData[d] = new DataSet(prefs.prefix.getText() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for " + baits.length + " regions", DataSet.DUPLICATES_REMOVE_NO);
            }
            for (int b = 0; b < baits.length; b++) {
                if (cancel) {
                    progressCancelled();
                    return;
                }
                progressUpdated("Getting other ends from " + visibleHiCDataSets[d].name() + " and " + baits[b].toString(), (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
                if (!(baits.length == 1 || mergeIntoOne)) {
                    newData[(d * baits.length) + b] = new DataSet(baits[b].toString() + "_" + visibleHiCDataSets[d].name(), "HiC other end of " + visibleHiCDataSets[d].name() + " for region " + baits[b].chromosome().name() + ":" + baits[b].start() + "-" + baits[b].end(), DataSet.DUPLICATES_REMOVE_NO);
                }
                HiCHitCollection hiCCollection = visibleHiCDataSets[d].getHiCReadsForProbe(baits[b]);
                String[] chromosomes = hiCCollection.getChromosomeNamesWithHits();
                for (int c = 0; c < chromosomes.length; c++) {
                    Chromosome chromosome = collection.genome().getChromosome(chromosomes[c]).chromosome();
                    long[] reads = hiCCollection.getHitPositionsForChromosome(chromosomes[c]);
                    for (int r = 0; r < reads.length; r++) {
                        if (cancel) {
                            progressCancelled();
                            return;
                        }
                        if (excludeSelf && baits[b].chromosome().name().equals(chromosomes[c]) && SequenceRead.overlaps(reads[r], baits[b].packedPosition())) {
                            continue;
                        }
                        if (mergeIntoOne) {
                            newData[d].addData(chromosome, reads[r]);
                        } else {
                            newData[(d * baits.length) + b].addData(chromosome, reads[r]);
                        }
                    }
                }
                if (!mergeIntoOne) {
                    // Cache the data in the new dataset
                    progressUpdated("Caching data", (d * baits.length) + b, visibleHiCDataSets.length * baits.length);
                    newData[(d * baits.length) + b].finalise();
                }
            }
            if (mergeIntoOne) {
                // Cache the data in the new dataset
                progressUpdated("Caching data", d, visibleHiCDataSets.length);
                newData[d].finalise();
            }
        }
        processingFinished(newData);
    } catch (Exception ex) {
        progressExceptionReceived(ex);
    }
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) DataSet(uk.ac.babraham.SeqMonk.DataTypes.DataSet) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome) Probe(uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)

Example 75 with Chromosome

use of uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome in project SeqMonk by s-andrews.

the class PairedDataSet method getExportableReadsForChromosome.

public synchronized HiCHitCollection getExportableReadsForChromosome(Chromosome c) {
    if (!isFinalised)
        finalise();
    HiCHitCollection exportableReads = new HiCHitCollection(c.name());
    HiCHitCollection redundantReads = getHiCReadsForChromosome(c);
    String[] chrs = redundantReads.getChromosomeNamesWithHits();
    for (int i = 0; i < chrs.length; i++) {
        // The way we have to do this lookup is ugly, but that's how it is
        // We only include data for chromosomes the same or later than our own
        Chromosome thisChr = SeqMonkApplication.getInstance().dataCollection().genome().getChromosome(chrs[i]).chromosome();
        if (c.compareTo(thisChr) < 0) {
            System.err.println("Ignoring " + thisChr + " when exporting " + c);
            continue;
        }
        long[] source = redundantReads.getSourcePositionsForChromosome(chrs[i]);
        long[] hits = redundantReads.getHitPositionsForChromosome(chrs[i]);
        if (c != thisChr) {
            // Put everything from this chromosome into the results
            for (int j = 0; j < source.length; j++) {
                exportableReads.addHit(chrs[i], source[j], hits[j]);
            }
        } else {
            // Put only the entries where the hit is later than the source in
            for (int j = 0; j < source.length; j++) {
                if ((source[j] == hits[j] && j % 2 == 0) || SequenceRead.compare(source[j], hits[j]) >= 0) {
                    exportableReads.addHit(chrs[i], source[j], hits[j]);
                }
            }
        }
    }
    return exportableReads;
}
Also used : HiCHitCollection(uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection) Chromosome(uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)

Aggregations

Chromosome (uk.ac.babraham.SeqMonk.DataTypes.Genome.Chromosome)78 Probe (uk.ac.babraham.SeqMonk.DataTypes.Probes.Probe)47 Vector (java.util.Vector)36 Feature (uk.ac.babraham.SeqMonk.DataTypes.Genome.Feature)23 SeqMonkException (uk.ac.babraham.SeqMonk.SeqMonkException)23 ProbeSet (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeSet)22 ProbeList (uk.ac.babraham.SeqMonk.DataTypes.Probes.ProbeList)12 DataStore (uk.ac.babraham.SeqMonk.DataTypes.DataStore)11 DataSet (uk.ac.babraham.SeqMonk.DataTypes.DataSet)8 ReadsWithCounts (uk.ac.babraham.SeqMonk.DataTypes.Sequence.ReadsWithCounts)8 Location (uk.ac.babraham.SeqMonk.DataTypes.Genome.Location)7 SplitLocation (uk.ac.babraham.SeqMonk.DataTypes.Genome.SplitLocation)7 ProgressListener (uk.ac.babraham.SeqMonk.DataTypes.ProgressListener)7 HiCHitCollection (uk.ac.babraham.SeqMonk.DataTypes.Sequence.HiCHitCollection)7 IOException (java.io.IOException)6 File (java.io.File)5 Hashtable (java.util.Hashtable)5 HiCDataStore (uk.ac.babraham.SeqMonk.DataTypes.HiCDataStore)5 QuantitationStrandType (uk.ac.babraham.SeqMonk.DataTypes.Sequence.QuantitationStrandType)5 PairedDataSet (uk.ac.babraham.SeqMonk.DataTypes.PairedDataSet)4