Search in sources :

Example 6 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UcscGenePred method getExons.

/**
 * Converts the exon columns of one genePred record into GTF "exon" features.
 *
 * <p>Fields read from {@code genePredList}: 0 (written as transcript_id and as the prefix of
 * exon_id), 1 (GTF sequence column), 2 (GTF strand column), 7 (exon count), 8 and 9
 * (comma-separated exon starts and ends), 11 (written as both gene_id and gene_name).
 * Each start is incremented by one before being written; ends are written unchanged.
 *
 * <p>The coordinate arrays are sized from field 7. If field 8 or 9 lists more values than
 * that, the array store fails with {@link ArrayIndexOutOfBoundsException}; if it lists
 * fewer, the missing positions keep the array default of 0.
 *
 * @param genePredList the fields of a single genePred line
 * @param source value written to the GTF source column
 * @return one feature per declared exon, in record order, numbered from 1
 * @throws InvalidGenomicCoordsException declared by this method; presumably raised by the
 *         IntervalFeature constructor -- confirm against that class
 */
private List<IntervalFeature> getExons(List<String> genePredList, String source) throws InvalidGenomicCoordsException {
    final int declaredExons = Integer.parseInt(genePredList.get(7));

    // Exon starts, in the order they appear in the record.
    Iterable<String> startFields = Splitter.on(",").omitEmptyStrings().split(genePredList.get(8));
    int[] starts = new int[declaredExons];
    int filled = 0;
    for (String field : startFields) {
        starts[filled] = Integer.parseInt(field);
        filled++;
    }

    // Exon ends, same layout as the starts.
    Iterable<String> endFields = Splitter.on(",").omitEmptyStrings().split(genePredList.get(9));
    int[] ends = new int[declaredExons];
    filled = 0;
    for (String field : endFields) {
        ends[filled] = Integer.parseInt(field);
        filled++;
    }

    List<IntervalFeature> exons = new ArrayList<IntervalFeature>();
    for (int k = 0; k < starts.length; k++) {
        final int exonNumber = k + 1;
        String[] columns = {
            genePredList.get(1),
            source,
            "exon",
            // Start is shifted by one; end is used as given.
            Integer.toString(starts[k] + 1),
            Integer.toString(ends[k]),
            ".",
            // Strand
            genePredList.get(2),
            // Frame: exons do not have a frame set, CDS do.
            ".",
            new StringBuilder()
                .append("gene_id \"").append(genePredList.get(11)).append('"')
                .append("; transcript_id \"").append(genePredList.get(0)).append('"')
                .append("; exon_number \"").append(exonNumber).append('"')
                .append("; exon_id \"").append(genePredList.get(0)).append(".").append(exonNumber).append('"')
                .append("; gene_name \"").append(genePredList.get(11)).append("\";")
                .toString()
        };
        exons.add(new IntervalFeature(Joiner.on("\t").join(columns), TrackFormat.GTF, null));
    }
    return exons;
}
Also used : ArrayList(java.util.ArrayList) IntervalFeature(tracks.IntervalFeature)

Example 7 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UcscGenePred method getStartCodonRev.

/**
 * Builds the "start_codon" GTF features for a transcript that is not on the '+' strand.
 *
 * <p>The codon is taken from the highest coordinates of the coding sequence: the last three
 * bases of the last element of {@code cds}. If that element is shorter than three bases the
 * missing bases are taken from the end of the preceding element(s), so the codon may be
 * returned as several features.
 *
 * <pre>
 *   cccccccc      cc     CDS segments
 *          a      aa     start codon pieces
 * </pre>
 *
 * <p>Unlike a fixed two-segment lookup, this never indexes past the start of {@code cds}: if
 * the whole coding sequence is shorter than three bases, only the available bases are
 * returned.
 *
 * @param cds CDS features of one transcript; the last element is expected to be the
 *        right-most segment (assumed from how the list is indexed -- confirm with the caller)
 * @param cdsEndStat completeness flag of the CDS end; anything other than {@code "cmpl"}
 *        (including {@code null}) yields an empty result
 * @return start codon pieces, the piece from the last CDS segment first; empty if
 *         {@code cds} is empty, the first CDS is on '+', or the status is not "cmpl"
 * @throws InvalidGenomicCoordsException declared by this method; presumably raised by the
 *         IntervalFeature constructor -- confirm against that class
 */
private List<IntervalFeature> getStartCodonRev(List<IntervalFeature> cds, String cdsEndStat) throws InvalidGenomicCoordsException {
    List<IntervalFeature> codons = new ArrayList<IntervalFeature>();
    if (cds.isEmpty() || cds.get(0).getStrand() == '+' || !"cmpl".equals(cdsEndStat)) {
        return codons;
    }
    // Walk the CDS segments from the last one backwards, taking bases from the end of each
    // segment until the three bases of the codon have been collected.
    int missing = 3;
    for (int i = cds.size() - 1; i >= 0 && missing > 0; i--) {
        IntervalFeature c = cds.get(i);
        int pieceEnd = c.getTo();
        // Never reach before the start of this segment; the rest comes from the previous one.
        int pieceStart = Math.max(c.getFrom(), pieceEnd - missing + 1);

        String[] gff = new String[9];
        gff[0] = c.getChrom();
        gff[1] = c.getSource();
        gff[2] = "start_codon";
        gff[3] = Integer.toString(pieceStart);
        gff[4] = Integer.toString(pieceEnd);
        gff[5] = ".";
        gff[6] = String.valueOf(c.getStrand());
        gff[7] = ".";
        // Reuse the attribute column (9th field) of the CDS record this piece comes from.
        gff[8] = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(c.getRaw())).get(8);
        codons.add(new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null));

        missing -= pieceEnd - pieceStart + 1;
    }
    return codons;
}
Also used : ArrayList(java.util.ArrayList) IntervalFeature(tracks.IntervalFeature)

Example 8 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UcscGenePred method getRightUTR.

/**
 * Builds the UTR features lying to the right of the coding region, one per exon that extends
 * past {@code cdsEnd}.
 *
 * <p>{@code cdsEnd} is treated as the last coding base, in the same coordinates as
 * {@code exon.getTo()}: an exon ending at or before it is skipped as entirely non-UTR on
 * this side. For the exon that contains {@code cdsEnd}, the UTR therefore starts at
 * {@code cdsEnd + 1}; exons entirely to the right are emitted whole.
 *
 * <p>Features are labelled "3UTR" when the exon strand is '+', otherwise "5UTR".
 *
 * @param exons exon features of one transcript
 * @param cdsStart start of the coding region; only used to detect {@code cdsStart > cdsEnd}
 * @param cdsEnd last coding base (see above)
 * @return the right-hand UTR features, in the order of {@code exons}; empty when
 *         {@code cdsStart > cdsEnd} or when no exon extends past {@code cdsEnd}
 * @throws InvalidGenomicCoordsException declared by this method; presumably raised by the
 *         IntervalFeature constructor -- confirm against that class
 */
private List<IntervalFeature> getRightUTR(List<IntervalFeature> exons, final int cdsStart, final int cdsEnd) throws InvalidGenomicCoordsException {
    List<IntervalFeature> utr = new ArrayList<IntervalFeature>();
    if (cdsStart > cdsEnd) {
        // There is no UTR in this transcript
        return utr;
    }
    for (IntervalFeature exon : exons) {
        if (exon.getTo() <= cdsEnd) {
            // Exon lies fully to the left of (or ends at) cdsEnd: no right UTR in it
            continue;
        }
        // Used as is when the exon is completely to the right of cdsEnd
        int utrExonStart = exon.getFrom();
        if (exon.getFrom() <= cdsEnd) {
            // This exon contains cdsEnd: the UTR begins on the first base after it, so that
            // the UTR does not overlap the coding bases (including when the exon starts
            // exactly at cdsEnd).
            utrExonStart = cdsEnd + 1;
        }
        String[] gff = new String[9];
        gff[0] = exon.getChrom();
        gff[1] = exon.getSource();
        gff[2] = exon.getStrand() == '+' ? "3UTR" : "5UTR";
        gff[3] = Integer.toString(utrExonStart);
        gff[4] = Integer.toString(exon.getTo());
        gff[5] = ".";
        // Same strand as the one used to pick the feature label above
        gff[6] = String.valueOf(exon.getStrand());
        gff[7] = ".";
        // Reuse the attribute column (9th field) of the exon record
        gff[8] = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(exon.getRaw())).get(8);
        IntervalFeature x = new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null);
        utr.add(x);
    }
    return utr;
}
Also used : ArrayList(java.util.ArrayList) IntervalFeature(tracks.IntervalFeature)

Example 9 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UcscGenePred method getStartCodonFwd.

/**
 * Builds the "start_codon" GTF features for a transcript that is not on the '-' strand.
 *
 * <p>The codon is taken from the lowest coordinates of the coding sequence: the first three
 * bases of the first element of {@code cds}. If that element is shorter than three bases the
 * missing bases are taken from the start of the following element(s), so the codon may be
 * returned as several features.
 *
 * <pre>
 *   CC   CCCCC     CDS segments
 *   AA   A         start codon pieces
 * </pre>
 *
 * <p>Unlike a fixed two-segment lookup, this never indexes past the end of {@code cds}: if
 * the whole coding sequence is shorter than three bases, only the available bases are
 * returned.
 *
 * @param cds CDS features of one transcript; the first element is expected to be the
 *        left-most segment (assumed from how the list is indexed -- confirm with the caller)
 * @param cdsStartStat completeness flag of the CDS start; anything other than {@code "cmpl"}
 *        (including {@code null}) yields an empty result
 * @return start codon pieces in the order of {@code cds}; empty if {@code cds} is empty,
 *         the first CDS is on '-', or the status is not "cmpl"
 * @throws InvalidGenomicCoordsException declared by this method; presumably raised by the
 *         IntervalFeature constructor -- confirm against that class
 */
private List<IntervalFeature> getStartCodonFwd(List<IntervalFeature> cds, String cdsStartStat) throws InvalidGenomicCoordsException {
    List<IntervalFeature> codons = new ArrayList<IntervalFeature>();
    if (cds.isEmpty() || cds.get(0).getStrand() == '-' || !"cmpl".equals(cdsStartStat)) {
        return codons;
    }
    // Walk the CDS segments from the first one onwards, taking bases from the start of each
    // segment until the three bases of the codon have been collected.
    int missing = 3;
    for (int i = 0; i < cds.size() && missing > 0; i++) {
        IntervalFeature c = cds.get(i);
        int pieceStart = c.getFrom();
        // Never reach past the end of this segment; the rest comes from the next one.
        int pieceEnd = Math.min(c.getTo(), pieceStart + missing - 1);

        String[] gff = new String[9];
        gff[0] = c.getChrom();
        gff[1] = c.getSource();
        gff[2] = "start_codon";
        gff[3] = Integer.toString(pieceStart);
        gff[4] = Integer.toString(pieceEnd);
        gff[5] = ".";
        gff[6] = String.valueOf(c.getStrand());
        gff[7] = ".";
        // Reuse the attribute column (9th field) of the CDS record this piece comes from.
        gff[8] = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(c.getRaw())).get(8);
        codons.add(new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null));

        missing -= pieceEnd - pieceStart + 1;
    }
    return codons;
}
Also used : ArrayList(java.util.ArrayList) IntervalFeature(tracks.IntervalFeature)

Example 10 with IntervalFeature

use of tracks.IntervalFeature in project ASCIIGenome by dariober.

the class UtilsTest method canMergeIntervals.

/**
 * Exercises {@code Utils.mergeIntervalFeatures(list, false)} on GTF and BED inputs.
 *
 * <p>Scenarios, in order: empty list; a feature fully contained in another; a chain of
 * overlapping features; a single BED feature (start reported 1-based, name preserved);
 * overlapping and disjoint BED features added incrementally; touching BED features;
 * adjacent GTF features; disjoint GTF features plus an exact duplicate.
 *
 * <p>The list {@code intv} is reused throughout: it is only reset where {@code clear()} is
 * called, so the assertions between two resets build on the features added before them.
 */
@Test
public void canMergeIntervals() throws InvalidGenomicCoordsException, InvalidColourException {
    // Zero len list
    List<IntervalFeature> intv = new ArrayList<IntervalFeature>();
    assertEquals(0, Utils.mergeIntervalFeatures(intv, false).size());
    // Fully contained feature: 200-300 lies inside 100-1000, result is 100-1000
    intv.clear();
    intv.add(new IntervalFeature("chr1 . . 100 1000 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    intv.add(new IntervalFeature("chr1 . . 200 300 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
    assertEquals(100, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
    assertEquals(1000, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
    // Partially overlapping features: 500-5000 extends the merged interval to 100-5000
    intv.clear();
    intv.add(new IntervalFeature("chr1 . . 100 1000 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    intv.add(new IntervalFeature("chr1 . . 200 300 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    intv.add(new IntervalFeature("chr1 . . 500 5000 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
    assertEquals(100, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
    assertEquals(5000, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
    /* MEMO: Start of bed features must be augmented by 1 */
    // One feature: BED start 0 is expected back as getFrom() == 1
    intv.clear();
    intv.add(new IntervalFeature("chr1 0 10 x1".replaceAll(" ", "\t"), TrackFormat.BED, null));
    assertEquals(1, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
    // Test the name is taken from the original feature since only one interval is merged (i.e. no merging at all)
    assertEquals(intv.get(0).getName(), Utils.mergeIntervalFeatures(intv, false).get(0).getName());
    // One overlapping feature added: the merged result must equal a plain "chr1 0 10" BED feature
    intv.add(new IntervalFeature("chr1 5 10".replaceAll(" ", "\t"), TrackFormat.BED, null));
    IntervalFeature expected = new IntervalFeature("chr1 0 10".replaceAll(" ", "\t"), TrackFormat.BED, null);
    assertEquals(expected.getFrom(), Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
    assertTrue(expected.equals(Utils.mergeIntervalFeatures(intv, false).get(0)));
    // A disjoint feature (BED 20-100) becomes a second merged interval, 21-100
    intv.add(new IntervalFeature("chr1 20 100".replaceAll(" ", "\t"), TrackFormat.BED, null));
    assertEquals(2, Utils.mergeIntervalFeatures(intv, false).size());
    assertEquals(21, Utils.mergeIntervalFeatures(intv, false).get(1).getFrom());
    assertEquals(100, Utils.mergeIntervalFeatures(intv, false).get(1).getTo());
    // Two more features overlapping the second interval extend its end to 110
    intv.add(new IntervalFeature("chr1 30 110".replaceAll(" ", "\t"), TrackFormat.BED, null));
    intv.add(new IntervalFeature("chr1 50 110".replaceAll(" ", "\t"), TrackFormat.BED, null));
    assertEquals(2, Utils.mergeIntervalFeatures(intv, false).size());
    assertEquals(21, Utils.mergeIntervalFeatures(intv, false).get(1).getFrom());
    assertEquals(110, Utils.mergeIntervalFeatures(intv, false).get(1).getTo());
    // Touching features get merged into a single one
    intv.clear();
    intv.add(new IntervalFeature("chr1 0 10".replaceAll(" ", "\t"), TrackFormat.BED, null));
    intv.add(new IntervalFeature("chr1 10 20".replaceAll(" ", "\t"), TrackFormat.BED, null));
    assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
    assertEquals(1, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
    assertEquals(20, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
    // Touching GFF feature: 1-10 and 11-20 are adjacent and merge into 1-20
    intv.clear();
    intv.add(new IntervalFeature("chr1 . . 1 10 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    intv.add(new IntervalFeature("chr1 . . 11 20 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
    assertEquals(1, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
    assertEquals(20, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
    // Nothing to merge
    intv.clear();
    intv.add(new IntervalFeature("chr1 . . 1 10 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    intv.add(new IntervalFeature("chr1 . . 20 30 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    intv.add(new IntervalFeature("chr1 . . 40 50 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    assertEquals(3, Utils.mergeIntervalFeatures(intv, false).size());
    // An exact duplicate of the last feature must not add a fourth interval
    intv.add(new IntervalFeature("chr1 . . 40 50 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
    assertEquals(3, Utils.mergeIntervalFeatures(intv, false).size());
}
Also used : ArrayList(java.util.ArrayList) IntervalFeature(tracks.IntervalFeature) Test(org.junit.Test)

Aggregations

IntervalFeature (tracks.IntervalFeature)11 ArrayList (java.util.ArrayList)9 InvalidGenomicCoordsException (exceptions.InvalidGenomicCoordsException)2 BufferedReader (java.io.BufferedReader)2 Config (coloring.Config)1 Xterm256 (coloring.Xterm256)1 SAMRecord (htsjdk.samtools.SAMRecord)1 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)1 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)1 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)1 SamReader (htsjdk.samtools.SamReader)1 SamReaderFactory (htsjdk.samtools.SamReaderFactory)1 IndexedFastaSequenceFile (htsjdk.samtools.reference.IndexedFastaSequenceFile)1 AbstractFeatureReader (htsjdk.tribble.AbstractFeatureReader)1 TabixReader (htsjdk.tribble.readers.TabixReader)1 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)1 ByteArrayInputStream (java.io.ByteArrayInputStream)1 File (java.io.File)1 FileInputStream (java.io.FileInputStream)1 FileReader (java.io.FileReader)1