use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UcscGenePred method getExons.
/**
 * Builds one GTF "exon" feature for every exon listed in a genePred record.
 *
 * <p>Fields read from {@code genePredList} (by index): 0 is used as transcript_id and as the
 * prefix of exon_id, 1 as the sequence name, 2 as the strand, 7 as the exon count, 8 and 9 as
 * the comma-separated exon starts and ends, 11 as both gene_id and gene_name. Each start is
 * incremented by one before being written out; ends are written unchanged.
 *
 * <p>The coordinate arrays are sized by the exon count in field 7: a longer start/end list
 * overflows the array, a shorter one leaves trailing zeros.
 *
 * @param genePredList the fields of a single genePred record
 * @param source value written to the GTF source column
 * @return the exon features, in the order they are listed in the record
 * @throws InvalidGenomicCoordsException declared for the {@code IntervalFeature} constructor
 */
private List<IntervalFeature> getExons(List<String> genePredList, String source) throws InvalidGenomicCoordsException {
    final int nExons = Integer.parseInt(genePredList.get(7));

    Iterable<String> startFields = Splitter.on(",").omitEmptyStrings().split(genePredList.get(8));
    int[] starts = new int[nExons];
    int pos = 0;
    for (String field : startFields) {
        starts[pos] = Integer.parseInt(field);
        pos++;
    }

    Iterable<String> endFields = Splitter.on(",").omitEmptyStrings().split(genePredList.get(9));
    int[] ends = new int[nExons];
    pos = 0;
    for (String field : endFields) {
        ends[pos] = Integer.parseInt(field);
        pos++;
    }

    List<IntervalFeature> exons = new ArrayList<IntervalFeature>();
    for (int k = 0; k < starts.length; k++) {
        // Record fields are looked up inside the loop so that a record without exons
        // never touches them.
        final String chrom = genePredList.get(1);
        final String strand = genePredList.get(2);
        final String geneName = genePredList.get(11);
        final String transcriptId = genePredList.get(0);
        final int exonNumber = k + 1;

        final String attributes = "gene_id \"" + geneName + "\"; transcript_id \"" + transcriptId
                + "\"; exon_number \"" + exonNumber + "\"; exon_id \"" + transcriptId + "." + exonNumber
                + "\"; gene_name \"" + geneName + "\";";

        String[] gff = {
            chrom,
            source,
            "exon",
            Integer.toString(starts[k] + 1),
            Integer.toString(ends[k]),
            ".",
            strand,
            // Frame: exons do not have the frame set, CDS do.
            ".",
            attributes
        };
        exons.add(new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null));
    }
    return exons;
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UcscGenePred method getStartCodonRev.
/**
 * Builds the "start_codon" feature(s) for a transcript that is not on the '+' strand.
 *
 * <p>The codon is made of the last three bases of the CDS, taken from the end of the last
 * element of {@code cds} and, when that segment is shorter than three bases, continued at the
 * end of the preceding segment(s):
 *
 * <pre>
 *   cccccccc   cc
 *          a   aa
 * </pre>
 *
 * <p>The codon may be spread over any number of CDS segments. If the segments run out before
 * three bases have been collected, the pieces found so far are returned instead of failing
 * with an {@code IndexOutOfBoundsException}.
 *
 * <p>NOTE(review): this relies on {@code cds} being ordered so that the last element is the
 * rightmost segment — confirm against the caller.
 *
 * @param cds CDS features of one transcript
 * @param cdsEndStat completeness status of the CDS end; anything other than "cmpl" yields no codon
 * @return the codon pieces, rightmost first; empty if {@code cds} is empty, the first CDS is on
 *     the '+' strand, or {@code cdsEndStat} is not "cmpl"
 * @throws InvalidGenomicCoordsException declared for the {@code IntervalFeature} constructor
 */
private List<IntervalFeature> getStartCodonRev(List<IntervalFeature> cds, String cdsEndStat) throws InvalidGenomicCoordsException {
    List<IntervalFeature> codons = new ArrayList<IntervalFeature>();
    if (cds.isEmpty() || cds.get(0).getStrand() == '+' || !cdsEndStat.equals("cmpl")) {
        return codons;
    }

    int basesLeft = 3;
    for (int i = cds.size() - 1; i >= 0 && basesLeft > 0; i--) {
        IntervalFeature c = cds.get(i);
        int cdsLength = c.getTo() - c.getFrom() + 1;
        // Take what is still missing, but never more than this segment can give.
        int take = Math.min(basesLeft, cdsLength);

        String[] gff = new String[9];
        gff[0] = c.getChrom();
        gff[1] = c.getSource();
        gff[2] = "start_codon";
        gff[3] = Integer.toString(c.getTo() - take + 1);
        gff[4] = Integer.toString(c.getTo());
        gff[5] = ".";
        gff[6] = String.valueOf(c.getStrand());
        gff[7] = ".";
        // Reuse the attribute column (9th field) of the CDS the piece comes from.
        gff[8] = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(c.getRaw())).get(8);
        codons.add(new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null));

        basesLeft -= take;
    }
    return codons;
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UcscGenePred method getRightUTR.
/**
 * Collects the UTR features lying to the right of the coding region.
 *
 * <p>Exons whose end is at or before {@code cdsEnd} are skipped. Every other exon produces one
 * feature ending at the exon end; it starts at the exon start, or at {@code cdsEnd - 1} when
 * the exon starts before {@code cdsEnd}. The feature type is "3UTR" when the exon is on the
 * '+' strand and "5UTR" otherwise; the strand column is copied from the first exon.
 *
 * <p>NOTE(review): whether {@code cdsEnd - 1} is the right first UTR base depends on the
 * coordinate convention the caller uses for {@code cdsEnd} — confirm.
 *
 * @param exons exon features of one transcript
 * @param cdsStart start of the coding region
 * @param cdsEnd end of the coding region
 * @return the UTR features in exon order; empty when {@code cdsStart > cdsEnd}
 * @throws InvalidGenomicCoordsException declared for the {@code IntervalFeature} constructor
 */
private List<IntervalFeature> getRightUTR(List<IntervalFeature> exons, final int cdsStart, final int cdsEnd) throws InvalidGenomicCoordsException {
    List<IntervalFeature> rightUtr = new ArrayList<IntervalFeature>();
    if (cdsStart > cdsEnd) {
        // There is no UTR in this transcript.
        return rightUtr;
    }

    for (IntervalFeature exon : exons) {
        if (exon.getTo() > cdsEnd) {
            // Default: the exon is completely to the right of cdsEnd.
            int utrFrom = exon.getFrom();
            boolean containsCdsEnd = utrFrom < cdsEnd && exon.getTo() > cdsEnd;
            if (containsCdsEnd) {
                // The exon contains cdsEnd: only its right-hand part is UTR.
                utrFrom = cdsEnd - 1;
            }

            final String chrom = exon.getChrom();
            final String source = exon.getSource();
            final String featureType;
            if (exon.getStrand() == '+') {
                featureType = "3UTR";
            } else {
                featureType = "5UTR";
            }

            String[] gff = {
                chrom,
                source,
                featureType,
                Integer.toString(utrFrom),
                Integer.toString(exon.getTo()),
                ".",
                String.valueOf(exons.get(0).getStrand()),
                ".",
                // Attribute column (9th field) of the exon this UTR comes from.
                Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(exon.getRaw())).get(8)
            };
            rightUtr.add(new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null));
        }
        // Otherwise the exon ends at or before cdsEnd and has no right UTR.
    }
    return rightUtr;
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UcscGenePred method getStartCodonFwd.
/**
 * Builds the "start_codon" feature(s) for a transcript that is not on the '-' strand.
 *
 * <p>The codon is made of the first three bases of the CDS, taken from the start of the first
 * element of {@code cds} and, when that segment is shorter than three bases, continued at the
 * start of the following segment(s):
 *
 * <pre>
 *   CC   CCCCC
 *   AA   A
 * </pre>
 *
 * <p>The codon may be spread over any number of CDS segments. If the segments run out before
 * three bases have been collected, the pieces found so far are returned instead of failing
 * with an {@code IndexOutOfBoundsException}.
 *
 * <p>NOTE(review): this relies on {@code cds} being ordered so that the first element is the
 * leftmost segment — confirm against the caller.
 *
 * @param cds CDS features of one transcript
 * @param cdsStartStat completeness status of the CDS start; anything other than "cmpl" yields no codon
 * @return the codon pieces, leftmost first; empty if {@code cds} is empty, the first CDS is on
 *     the '-' strand, or {@code cdsStartStat} is not "cmpl"
 * @throws InvalidGenomicCoordsException declared for the {@code IntervalFeature} constructor
 */
private List<IntervalFeature> getStartCodonFwd(List<IntervalFeature> cds, String cdsStartStat) throws InvalidGenomicCoordsException {
    List<IntervalFeature> codons = new ArrayList<IntervalFeature>();
    if (cds.isEmpty() || cds.get(0).getStrand() == '-' || !cdsStartStat.equals("cmpl")) {
        return codons;
    }

    int basesLeft = 3;
    for (int i = 0; i < cds.size() && basesLeft > 0; i++) {
        IntervalFeature c = cds.get(i);
        int cdsLength = c.getTo() - c.getFrom() + 1;
        // Take what is still missing, but never more than this segment can give.
        int take = Math.min(basesLeft, cdsLength);

        String[] gff = new String[9];
        gff[0] = c.getChrom();
        gff[1] = c.getSource();
        gff[2] = "start_codon";
        gff[3] = Integer.toString(c.getFrom());
        gff[4] = Integer.toString(c.getFrom() + take - 1);
        gff[5] = ".";
        gff[6] = String.valueOf(c.getStrand());
        gff[7] = ".";
        // Reuse the attribute column (9th field) of the CDS the piece comes from.
        gff[8] = Lists.newArrayList(Splitter.on("\t").omitEmptyStrings().split(c.getRaw())).get(8);
        codons.add(new IntervalFeature(Joiner.on("\t").join(gff), TrackFormat.GTF, null));

        basesLeft -= take;
    }
    return codons;
}
use of tracks.IntervalFeature in project ASCIIGenome by dariober.
the class UtilsTest method canMergeIntervals.
/**
 * Exercises Utils.mergeIntervalFeatures on GTF and BED inputs: empty list, contained,
 * overlapping, touching and disjoint intervals.
 *
 * The same list is reused and grown across sections, so the order of the statements matters.
 * Feature lines are written with spaces and converted to tab-separated text before parsing.
 * NOTE(review): the meaning of the boolean second argument (always false here) is not visible
 * in this test — see Utils.mergeIntervalFeatures.
 */
@Test
public void canMergeIntervals() throws InvalidGenomicCoordsException, InvalidColourException {
// Zero-length list: nothing in, nothing out
List<IntervalFeature> intv = new ArrayList<IntervalFeature>();
assertEquals(0, Utils.mergeIntervalFeatures(intv, false).size());
// Second feature fully contained in the first: merged into 100-1000
intv.clear();
intv.add(new IntervalFeature("chr1 . . 100 1000 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
intv.add(new IntervalFeature("chr1 . . 200 300 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
assertEquals(100, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
assertEquals(1000, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
// A contained feature plus a partially overlapping one: merged into 100-5000
intv.clear();
intv.add(new IntervalFeature("chr1 . . 100 1000 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
intv.add(new IntervalFeature("chr1 . . 200 300 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
intv.add(new IntervalFeature("chr1 . . 500 5000 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
assertEquals(100, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
assertEquals(5000, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
/* MEMO: the start of a BED feature is incremented by 1, so BED start 0 is expected as getFrom() == 1 */
// A single BED feature
intv.clear();
intv.add(new IntervalFeature("chr1 0 10 x1".replaceAll(" ", "\t"), TrackFormat.BED, null));
assertEquals(1, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
// The name is taken from the original feature since only one interval is present (i.e. no merging at all)
assertEquals(intv.get(0).getName(), Utils.mergeIntervalFeatures(intv, false).get(0).getName());
// Add one overlapping feature: the result must equal a plain "chr1 0 10" feature
intv.add(new IntervalFeature("chr1 5 10".replaceAll(" ", "\t"), TrackFormat.BED, null));
IntervalFeature expected = new IntervalFeature("chr1 0 10".replaceAll(" ", "\t"), TrackFormat.BED, null);
assertEquals(expected.getFrom(), Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
assertTrue(expected.equals(Utils.mergeIntervalFeatures(intv, false).get(0)));
// Add a feature not overlapping the previous ones: a second merged interval appears
intv.add(new IntervalFeature("chr1 20 100".replaceAll(" ", "\t"), TrackFormat.BED, null));
assertEquals(2, Utils.mergeIntervalFeatures(intv, false).size());
assertEquals(21, Utils.mergeIntervalFeatures(intv, false).get(1).getFrom());
assertEquals(100, Utils.mergeIntervalFeatures(intv, false).get(1).getTo());
// Add two features overlapping the second interval: it is extended to 110
intv.add(new IntervalFeature("chr1 30 110".replaceAll(" ", "\t"), TrackFormat.BED, null));
intv.add(new IntervalFeature("chr1 50 110".replaceAll(" ", "\t"), TrackFormat.BED, null));
assertEquals(2, Utils.mergeIntervalFeatures(intv, false).size());
assertEquals(21, Utils.mergeIntervalFeatures(intv, false).get(1).getFrom());
assertEquals(110, Utils.mergeIntervalFeatures(intv, false).get(1).getTo());
// Touching BED features get merged into a single one
intv.clear();
intv.add(new IntervalFeature("chr1 0 10".replaceAll(" ", "\t"), TrackFormat.BED, null));
intv.add(new IntervalFeature("chr1 10 20".replaceAll(" ", "\t"), TrackFormat.BED, null));
assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
assertEquals(1, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
assertEquals(20, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
// Touching GTF features (1-10 and 11-20) get merged as well
intv.clear();
intv.add(new IntervalFeature("chr1 . . 1 10 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
intv.add(new IntervalFeature("chr1 . . 11 20 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
assertEquals(1, Utils.mergeIntervalFeatures(intv, false).size());
assertEquals(1, Utils.mergeIntervalFeatures(intv, false).get(0).getFrom());
assertEquals(20, Utils.mergeIntervalFeatures(intv, false).get(0).getTo());
// Nothing to merge: three disjoint features stay three
intv.clear();
intv.add(new IntervalFeature("chr1 . . 1 10 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
intv.add(new IntervalFeature("chr1 . . 20 30 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
intv.add(new IntervalFeature("chr1 . . 40 50 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
assertEquals(3, Utils.mergeIntervalFeatures(intv, false).size());
// A duplicate of the last feature is absorbed: still three intervals
intv.add(new IntervalFeature("chr1 . . 40 50 . . .".replaceAll(" ", "\t"), TrackFormat.GTF, null));
assertEquals(3, Utils.mergeIntervalFeatures(intv, false).size());
}
Aggregations