use of htsjdk.samtools.util.Interval in project gatk by broadinstitute.
the class BedToIntervalList method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
IOUtil.assertFileIsWritable(OUTPUT);
try {
final SAMFileHeader header = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).getFileHeader(SEQUENCE_DICTIONARY);
final IntervalList intervalList = new IntervalList(header);
/**
* NB: BED is zero-based, but a BEDCodec by default (since it is returns tribble Features) has an offset of one,
* so it returns 1-based starts. Ugh. Set to zero.
*/
final FeatureReader<BEDFeature> bedReader = AbstractFeatureReader.getFeatureReader(INPUT.getAbsolutePath(), new BEDCodec(BEDCodec.StartOffset.ZERO), false);
final CloseableTribbleIterator<BEDFeature> iterator = bedReader.iterator();
final ProgressLogger progressLogger = new ProgressLogger(logger, (int) 1e6);
while (iterator.hasNext()) {
final BEDFeature bedFeature = iterator.next();
final String sequenceName = bedFeature.getContig();
/**
* NB: BED is zero-based, so we need to add one here to make it one-based. Please observe we set the start
* offset to zero when creating the BEDCodec.
*/
final int start = bedFeature.getStart() + 1;
/**
* NB: BED is 0-based OPEN (which, for the end is equivalent to 1-based closed).
*/
final int end = bedFeature.getEnd();
// NB: do not use an empty name within an interval
String name = bedFeature.getName();
if (name.isEmpty())
name = null;
final SAMSequenceRecord sequenceRecord = header.getSequenceDictionary().getSequence(sequenceName);
// Do some validation
if (null == sequenceRecord) {
throw new GATKException(String.format("Sequence '%s' was not found in the sequence dictionary", sequenceName));
} else if (start < 1) {
throw new GATKException(String.format("Start on sequence '%s' was less than one: %d", sequenceName, start));
} else if (sequenceRecord.getSequenceLength() < start) {
throw new GATKException(String.format("Start on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), start));
} else if (end < 1) {
throw new GATKException(String.format("End on sequence '%s' was less than one: %d", sequenceName, end));
} else if (sequenceRecord.getSequenceLength() < end) {
throw new GATKException(String.format("End on sequence '%s' was past the end: %d < %d", sequenceName, sequenceRecord.getSequenceLength(), end));
} else if (end < start - 1) {
throw new GATKException(String.format("On sequence '%s', end < start-1: %d <= %d", sequenceName, end, start));
}
final Interval interval = new Interval(sequenceName, start, end, bedFeature.getStrand() == Strand.POSITIVE, name);
intervalList.add(interval);
progressLogger.record(sequenceName, start);
}
CloserUtil.close(bedReader);
// Sort and write the output
intervalList.uniqued().write(OUTPUT);
} catch (final IOException e) {
throw new RuntimeException(e);
}
return null;
}
use of htsjdk.samtools.util.Interval in project gatk by broadinstitute.
the class IntervalUtilsUnitTest method testIntervalFileToListNegativeOneLength.
// TODO - remove once a corrected version of the exome interval list is released.
@Test(dataProvider = "negativeOneLengthIntervalTestData")
public void testIntervalFileToListNegativeOneLength(GenomeLocParser genomeLocParser, String contig, int intervalStart, int intervalEnd) throws Exception {
final SAMFileHeader picardFileHeader = new SAMFileHeader();
picardFileHeader.addSequence(genomeLocParser.getContigInfo("1"));
final IntervalList picardIntervals = new IntervalList(picardFileHeader);
// Need one good interval or else a UserException.MalformedFile( is thrown if no intervals
picardIntervals.add(new Interval(contig, 1, 2, true, "dummyname0"));
picardIntervals.add(new Interval(contig, intervalStart, intervalEnd, true, "dummyname1"));
final File picardIntervalFile = createTempFile("testIntervalFileToListNegativeOneLength", ".intervals");
picardIntervals.write(picardIntervalFile);
IntervalUtils.intervalFileToList(genomeLocParser, picardIntervalFile.getAbsolutePath());
}
use of htsjdk.samtools.util.Interval in project gatk-protected by broadinstitute.
the class SplitIntervals method onTraversalStart.
@Override
public void onTraversalStart() {
ParamUtils.isPositive(scatterCount, "scatter count must be > 0.");
if (!outputDir.exists() && !outputDir.mkdir()) {
throw new RuntimeIOException("Unable to create directory: " + outputDir.getAbsolutePath());
}
// in general dictionary will be from the reference, but using -I reads.bam or -F variants.vcf
// to use the sequence dict from a bam or vcf is also supported
final SAMSequenceDictionary sequenceDictionary = getBestAvailableSequenceDictionary();
final List<SimpleInterval> intervals = hasIntervals() ? intervalArgumentCollection.getIntervals(sequenceDictionary) : IntervalUtils.getAllIntervalsForReference(sequenceDictionary);
final IntervalList intervalList = new IntervalList(sequenceDictionary);
intervals.stream().map(si -> new Interval(si.getContig(), si.getStart(), si.getEnd())).forEach(intervalList::add);
final IntervalListScatterer scatterer = new IntervalListScatterer(subdivisionMode);
final List<IntervalList> scattered = scatterer.scatter(intervalList, scatterCount, false);
final DecimalFormat formatter = new DecimalFormat("0000");
IntStream.range(0, scattered.size()).forEach(n -> scattered.get(n).write(new File(outputDir, formatter.format(n) + "-scattered.intervals")));
}
use of htsjdk.samtools.util.Interval in project gatk by broadinstitute.
the class IntervalUtils method intervalFileToList.
/**
* Read a file of genome locations to process. The file may be in Picard
* or GATK interval format.
*
* @param glParser GenomeLocParser
* @param fileName interval file
* @return List<GenomeLoc> List of Genome Locs that have been parsed from file
*/
public static List<GenomeLoc> intervalFileToList(final GenomeLocParser glParser, final String fileName) {
Utils.nonNull(glParser, "glParser is null");
Utils.nonNull(fileName, "file name is null");
final File inputFile = new File(fileName);
final List<GenomeLoc> ret = new ArrayList<>();
/**
* First try to read the file as a Picard interval file since that's well structured --
* we'll fail quickly if it's not a valid file.
*/
boolean isPicardInterval = false;
try {
// Note: Picard will skip over intervals with contigs not in the sequence dictionary
final IntervalList il = IntervalList.fromFile(inputFile);
isPicardInterval = true;
for (final Interval interval : il.getIntervals()) {
// https://github.com/broadinstitute/gatk/issues/2089
if (interval.getStart() - interval.getEnd() == 1) {
logger.warn("Ignoring possibly incorrectly converted length 1 interval : " + interval);
} else if (glParser.isValidGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true)) {
ret.add(glParser.createGenomeLoc(interval.getContig(), interval.getStart(), interval.getEnd(), true));
} else {
throw new UserException(inputFile.getAbsolutePath() + " has an invalid interval : " + interval);
}
}
}// if that didn't work, try parsing file as a GATK interval file
catch (final Exception e) {
if (// definitely a picard file, but we failed to parse
isPicardInterval) {
throw new UserException.CouldNotReadInputFile(inputFile, e);
} else {
try (XReadLines reader = new XReadLines(new File(fileName))) {
for (final String line : reader) {
if (!line.trim().isEmpty()) {
ret.add(glParser.parseGenomeLoc(line));
}
}
} catch (final IOException e2) {
throw new UserException.CouldNotReadInputFile(inputFile, e2);
}
}
}
if (ret.isEmpty()) {
throw new UserException.MalformedFile(new File(fileName), "It contains no intervals.");
}
return ret;
}
use of htsjdk.samtools.util.Interval in project gatk-protected by broadinstitute.
the class HetPulldownCalculatorUnitTest method inputGetTumorHetPulldown15.
@DataProvider(name = "inputGetTumorHetPulldownMin15")
public Object[][] inputGetTumorHetPulldown15() {
final Pulldown tumorHetPulldown = new Pulldown(normalHeader);
tumorHetPulldown.add(new AllelicCount(new SimpleInterval("1", 14630, 14630), 9, 8));
tumorHetPulldown.add(new AllelicCount(new SimpleInterval("2", 14689, 14689), 6, 9));
final IntervalList normalHetIntervals = new IntervalList(tumorHeader);
normalHetIntervals.add(new Interval("1", 14630, 14630));
normalHetIntervals.add(new Interval("2", 14689, 14689));
return new Object[][] { { normalHetIntervals, tumorHetPulldown } };
}
Aggregations