use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadGroupCovariateUnitTest method runTest.
private static void runTest(final SAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);
    GATKRead read = ArtificialReadUtils.createRandomRead(header, 10);
    read.setReadGroup(rg.getReadGroupId());
    ReadCovariates readCovariates = new ReadCovariates(read.getLength(), 1, new CovariateKeyCache());
    covariate.recordValues(read, header, readCovariates, true);
    verifyCovariateArray(readCovariates.getMismatchesKeySet(), expected, covariate);
}
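The helper above wires a single read into a ReadCovariates table and verifies the recorded keys. A minimal caller might look like the sketch below; the "MY.ID" and "SAMPLE.1" strings mirror the testReadGroupNames snippet further down, and the assumption that the covariate key is the read-group ID when no platform unit is set is ours, not taken from the GATK sources.
    final SAMReadGroupRecord rg = new SAMReadGroupRecord("MY.ID");
    rg.setSample("SAMPLE.1");
    // Assumption: with no platform unit set, the expected covariate key is the read-group ID itself.
    final String expected = rg.getReadGroupId();
    final ReadGroupCovariate covariate =
            new ReadGroupCovariate(new RecalibrationArgumentCollection(), Arrays.asList(expected));
    runTest(rg, expected, covariate);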
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadGroupCovariateUnitTest method testReadGroupNames.
@Test
public void testReadGroupNames() {
    final String id = "MY.ID";
    final String expected = "SAMPLE.1";
    final ReadGroupCovariate covariate = new ReadGroupCovariate(new RecalibrationArgumentCollection(), Arrays.asList(expected));
    final SAMFileHeader headerWithGroups = ArtificialReadUtils.createArtificialSamHeaderWithGroups(1, 0, 100, 2);
    final List<String> rgs = Arrays.asList("rg1", "rg2");
    Assert.assertEquals(ReadGroupCovariate.getReadGroupIDs(headerWithGroups), headerWithGroups.getReadGroups().stream().map(rg -> ReadGroupCovariate.getID(rg)).collect(Collectors.toList()));
}
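The assertion above compares ReadGroupCovariate.getReadGroupIDs against the read groups pulled straight off the SAMFileHeader. A hedged sketch of the plain-htsjdk side of that comparison, using a hand-built header instead of ArtificialReadUtils (read-group names here are illustrative):
    final SAMFileHeader header = new SAMFileHeader();
    header.setReadGroups(Arrays.asList(new SAMReadGroupRecord("rg1"), new SAMReadGroupRecord("rg2")));
    final List<String> ids = header.getReadGroups().stream()
            .map(SAMReadGroupRecord::getId)
            .collect(Collectors.toList());   // ["rg1", "rg2"]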
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class IntervalListTools method doWork.
@Override
protected Object doWork() {
    // Check inputs
    for (final File f : INPUT) IOUtil.assertFileIsReadable(f);
    for (final File f : SECOND_INPUT) IOUtil.assertFileIsReadable(f);
    if (OUTPUT != null) {
        if (SCATTER_COUNT == 1) {
            IOUtil.assertFileIsWritable(OUTPUT);
        } else {
            IOUtil.assertDirectoryIsWritable(OUTPUT);
        }
    }
    // Read in the interval lists and apply any padding
    final List<IntervalList> lists = openIntervalLists(INPUT);
    // Same for the second list
    final List<IntervalList> secondLists = openIntervalLists(SECOND_INPUT);
    if (UNIQUE && !SORT) {
        LOG.warn("UNIQUE=true requires sorting but SORT=false was specified. Results will be sorted!");
    }
    final IntervalList result = ACTION.act(lists, secondLists);
    if (SCATTER_COUNT > 1) {
        // Scattering requires a uniqued, sorted interval list. We want to do this up front (before breaking at band multiples).
        SORT = true;
        UNIQUE = true;
    }
    if (INVERT) {
        // No need to sort, since the inverted list will be sorted by definition.
        SORT = false;
        UNIQUE = true;
    }
    final IntervalList possiblySortedResult = SORT ? result.sorted() : result;
    final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult;
    // Only unique the intervals if requested and not inverting (the invert already returns a unique list).
    List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals();
    if (BREAK_BANDS_AT_MULTIPLES_OF > 0) {
        finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, BREAK_BANDS_AT_MULTIPLES_OF);
    }
    // Decide on a PG ID and make a program group
    final SAMFileHeader header = result.getHeader();
    final Set<String> pgs = new HashSet<>();
    for (final SAMProgramRecord pg : header.getProgramRecords()) pgs.add(pg.getId());
    for (int i = 1; i < Integer.MAX_VALUE; ++i) {
        if (!pgs.contains(String.valueOf(i))) {
            final SAMProgramRecord pg = new SAMProgramRecord(String.valueOf(i));
            pg.setCommandLine(getCommandLine());
            pg.setProgramName(getClass().getSimpleName());
            header.addProgramRecord(pg);
            break;
        }
    }
    // Add any comments
    if (COMMENT != null) {
        for (final String comment : COMMENT) {
            header.addComment(comment);
        }
    }
    final IntervalList output = new IntervalList(header);
    for (final Interval i : finalIntervals) {
        output.add(i);
    }
    final List<IntervalList> resultIntervals;
    if (OUTPUT != null) {
        if (SCATTER_COUNT == 1) {
            output.write(OUTPUT);
            resultIntervals = Arrays.asList(output);
        } else {
            final List<IntervalList> scattered = writeScatterIntervals(output);
            LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), OUTPUT));
            if (scattered.size() != SCATTER_COUNT) {
                LOG.warn(String.format("Requested scatter width of %s, but only emitted %s. (This may be an expected consequence of running in %s mode.)", SCATTER_COUNT, scattered.size(), IntervalListScatterer.Mode.BALANCING_WITHOUT_INTERVAL_SUBDIVISION));
            }
            resultIntervals = scattered;
        }
    } else {
        resultIntervals = Arrays.asList(output);
    }
    long totalUniqueBaseCount = 0;
    long intervalCount = 0;
    // Accumulate totals across all result lists.
    for (final IntervalList finalInterval : resultIntervals) {
        totalUniqueBaseCount += finalInterval.getUniqueBaseCount();
        intervalCount += finalInterval.size();
    }
    LOG.info("Produced " + intervalCount + " intervals totalling " + totalUniqueBaseCount + " unique bases.");
    return null;
}
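The SAMFileHeader work in doWork reduces to: take the header from the result, register a fresh program record and any comments on it, then build a new IntervalList around that header and write it. A minimal stand-alone sketch of that pattern, with a hand-built header and placeholder names and paths, might look like this:
    final SAMFileHeader header = new SAMFileHeader();
    header.setSequenceDictionary(new SAMSequenceDictionary(
            Collections.singletonList(new SAMSequenceRecord("chr1", 1_000_000))));
    final SAMProgramRecord pg = new SAMProgramRecord("1");   // PG ID chosen by hand here
    pg.setProgramName("IntervalListTools");
    header.addProgramRecord(pg);
    header.addComment("example comment");
    final IntervalList output = new IntervalList(header);
    output.add(new Interval("chr1", 100, 200));
    output.add(new Interval("chr1", 150, 250));
    // uniqued() merges the two overlapping intervals before writing
    output.uniqued().write(new File("example.interval_list"));   // placeholder output path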
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class LiftOverIntervalList method doWork.
/**
 * Do the work after the command line has been parsed. RuntimeExceptions may be
 * thrown by this method and are reported appropriately.
 */
@Override
protected Object doWork() {
    assertFileIsReadable(INPUT);
    assertFileIsReadable(SEQUENCE_DICTIONARY);
    assertFileIsReadable(CHAIN);
    assertFileIsWritable(OUTPUT);
    final LiftOver liftOver = new LiftOver(CHAIN);
    liftOver.setLiftOverMinMatch(MIN_LIFTOVER_PCT);
    final IntervalList fromIntervals = fromFile(INPUT);
    final SAMFileHeader toHeader = makeDefault().getFileHeader(SEQUENCE_DICTIONARY);
    liftOver.validateToSequences(toHeader.getSequenceDictionary());
    final IntervalList toIntervals = new IntervalList(toHeader);
    boolean anyFailed = false;
    for (final Interval fromInterval : fromIntervals) {
        final Interval toInterval = liftOver.liftOver(fromInterval);
        if (toInterval != null) {
            toIntervals.add(toInterval);
        } else {
            anyFailed = true;
            logger.warn("Liftover failed for " + fromInterval + " (len " + fromInterval.length() + ")");
            final List<LiftOver.PartialLiftover> partials = liftOver.diagnosticLiftover(fromInterval);
            for (final LiftOver.PartialLiftover partial : partials) {
                logger.info(partial);
            }
        }
    }
    // sorted() returns a new IntervalList, so write the sorted copy
    toIntervals.sorted().write(OUTPUT);
    return anyFailed ? 1 : 0;
}
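Per interval, the method performs a small LiftOver round trip: validate the target sequence dictionary, attempt the lift, and fall back to diagnosticLiftover on failure. A hedged stand-alone sketch of that round trip (chain file, dictionary path, and coordinates are placeholders):
    final LiftOver liftOver = new LiftOver(new File("hg19ToHg38.over.chain"));   // placeholder chain file
    liftOver.setLiftOverMinMatch(0.95);
    final SAMFileHeader toHeader = SamReaderFactory.makeDefault().getFileHeader(new File("hg38.dict"));   // placeholder dictionary
    liftOver.validateToSequences(toHeader.getSequenceDictionary());
    final Interval from = new Interval("chr1", 1_000_000, 1_000_500);
    final Interval to = liftOver.liftOver(from);
    if (to == null) {
        // The interval did not map cleanly; each PartialLiftover describes how a piece of it lifted (or failed to).
        liftOver.diagnosticLiftover(from).forEach(System.out::println);
    }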
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class MergeSamFiles method doWork.
/** Combines multiple SAM/BAM files into one. */
@Override
protected Object doWork() {
    boolean matchedSortOrders = true;
    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }
    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        logger.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
        // Lastly loop through and write out the records
        final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        logger.info("Finished reading inputs.");
        CloserUtil.close(readers);
    }
    return null;
}
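The header handling above reduces to SamFileHeaderMerger plus MergingSamRecordIterator, using the same constructors as the tool. A hedged stand-alone sketch (input paths are placeholders, and both BAMs are assumed to already be coordinate-sorted):
    final SamReader r1 = SamReaderFactory.makeDefault().open(new File("a.bam"));   // placeholder inputs
    final SamReader r2 = SamReaderFactory.makeDefault().open(new File("b.bam"));
    final List<SAMFileHeader> headers = Arrays.asList(r1.getFileHeader(), r2.getFileHeader());
    final SamFileHeaderMerger merger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, true);
    final MergingSamRecordIterator it = new MergingSamRecordIterator(merger, Arrays.asList(r1, r2), false);
    final SAMFileHeader merged = merger.getMergedHeader();   // carries the merged dictionary, read groups and program records
    while (it.hasNext()) {
        final SAMRecord record = it.next();   // records arrive in merged coordinate order
    }
    CloserUtil.close(Arrays.asList(r1, r2));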