Search in sources :

Example 91 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadGroupCovariateUnitTest method runTest.

/**
 * Builds an artificial header containing {@code rg}, creates a random read assigned to that
 * read group, records the covariate values for it and verifies the resulting mismatch keys
 * against {@code expected}.
 *
 * @param rg        read group placed in the artificial header and assigned to the read
 * @param expected  value handed to {@code verifyCovariateArray} as the expected result
 * @param covariate covariate under test
 */
private static void runTest(final SAMReadGroupRecord rg, final String expected, final ReadGroupCovariate covariate) {
    final SAMFileHeader samHeader = ArtificialReadUtils.createArtificialSamHeaderWithReadGroup(rg);
    final GATKRead randomRead = ArtificialReadUtils.createRandomRead(samHeader, 10);
    final String readGroupId = rg.getReadGroupId();
    randomRead.setReadGroup(readGroupId);

    // One covariate slot, sized to the read that was just generated.
    final int readLength = randomRead.getLength();
    final ReadCovariates recorded = new ReadCovariates(readLength, 1, new CovariateKeyCache());
    covariate.recordValues(randomRead, samHeader, recorded, true);

    verifyCovariateArray(recorded.getMismatchesKeySet(), expected, covariate);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 92 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class ReadGroupCovariateUnitTest method testReadGroupNames.

/**
 * Checks that {@code ReadGroupCovariate.getReadGroupIDs} applied to a header returns the same
 * list as mapping {@code ReadGroupCovariate.getID} over that header's read groups.
 */
@Test
public void testReadGroupNames() {
    // NOTE(review): id, rgs and covariate are never read by the assertion below; they are kept
    // so that the test performs exactly the same calls as before.
    final String id = "MY.ID";
    final String expected = "SAMPLE.1";
    final ReadGroupCovariate covariate = new ReadGroupCovariate(
            new RecalibrationArgumentCollection(),
            Arrays.asList(expected));
    final SAMFileHeader headerWithGroups = ArtificialReadUtils.createArtificialSamHeaderWithGroups(1, 0, 100, 2);
    final List<String> rgs = Arrays.asList("rg1", "rg2");

    Assert.assertEquals(
            ReadGroupCovariate.getReadGroupIDs(headerWithGroups),
            headerWithGroups.getReadGroups()
                    .stream()
                    .map(readGroup -> ReadGroupCovariate.getID(readGroup))
                    .collect(Collectors.toList()));
}
Also used : RecalibrationArgumentCollection(org.broadinstitute.hellbender.utils.recalibration.RecalibrationArgumentCollection) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Test(org.testng.annotations.Test)

Example 93 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class IntervalListTools method doWork.

/**
 * Combines the interval lists in INPUT and SECOND_INPUT with ACTION, optionally sorts, inverts,
 * uniques and breaks the result at band multiples, stamps the header with a new program record
 * and any comments, and writes the result to OUTPUT (one file, or scattered when
 * SCATTER_COUNT is not 1). Finally logs the number of intervals and unique bases produced.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    // Check inputs
    for (final File f : INPUT) {
        IOUtil.assertFileIsReadable(f);
    }
    for (final File f : SECOND_INPUT) {
        IOUtil.assertFileIsReadable(f);
    }
    if (OUTPUT != null) {
        // A single output is a file; scattered output is written beneath a directory.
        if (SCATTER_COUNT == 1) {
            IOUtil.assertFileIsWritable(OUTPUT);
        } else {
            IOUtil.assertDirectoryIsWritable(OUTPUT);
        }
    }
    // Read in the interval lists and apply any padding
    final List<IntervalList> lists = openIntervalLists(INPUT);
    // same for the second list
    final List<IntervalList> secondLists = openIntervalLists(SECOND_INPUT);
    if (UNIQUE && !SORT) {
        LOG.warn("UNIQUE=true requires sorting but SORT=false was specified.  Results will be sorted!");
    }
    final IntervalList result = ACTION.act(lists, secondLists);
    if (SCATTER_COUNT > 1) {
        // Scattering requires a uniqued, sorted interval list.  We want to do this up front (before BREAKING AT BANDS)
        SORT = true;
        UNIQUE = true;
    }
    if (INVERT) {
        // no need to sort, since return will be sorted by definition.
        SORT = false;
        UNIQUE = true;
    }
    final IntervalList possiblySortedResult = SORT ? result.sorted() : result;
    final IntervalList possiblyInvertedResult = INVERT ? IntervalList.invert(possiblySortedResult) : possiblySortedResult;
    //only get unique if this has been asked unless inverting (since the invert will return a unique list)
    List<Interval> finalIntervals = UNIQUE ? possiblyInvertedResult.uniqued().getIntervals() : possiblyInvertedResult.getIntervals();
    if (BREAK_BANDS_AT_MULTIPLES_OF > 0) {
        finalIntervals = IntervalList.breakIntervalsAtBandMultiples(finalIntervals, BREAK_BANDS_AT_MULTIPLES_OF);
    }
    // Decide on a PG ID and make a program group: the smallest positive integer whose string
    // form is not already used as a program record id in the header.
    final SAMFileHeader header = result.getHeader();
    final Set<String> pgs = new HashSet<>();
    for (final SAMProgramRecord pg : header.getProgramRecords()) {
        pgs.add(pg.getId());
    }
    for (int i = 1; i < Integer.MAX_VALUE; ++i) {
        if (!pgs.contains(String.valueOf(i))) {
            final SAMProgramRecord pg = new SAMProgramRecord(String.valueOf(i));
            pg.setCommandLine(getCommandLine());
            pg.setProgramName(getClass().getSimpleName());
            header.addProgramRecord(pg);
            break;
        }
    }
    // Add any comments
    if (COMMENT != null) {
        for (final String comment : COMMENT) {
            header.addComment(comment);
        }
    }
    final IntervalList output = new IntervalList(header);
    for (final Interval i : finalIntervals) {
        output.add(i);
    }
    final List<IntervalList> resultIntervals;
    if (OUTPUT != null) {
        if (SCATTER_COUNT == 1) {
            output.write(OUTPUT);
            resultIntervals = Arrays.asList(output);
        } else {
            final List<IntervalList> scattered = writeScatterIntervals(output);
            LOG.info(String.format("Wrote %s scatter subdirectories to %s.", scattered.size(), OUTPUT));
            if (scattered.size() != SCATTER_COUNT) {
                LOG.warn(String.format("Requested scatter width of %s, but only emitted %s.  (This may be an expected consequence of running in %s mode.)", SCATTER_COUNT, scattered.size(), IntervalListScatterer.Mode.BALANCING_WITHOUT_INTERVAL_SUBDIVISION));
            }
            resultIntervals = scattered;
        }
    } else {
        resultIntervals = Arrays.asList(output);
    }
    // Accumulate both totals over every emitted list. The unique base count was previously
    // overwritten on each iteration, so only the last scattered list was reported.
    long totalUniqueBaseCount = 0;
    long intervalCount = 0;
    for (final IntervalList finalInterval : resultIntervals) {
        totalUniqueBaseCount += finalInterval.getUniqueBaseCount();
        intervalCount += finalInterval.size();
    }
    LOG.info("Produced " + intervalCount + " intervals totalling " + totalUniqueBaseCount + " unique bases.");
    return null;
}
Also used : IntervalList(htsjdk.samtools.util.IntervalList) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Interval(htsjdk.samtools.util.Interval)

Example 94 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class LiftOverIntervalList method doWork.

/**
 * Do the work after command line has been parsed. RuntimeException may be
 * thrown by this method, and are reported appropriately.
 *
 * Lifts every interval of INPUT over using CHAIN, collects the successfully lifted intervals
 * under the header read from SEQUENCE_DICTIONARY, and writes them, sorted, to OUTPUT.
 * Intervals that cannot be lifted are logged together with their partial liftovers.
 *
 * @return 1 if at least one interval failed to lift over, otherwise 0
 */
@Override
protected Object doWork() {
    assertFileIsReadable(INPUT);
    assertFileIsReadable(SEQUENCE_DICTIONARY);
    assertFileIsReadable(CHAIN);
    assertFileIsWritable(OUTPUT);
    final LiftOver liftOver = new LiftOver(CHAIN);
    liftOver.setLiftOverMinMatch(MIN_LIFTOVER_PCT);
    final IntervalList fromIntervals = fromFile(INPUT);
    final SAMFileHeader toHeader = makeDefault().getFileHeader(SEQUENCE_DICTIONARY);
    liftOver.validateToSequences(toHeader.getSequenceDictionary());
    final IntervalList toIntervals = new IntervalList(toHeader);
    boolean anyFailed = false;
    for (final Interval fromInterval : fromIntervals) {
        final Interval toInterval = liftOver.liftOver(fromInterval);
        if (toInterval != null) {
            toIntervals.add(toInterval);
        } else {
            anyFailed = true;
            // Build the message as one string so the interval and its length are always part
            // of the log line, whichever logger API is in use.
            logger.warn("Liftover failed for " + fromInterval + "(len " + fromInterval.length() + ")");
            final List<LiftOver.PartialLiftover> partials = liftOver.diagnosticLiftover(fromInterval);
            for (final LiftOver.PartialLiftover partial : partials) {
                logger.info(partial);
            }
        }
    }
    // sorted() returns a new list rather than sorting in place, so write its result;
    // the return value was previously discarded and the unsorted list was written.
    toIntervals.sorted().write(OUTPUT);
    return anyFailed ? 1 : 0;
}
Also used : LiftOver(htsjdk.samtools.liftover.LiftOver) IntervalList(htsjdk.samtools.util.IntervalList) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval)

Example 95 with SAMFileHeader

use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.

the class MergeSamFiles method doWork.

/**
 * Combines multiple SAM/BAM files into one.
 *
 * Opens every file in INPUT, merges their headers, and writes all records to OUTPUT in
 * SORT_ORDER. The inputs are treated as presorted when every input's sort order equals
 * SORT_ORDER, when SORT_ORDER is unsorted, or when ASSUME_SORTED is set. The input readers
 * are closed once writing finishes, including when writing fails.
 *
 * @return always {@code null}
 */
@Override
protected Object doWork() {
    boolean matchedSortOrders = true;
    // Open the files for reading and writing
    final List<SamReader> readers = new ArrayList<>();
    final List<SAMFileHeader> headers = new ArrayList<>();
    {
        // Used to try and reduce redundant SDs in memory
        SAMSequenceDictionary dict = null;
        for (final File inFile : INPUT) {
            IOUtil.assertFileIsReadable(inFile);
            final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
            readers.add(in);
            headers.add(in.getFileHeader());
            // replace the duplicate copies with a single dictionary to reduce the memory footprint.
            if (dict == null) {
                dict = in.getFileHeader().getSequenceDictionary();
            } else if (dict.equals(in.getFileHeader().getSequenceDictionary())) {
                in.getFileHeader().setSequenceDictionary(dict);
            }
            matchedSortOrders = matchedSortOrders && in.getFileHeader().getSortOrder() == SORT_ORDER;
        }
    }
    // If all the input sort orders match the output sort order then just merge them and
    // write on the fly, otherwise setup to merge and sort before writing out the final file
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean presorted;
    final SAMFileHeader.SortOrder headerMergerSortOrder;
    final boolean mergingSamRecordIteratorAssumeSorted;
    if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
        logger.info("Input files are in same order as output so sorting to temp directory is not needed.");
        headerMergerSortOrder = SORT_ORDER;
        mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
        presorted = true;
    } else {
        logger.info("Sorting input files using temp directory " + TMP_DIR);
        headerMergerSortOrder = SAMFileHeader.SortOrder.unsorted;
        mergingSamRecordIteratorAssumeSorted = false;
        presorted = false;
    }
    final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
    final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
    final SAMFileHeader header = headerMerger.getMergedHeader();
    for (final String comment : COMMENT) {
        header.addComment(comment);
    }
    header.setSortOrder(SORT_ORDER);
    // NOTE(review): this factory is configured but never used to create the writer below, so
    // USE_THREADING appears to have no effect here - confirm against createSAMWriter.
    final SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
    if (USE_THREADING) {
        samFileWriterFactory.setUseAsyncIo(true);
    }
    try (final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, header, presorted)) {
        // Lastly loop through and write out the records
        final ProgressLogger progress = new ProgressLogger(logger, PROGRESS_INTERVAL);
        while (iterator.hasNext()) {
            final SAMRecord record = iterator.next();
            out.addAlignment(record);
            progress.record(record);
        }
        logger.info("Finished reading inputs.");
    } finally {
        // Close the input readers even when creating the writer or writing a record fails;
        // they were previously closed only on the success path.
        CloserUtil.close(readers);
    }
    return null;
}
Also used : SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Aggregations

SAMFileHeader (htsjdk.samtools.SAMFileHeader): 148; Test (org.testng.annotations.Test): 89; GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead): 85; BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest): 71; File (java.io.File): 23; SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord): 22; SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval): 17; DataProvider (org.testng.annotations.DataProvider): 17; java.util (java.util): 15; UserException (org.broadinstitute.hellbender.exceptions.UserException): 15; ArrayList (java.util.ArrayList): 14; List (java.util.List): 12; Collectors (java.util.stream.Collectors): 12; JavaSparkContext (org.apache.spark.api.java.JavaSparkContext): 12; SAMRecord (htsjdk.samtools.SAMRecord): 11; Locatable (htsjdk.samtools.util.Locatable): 11; BeforeClass (org.testng.annotations.BeforeClass): 11; SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 10; SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord): 10; ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup): 10