Search in sources :

Example 41 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk-protected by broadinstitute.

the class HDF5AllelicPoNUtils method read.

/**
     * Reads an {@link AllelicPanelOfNormals} from an {@link HDF5File}.
     */
static AllelicPanelOfNormals read(final HDF5File file) {
    if (file.isPresent(ALLELIC_PANEL_GROUP_NAME)) {
        //read global hyperparameters
        final double globalAlpha = file.readDouble(GLOBAL_ALPHA_PATH);
        final double globalBeta = file.readDouble(GLOBAL_BETA_PATH);
        final AllelicPanelOfNormals.HyperparameterValues globalHyperparameterValues = new AllelicPanelOfNormals.HyperparameterValues(globalAlpha, globalBeta);
        //read SNPs
        final List<SimpleInterval> snps = readSNPs(file);
        //read local hyperparameters
        final List<AllelicPanelOfNormals.HyperparameterValues> localHyperparameterValues = readLocalhyperparameterValues(file);
        //check number of SNPs and local hyperparameters match
        if (snps.size() != localHyperparameterValues.size()) {
            throw new GATKException(String.format("Wrong number of elements in the SNPs and local hyperparameters recovered from file '%s': %d != %d", file.getFile(), snps.size(), localHyperparameterValues.size()));
        }
        //create site-to-hyperparameter map
        final Map<SimpleInterval, AllelicPanelOfNormals.HyperparameterValues> siteToHyperparameterPairMap = new HashMap<>();
        for (int i = 0; i < snps.size(); i++) {
            final SimpleInterval site = snps.get(i);
            final AllelicPanelOfNormals.HyperparameterValues hyperparameterValues = localHyperparameterValues.get(i);
            if (siteToHyperparameterPairMap.containsKey(site)) {
                throw new UserException.BadInput("Allelic panel of normals contains duplicate sites.");
            } else {
                siteToHyperparameterPairMap.put(site, hyperparameterValues);
            }
        }
        return new AllelicPanelOfNormals(globalHyperparameterValues, siteToHyperparameterPairMap);
    }
    //if HDF5 file does not contain path for allelic PoN, return empty PoN
    return AllelicPanelOfNormals.EMPTY_PON;
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 42 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class IntervalArgumentCollection method parseIntervals.

private void parseIntervals(final GenomeLocParser genomeLocParser) {
    // return if no interval arguments at all
    if (!intervalsSpecified()) {
        throw new GATKException("Cannot call parseIntervals() without specifying either intervals to include or exclude.");
    }
    GenomeLocSortedSet includeSortedSet;
    if (getIntervalStrings().isEmpty()) {
        // the -L argument isn't specified, which means that -XL was, since we checked intervalsSpecified()
        // therefore we set the include set to be the entire reference territory
        includeSortedSet = GenomeLocSortedSet.createSetFromSequenceDictionary(genomeLocParser.getSequenceDictionary());
    } else {
        try {
            includeSortedSet = IntervalUtils.loadIntervals(getIntervalStrings(), intervalSetRule, intervalMerging, intervalPadding, genomeLocParser);
        } catch (UserException.EmptyIntersection e) {
            throw new CommandLineException.BadArgumentValue("-L, --interval_set_rule", getIntervalStrings() + "," + intervalSetRule, "The specified intervals had an empty intersection");
        }
    }
    final GenomeLocSortedSet excludeSortedSet = IntervalUtils.loadIntervals(excludeIntervalStrings, IntervalSetRule.UNION, intervalMerging, intervalExclusionPadding, genomeLocParser);
    if (excludeSortedSet.contains(GenomeLoc.UNMAPPED)) {
        throw new UserException("-XL unmapped is not currently supported");
    }
    GenomeLocSortedSet intervals;
    // if no exclude arguments, can return the included set directly
    if (excludeSortedSet.isEmpty()) {
        intervals = includeSortedSet;
    } else // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
    {
        intervals = includeSortedSet.subtractRegions(excludeSortedSet);
        if (intervals.isEmpty()) {
            throw new CommandLineException.BadArgumentValue("-L,-XL", getIntervalStrings().toString() + ", " + excludeIntervalStrings.toString(), "The intervals specified for exclusion with -XL removed all territory specified by -L.");
        }
        // logging messages only printed when exclude (-XL) arguments are given
        final long toPruneSize = includeSortedSet.coveredSize();
        final long toExcludeSize = excludeSortedSet.coveredSize();
        final long intervalSize = intervals.coveredSize();
        logger.info(String.format("Initial include intervals span %d loci; exclude intervals span %d loci", toPruneSize, toExcludeSize));
        logger.info(String.format("Excluding %d loci from original intervals (%.2f%% reduction)", toPruneSize - intervalSize, (toPruneSize - intervalSize) / (0.01 * toPruneSize)));
    }
    logger.info(String.format("Processing %d bp from intervals", intervals.coveredSize()));
    // Separate out requests for unmapped records from the rest of the intervals.
    boolean traverseUnmapped = false;
    if (intervals.contains(GenomeLoc.UNMAPPED)) {
        traverseUnmapped = true;
        intervals.remove(GenomeLoc.UNMAPPED);
    }
    traversalParameters = new TraversalParameters(IntervalUtils.convertGenomeLocsToSimpleIntervals(intervals.toList()), traverseUnmapped);
}
Also used : TraversalParameters(org.broadinstitute.hellbender.engine.TraversalParameters) UserException(org.broadinstitute.hellbender.exceptions.UserException) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) CommandLineException(org.broadinstitute.barclay.argparser.CommandLineException)

Example 43 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class AddContextDataToReadSparkOptimized method fillContext.

/**
     * Given a shard that has reads and variants, query Google Genomics' Reference server and get reference info
     * (including an extra margin on either side), and fill that and the correct variants into readContext.
     */
public static ContextShard fillContext(ReferenceMultiSource refSource, ContextShard shard) {
    if (null == shard)
        return null;
    // use the function to make sure we get the exact correct amount of reference bases
    int start = Integer.MAX_VALUE;
    int end = Integer.MIN_VALUE;
    SerializableFunction<GATKRead, SimpleInterval> referenceWindowFunction = refSource.getReferenceWindowFunction();
    for (GATKRead r : shard.reads) {
        SimpleInterval readRefs = referenceWindowFunction.apply(r);
        start = Math.min(readRefs.getStart(), start);
        end = Math.max(readRefs.getEnd(), end);
    }
    if (start == Integer.MAX_VALUE) {
        // there are no reads in this shard, so we're going to remove it
        return null;
    }
    SimpleInterval refInterval = new SimpleInterval(shard.interval.getContig(), start, end);
    ReferenceBases refBases;
    try {
        refBases = refSource.getReferenceBases(null, refInterval);
    } catch (IOException x) {
        throw new GATKException("Unable to read the reference");
    }
    ArrayList<ReadContextData> readContext = new ArrayList<>();
    for (GATKRead r : shard.reads) {
        SimpleInterval readInterval = new SimpleInterval(r);
        List<GATKVariant> variantsOverlappingThisRead = shard.variantsOverlapping(readInterval);
        // we pass all the bases. That's better because this way it's just a shared
        // pointer instead of being an array copy. Downstream processing is fine with having
        // extra bases (it expects a few, actually).
        readContext.add(new ReadContextData(refBases, variantsOverlappingThisRead));
    }
    return shard.withReadContext(readContext);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) ReadContextData(org.broadinstitute.hellbender.engine.ReadContextData) ReferenceBases(org.broadinstitute.hellbender.utils.reference.ReferenceBases) GATKVariant(org.broadinstitute.hellbender.utils.variant.GATKVariant) ArrayList(java.util.ArrayList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) IOException(java.io.IOException) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Example 44 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class RevertSam method doWork.

@Override
protected Object doWork() {
    IOUtil.assertFileIsReadable(INPUT);
    IOUtil.assertFileIsWritable(OUTPUT);
    final boolean sanitizing = SANITIZE;
    final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
    final SAMFileHeader inHeader = in.getFileHeader();
    // If we are going to override SAMPLE_ALIAS or LIBRARY_NAME, make sure all the read
    // groups have the same values.
    final List<SAMReadGroupRecord> rgs = inHeader.getReadGroups();
    if (SAMPLE_ALIAS != null || LIBRARY_NAME != null) {
        boolean allSampleAliasesIdentical = true;
        boolean allLibraryNamesIdentical = true;
        for (int i = 1; i < rgs.size(); i++) {
            if (!rgs.get(0).getSample().equals(rgs.get(i).getSample())) {
                allSampleAliasesIdentical = false;
            }
            if (!rgs.get(0).getLibrary().equals(rgs.get(i).getLibrary())) {
                allLibraryNamesIdentical = false;
            }
        }
        if (SAMPLE_ALIAS != null && !allSampleAliasesIdentical) {
            throw new UserException("Read groups have multiple values for sample.  " + "A value for SAMPLE_ALIAS cannot be supplied.");
        }
        if (LIBRARY_NAME != null && !allLibraryNamesIdentical) {
            throw new UserException("Read groups have multiple values for library name.  " + "A value for library name cannot be supplied.");
        }
    }
    ////////////////////////////////////////////////////////////////////////////
    // Build the output writer with an appropriate header based on the options
    ////////////////////////////////////////////////////////////////////////////
    final boolean presorted = (inHeader.getSortOrder() == SORT_ORDER) || (SORT_ORDER == SAMFileHeader.SortOrder.queryname && SANITIZE);
    final SAMFileHeader outHeader = new SAMFileHeader();
    for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
        if (SAMPLE_ALIAS != null) {
            rg.setSample(SAMPLE_ALIAS);
        }
        if (LIBRARY_NAME != null) {
            rg.setLibrary(LIBRARY_NAME);
        }
        outHeader.addReadGroup(rg);
    }
    outHeader.setSortOrder(SORT_ORDER);
    if (!REMOVE_ALIGNMENT_INFORMATION) {
        outHeader.setSequenceDictionary(inHeader.getSequenceDictionary());
        outHeader.setProgramRecords(inHeader.getProgramRecords());
    }
    final SAMFileWriter out = createSAMWriter(OUTPUT, REFERENCE_SEQUENCE, outHeader, presorted);
    ////////////////////////////////////////////////////////////////////////////
    // Build a sorting collection to use if we are sanitizing
    ////////////////////////////////////////////////////////////////////////////
    final SortingCollection<SAMRecord> sorter;
    if (sanitizing) {
        sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
    } else {
        sorter = null;
    }
    final ProgressLogger progress = new ProgressLogger(logger, 1000000, "Reverted");
    for (final SAMRecord rec : in) {
        // Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
        if (rec.isSecondaryOrSupplementary())
            continue;
        // Actually to the reverting of the remaining records
        revertSamRecord(rec);
        if (sanitizing)
            sorter.add(rec);
        else
            out.addAlignment(rec);
        progress.record(rec);
    }
    ////////////////////////////////////////////////////////////////////////////
    if (!sanitizing) {
        out.close();
    } else {
        long total = 0, discarded = 0;
        final PeekableIterator<SAMRecord> iterator = new PeekableIterator<>(sorter.iterator());
        final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<>();
        // Figure out the quality score encoding scheme for each read group.
        for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
            final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
            final SamRecordFilter filter = new SamRecordFilter() {

                @Override
                public boolean filterOut(final SAMRecord rec) {
                    return !rec.getReadGroup().getId().equals(rg.getId());
                }

                @Override
                public boolean filterOut(final SAMRecord first, final SAMRecord second) {
                    throw new UnsupportedOperationException();
                }
            };
            readGroupToFormat.put(rg, QualityEncodingDetector.detect(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, new FilteringSamIterator(reader.iterator(), filter), RESTORE_ORIGINAL_QUALITIES));
            CloserUtil.close(reader);
        }
        for (final SAMReadGroupRecord r : readGroupToFormat.keySet()) {
            logger.info("Detected quality format for " + r.getReadGroupId() + ": " + readGroupToFormat.get(r));
        }
        if (readGroupToFormat.values().contains(FastqQualityFormat.Solexa)) {
            logger.error("No quality score encoding conversion implemented for " + FastqQualityFormat.Solexa);
            return -1;
        }
        final ProgressLogger sanitizerProgress = new ProgressLogger(logger, 1000000, "Sanitized");
        readNameLoop: while (iterator.hasNext()) {
            final List<SAMRecord> recs = fetchByReadName(iterator);
            total += recs.size();
            // Check that all the reads have bases and qualities of the same length
            for (final SAMRecord rec : recs) {
                if (rec.getReadBases().length != rec.getBaseQualities().length) {
                    logger.debug("Discarding " + recs.size() + " reads with name " + rec.getReadName() + " for mismatching bases and quals length.");
                    discarded += recs.size();
                    continue readNameLoop;
                }
            }
            // Check that if the first read is marked as unpaired that there is in fact only one read
            if (!recs.get(0).getReadPairedFlag() && recs.size() > 1) {
                logger.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because they claim to be unpaired.");
                discarded += recs.size();
                continue readNameLoop;
            }
            // Check that if we have paired reads there is exactly one first of pair and one second of pair
            if (recs.get(0).getReadPairedFlag()) {
                int firsts = 0, seconds = 0, unpaired = 0;
                for (final SAMRecord rec : recs) {
                    if (!rec.getReadPairedFlag())
                        ++unpaired;
                    if (rec.getFirstOfPairFlag())
                        ++firsts;
                    if (rec.getSecondOfPairFlag())
                        ++seconds;
                }
                if (unpaired > 0 || firsts != 1 || seconds != 1) {
                    logger.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because pairing information in corrupt.");
                    discarded += recs.size();
                    continue readNameLoop;
                }
            }
            // If we've made it this far spit the records into the output!
            for (final SAMRecord rec : recs) {
                // The only valid quality score encoding scheme is standard; if it's not standard, change it.
                final FastqQualityFormat recordFormat = readGroupToFormat.get(rec.getReadGroup());
                if (!recordFormat.equals(FastqQualityFormat.Standard)) {
                    final byte[] quals = rec.getBaseQualities();
                    for (int i = 0; i < quals.length; i++) {
                        quals[i] -= SolexaQualityConverter.ILLUMINA_TO_PHRED_SUBTRAHEND;
                    }
                    rec.setBaseQualities(quals);
                }
                out.addAlignment(rec);
                sanitizerProgress.record(rec);
            }
        }
        out.close();
        final double discardRate = discarded / (double) total;
        final NumberFormat fmt = new DecimalFormat("0.000%");
        logger.info("Discarded " + discarded + " out of " + total + " (" + fmt.format(discardRate) + ") reads in order to sanitize output.");
        if (discarded / (double) total > MAX_DISCARD_FRACTION) {
            throw new GATKException("Discarded " + fmt.format(discardRate) + " which is above MAX_DISCARD_FRACTION of " + fmt.format(MAX_DISCARD_FRACTION));
        }
    }
    CloserUtil.close(in);
    return null;
}
Also used : HashMap(java.util.HashMap) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) DecimalFormat(java.text.DecimalFormat) ProgressLogger(org.broadinstitute.hellbender.utils.runtime.ProgressLogger) SamReader(htsjdk.samtools.SamReader) FilteringSamIterator(htsjdk.samtools.filter.FilteringSamIterator) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) List(java.util.List) UserException(org.broadinstitute.hellbender.exceptions.UserException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) SAMRecord(htsjdk.samtools.SAMRecord) SAMRecordQueryNameComparator(htsjdk.samtools.SAMRecordQueryNameComparator) SAMFileHeader(htsjdk.samtools.SAMFileHeader) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) NumberFormat(java.text.NumberFormat)

Example 45 with GATKException

use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.

the class HDF5AllelicPoNUtils method read.

/**
     * Reads an {@link AllelicPanelOfNormals} from an {@link HDF5File}.
     */
static AllelicPanelOfNormals read(final HDF5File file) {
    if (file.isPresent(ALLELIC_PANEL_GROUP_NAME)) {
        //read global hyperparameters
        final double globalAlpha = file.readDouble(GLOBAL_ALPHA_PATH);
        final double globalBeta = file.readDouble(GLOBAL_BETA_PATH);
        final AllelicPanelOfNormals.HyperparameterValues globalHyperparameterValues = new AllelicPanelOfNormals.HyperparameterValues(globalAlpha, globalBeta);
        //read SNPs
        final List<SimpleInterval> snps = readSNPs(file);
        //read local hyperparameters
        final List<AllelicPanelOfNormals.HyperparameterValues> localHyperparameterValues = readLocalhyperparameterValues(file);
        //check number of SNPs and local hyperparameters match
        if (snps.size() != localHyperparameterValues.size()) {
            throw new GATKException(String.format("Wrong number of elements in the SNPs and local hyperparameters recovered from file '%s': %d != %d", file.getFile(), snps.size(), localHyperparameterValues.size()));
        }
        //create site-to-hyperparameter map
        final Map<SimpleInterval, AllelicPanelOfNormals.HyperparameterValues> siteToHyperparameterPairMap = new HashMap<>();
        for (int i = 0; i < snps.size(); i++) {
            final SimpleInterval site = snps.get(i);
            final AllelicPanelOfNormals.HyperparameterValues hyperparameterValues = localHyperparameterValues.get(i);
            if (siteToHyperparameterPairMap.containsKey(site)) {
                throw new UserException.BadInput("Allelic panel of normals contains duplicate sites.");
            } else {
                siteToHyperparameterPairMap.put(site, hyperparameterValues);
            }
        }
        return new AllelicPanelOfNormals(globalHyperparameterValues, siteToHyperparameterPairMap);
    }
    //if HDF5 file does not contain path for allelic PoN, return empty PoN
    return AllelicPanelOfNormals.EMPTY_PON;
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) GATKException(org.broadinstitute.hellbender.exceptions.GATKException)

Aggregations

GATKException (org.broadinstitute.hellbender.exceptions.GATKException)96 IOException (java.io.IOException)19 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)13 CigarElement (htsjdk.samtools.CigarElement)12 ArrayList (java.util.ArrayList)10 UserException (org.broadinstitute.hellbender.exceptions.UserException)10 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)8 Cigar (htsjdk.samtools.Cigar)7 File (java.io.File)6 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 OutputStream (java.io.OutputStream)5 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)5 VisibleForTesting (com.google.common.annotations.VisibleForTesting)4 Utils (org.broadinstitute.hellbender.utils.Utils)4 Tuple2 (scala.Tuple2)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 BufferedOutputStream (java.io.BufferedOutputStream)3 InputStream (java.io.InputStream)3 BigInteger (java.math.BigInteger)3 java.util (java.util)3