Search in sources :

Example 46 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class VariantWalkerSpark method getVariants.

/**
 * Builds a {@link JavaRDD} of variant contexts (variant plus its reads, reference and feature context)
 * for traversal.
 *
 * The driving variants are loaded using the user-supplied intervals, filtered with the filter returned by
 * {@code makeVariantFilter()}, and grouped into unpadded shards of {@code variantShardSize}. When no
 * intervals were supplied, the shards span every contig of the best available sequence dictionary, so all
 * variants are returned.
 *
 * In the current implementation the reads context is always empty.
 *
 * @param ctx the Spark context used to shard the variants and to broadcast the reference and features.
 * @return all variants as a {@link JavaRDD}, bounded by intervals if specified.
 */
public JavaRDD<VariantWalkerContext> getVariants(JavaSparkContext ctx) {
    final SAMSequenceDictionary dictionary = getBestAvailableSequenceDictionary();

    // Fall back to whole-reference intervals when the user did not restrict the traversal.
    final List<SimpleInterval> traversalIntervals;
    if (hasIntervals()) {
        traversalIntervals = getIntervals();
    } else {
        traversalIntervals = IntervalUtils.getAllIntervalsForReference(dictionary);
    }

    // Shards are deliberately unpadded (padding argument 0): padding is only needed for reference bases.
    final List<ShardBoundary> shardBoundaries = traversalIntervals.stream()
            .flatMap(interval -> Shard.divideIntervalIntoShards(interval, variantShardSize, 0, dictionary).stream())
            .collect(Collectors.toList());

    // Load the driving variants, then keep only those accepted by the tool's variant filter.
    final JavaRDD<VariantContext> loadedVariants = variantsSource.getParallelVariantContexts(drivingVariantFile, getIntervals());
    final VariantFilter acceptedVariants = makeVariantFilter();
    final JavaRDD<VariantContext> filteredVariants = loadedVariants.filter(acceptedVariants::test);

    final JavaRDD<Shard<VariantContext>> variantShards = SparkSharder.shard(ctx, filteredVariants, VariantContext.class, dictionary, shardBoundaries, variantShardSize, shuffle);

    // The reference and the feature manager are optional; a null broadcast means "not available".
    final Broadcast<ReferenceMultiSource> referenceBroadcast;
    if (hasReference()) {
        referenceBroadcast = ctx.broadcast(getReference());
    } else {
        referenceBroadcast = null;
    }
    final Broadcast<FeatureManager> featuresBroadcast;
    if (features == null) {
        featuresBroadcast = null;
    } else {
        featuresBroadcast = ctx.broadcast(features);
    }

    return variantShards.flatMap(getVariantsFunction(referenceBroadcast, featuresBroadcast, dictionary, variantShardPadding));
}
Also used : Broadcast(org.apache.spark.broadcast.Broadcast) VCFHeader(htsjdk.variant.vcf.VCFHeader) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Argument(org.broadinstitute.barclay.argparser.Argument) IndexUtils(org.broadinstitute.hellbender.utils.IndexUtils) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) VariantFilterLibrary(org.broadinstitute.hellbender.engine.filters.VariantFilterLibrary) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) VariantFilter(org.broadinstitute.hellbender.engine.filters.VariantFilter) org.broadinstitute.hellbender.engine(org.broadinstitute.hellbender.engine) List(java.util.List) IntervalUtils(org.broadinstitute.hellbender.utils.IntervalUtils) VariantContext(htsjdk.variant.variantcontext.VariantContext) VariantsSparkSource(org.broadinstitute.hellbender.engine.spark.datasources.VariantsSparkSource) StreamSupport(java.util.stream.StreamSupport) JavaRDD(org.apache.spark.api.java.JavaRDD) FlatMapFunction(org.apache.spark.api.java.function.FlatMapFunction) ReferenceMultiSource(org.broadinstitute.hellbender.engine.datasources.ReferenceMultiSource) VariantFilter(org.broadinstitute.hellbender.engine.filters.VariantFilter) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 47 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class GenotypingGivenAllelesUtils method composeGivenAllelesVariantContextFromRod.

/**
 * Picks the given-alleles variant context to use at a location from the records bound to
 * {@code allelesBinding}.
 *
 * The first record overlapping {@code loc} that is non-null, unfiltered and (when {@code snpsOnly} is set)
 * a SNP is selected. Every further record that also qualifies triggers a warning on {@code logger}, if one
 * was provided, and is otherwise ignored.
 *
 * @param tracker the meta data tracker; must not be {@code null}.
 * @param loc the query location; must not be {@code null}.
 * @param snpsOnly whether only SNP variation should be considered.
 * @param logger where to output warnings; may be {@code null}, in which case no warning is emitted.
 * @param allelesBinding the target variation context binding containing the given alleles; must not be {@code null}.
 * @return the first usable record at {@code loc}, or {@code null} if there is none.
 */
public static VariantContext composeGivenAllelesVariantContextFromRod(final FeatureContext tracker, final Locatable loc, final boolean snpsOnly, final Logger logger, final FeatureInput<VariantContext> allelesBinding) {
    Utils.nonNull(tracker, "tracker may not be null");
    Utils.nonNull(loc, "location may not be null");
    Utils.nonNull(allelesBinding, "alleles binding may not be null");

    VariantContext selected = null;
    for (final VariantContext candidate : tracker.getValues(allelesBinding, new SimpleInterval(loc))) {
        // Skip records that cannot be used: missing, filtered, or non-SNP when only SNPs are wanted.
        if (candidate == null || candidate.isFiltered()) {
            continue;
        }
        if (snpsOnly && !candidate.isSNP()) {
            continue;
        }

        if (selected == null) {
            selected = candidate;
            continue;
        }

        // A usable record was already chosen; keep it and just report the extra one.
        if (logger != null) {
            logger.warn("Multiple valid VCF records detected in the alleles input file at site " + loc + ", only considering the first record");
        }
    }
    return selected;
}
Also used : VariantContext(htsjdk.variant.variantcontext.VariantContext) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 48 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class AssemblyRegionTrimmer method nonVariantTargetRegions.

/**
 * Calculates the flanking regions of the target region that lie outside the variant span and can
 * therefore be trimmed away.
 *
 * @param targetRegion region for which to generate the flanking regions.
 * @param variantSpan the span of the core region containing relevant variation and required padding.
 * @return never {@code null}; a pair whose left element is the flank preceding {@code variantSpan} and whose
 *         right element is the flank following it. Either element is {@code null} when the target region
 *         does not extend beyond the variant span on that side.
 */
private Pair<SimpleInterval, SimpleInterval> nonVariantTargetRegions(final AssemblyRegion targetRegion, final SimpleInterval variantSpan) {
    final SimpleInterval targetSpan = targetRegion.getSpan();
    final int variantStart = variantSpan.getStart();
    final int variantEnd = variantSpan.getEnd();
    final int targetStart = targetSpan.getStart();
    final int targetEnd = targetSpan.getEnd();

    // Flank before the variant span: only present when the target starts earlier.
    SimpleInterval leadingFlank = null;
    if (targetStart < variantStart) {
        leadingFlank = new SimpleInterval(targetSpan.getContig(), targetStart, variantStart - 1);
    }

    // Flank after the variant span: only present when the target ends later.
    SimpleInterval trailingFlank = null;
    if (targetEnd > variantEnd) {
        trailingFlank = new SimpleInterval(targetSpan.getContig(), variantEnd + 1, targetEnd);
    }

    return Pair.of(leadingFlank, trailingFlank);
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 49 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class Haplotype method trim.

/**
 * Creates a new Haplotype derived from this one that exactly spans the provided location.
 *
 * This haplotype must have a genome location and a cigar, and {@code loc} must be fully contained within
 * this haplotype's genome location; otherwise the argument validation in this method fails.
 *
 * The new haplotype keeps this haplotype's reference flag and score, and its alignment start with respect
 * to the reference is shifted by the offset of {@code loc} within this haplotype's location.
 *
 * @param loc a location completely contained within this Haplotype's location; must not be {@code null}.
 * @return a new Haplotype with only the bases spanning the provided location, or {@code null} if no bases
 *         covering that span could be extracted or the trimmed cigar would start or end with an insertion
 *         or deletion.
 */
public Haplotype trim(final Locatable loc) {
    Utils.nonNull(loc, "Loc cannot be null");
    Utils.nonNull(genomeLocation, "Cannot trim a Haplotype without containing GenomeLoc");
    Utils.validateArg(new SimpleInterval(genomeLocation).contains(loc), () -> "Can only trim a Haplotype to a containing span.  My loc is " + genomeLocation + " but wanted trim to " + loc);
    Utils.nonNull(getCigar(), "Cannot trim haplotype without a cigar " + this);

    // Offsets of the requested span relative to the start of this haplotype's location.
    final int offsetStart = loc.getStart() - this.genomeLocation.getStart();
    final int offsetStop = offsetStart + loc.getEnd() - loc.getStart();

    final byte[] trimmedBases = AlignmentUtils.getBasesCoveringRefInterval(offsetStart, offsetStop, getBases(), 0, getCigar());
    final Cigar trimmedCigar = AlignmentUtils.trimCigarByReference(getCigar(), offsetStart, offsetStop);

    // We cannot meaningfully chop down the haplotype in either of these cases, so return null.
    if (trimmedBases == null) {
        return null;
    }
    if (AlignmentUtils.startsOrEndsWithInsertionOrDeletion(trimmedCigar)) {
        return null;
    }

    final Haplotype trimmed = new Haplotype(trimmedBases, isReference());
    trimmed.setCigar(trimmedCigar);
    trimmed.setGenomeLocation(loc);
    trimmed.setScore(score);
    trimmed.setAlignmentStartHapwrtRef(offsetStart + getAlignmentStartHapwrtRef());
    return trimmed;
}
Also used : Cigar(htsjdk.samtools.Cigar) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 50 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class ShardedIntervalIterator method next.

/**
 * Returns the current sharded interval and advances the iterator to the following shard.
 *
 * @return the sharded interval that was current when this method was called.
 * @throws NoSuchElementException if there is no current sharded interval.
 */
@Override
public SimpleInterval next() {
    final SimpleInterval current = shardedInterval;
    if (current == null) {
        throw new NoSuchElementException();
    }
    // Move on to the next shard; this rolls over to the next interval when the current one is exhausted.
    advanceShardInInterval();
    return current;
}
Also used : SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) NoSuchElementException(java.util.NoSuchElementException)

Aggregations

SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)545 Test (org.testng.annotations.Test)287 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)202 File (java.io.File)102 ArrayList (java.util.ArrayList)66 DataProvider (org.testng.annotations.DataProvider)64 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)60 Collectors (java.util.stream.Collectors)53 java.util (java.util)41 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)40 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)40 UserException (org.broadinstitute.hellbender.exceptions.UserException)39 VariantContext (htsjdk.variant.variantcontext.VariantContext)36 IntStream (java.util.stream.IntStream)34 Target (org.broadinstitute.hellbender.tools.exome.Target)34 IOException (java.io.IOException)32 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)28 Assert (org.testng.Assert)27 Locatable (htsjdk.samtools.util.Locatable)26 List (java.util.List)26