Search in sources :

Example 11 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class FragmentCollection method create.

/**
     * Generic algorithm that takes an iterable over T objects, a getter routine to extract the reads in T,
     * and returns a FragmentCollection that contains the T objects whose underlying reads either overlap (or
     * not) with their mate pairs.
     *
     * @param readContainingObjects An iterator of objects that contain SAMRecords
     * @param nElements the number of elements to be provided by the iterator, which is usually known upfront and
     *                  greatly improves the efficiency of the fragment calculation
     * @param getter a helper function that takes an object of type T and returns is associated SAMRecord
     * @param <T>
     * @return a fragment collection
     */
private static <T> FragmentCollection<T> create(final Iterable<T> readContainingObjects, final int nElements, final Function<T, GATKRead> getter) {
    Collection<T> singletons = null;
    Collection<List<T>> overlapping = null;
    Map<String, T> nameMap = null;
    int lastStart = -1;
    // build an initial map, grabbing all of the multi-read fragments
    for (final T p : readContainingObjects) {
        final GATKRead read = getter.apply(p);
        if (read.getStart() < lastStart) {
            throw new IllegalArgumentException(String.format("FragmentUtils.create assumes that the incoming objects are ordered by " + "SAMRecord alignment start, but saw a read %s with alignment start " + "%d before the previous start %d", read.getName(), read.getStart(), lastStart));
        }
        lastStart = read.getStart();
        if (!read.isPaired() || read.mateIsUnmapped() || read.getMateStart() == 0 || read.getMateStart() > read.getEnd()) {
            // if we know that this read won't overlap its mate, or doesn't have one, jump out early
            if (singletons == null) {
                // lazy init
                singletons = new ArrayList<>(nElements);
            }
            singletons.add(p);
        } else {
            // the read might overlap it's mate, or is the rightmost read of a pair
            final String readName = read.getName();
            final T pe1 = nameMap == null ? null : nameMap.get(readName);
            if (pe1 != null) {
                // assumes we have at most 2 reads per fragment
                if (overlapping == null) {
                    // lazy init
                    overlapping = new ArrayList<>();
                }
                overlapping.add(Arrays.asList(pe1, p));
                nameMap.remove(readName);
            } else {
                if (nameMap == null) {
                    // lazy init
                    nameMap = new LinkedHashMap<>(nElements);
                }
                nameMap.put(readName, p);
            }
        }
    }
    // up to the oneReadPile
    if (nameMap != null && !nameMap.isEmpty()) {
        if (singletons == null) {
            singletons = nameMap.values();
        } else {
            singletons.addAll(nameMap.values());
        }
    }
    return new FragmentCollection<>(singletons, overlapping);
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead)

Example 12 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class FragmentUtils method adjustQualsOfOverlappingPairedFragments.

public static void adjustQualsOfOverlappingPairedFragments(final List<GATKRead> overlappingPair) {
    Utils.validateArg(overlappingPair.size() == 2, () -> "Found overlapping pair with " + overlappingPair.size() + " reads, but expecting exactly 2.");
    final GATKRead firstRead = overlappingPair.get(0);
    final GATKRead secondRead = overlappingPair.get(1);
    if (ReadUtils.getSoftStart(secondRead) < ReadUtils.getSoftStart(firstRead)) {
        adjustQualsOfOverlappingPairedFragments(secondRead, firstRead);
    } else {
        adjustQualsOfOverlappingPairedFragments(firstRead, secondRead);
    }
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead)

Example 13 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class ClippingOp method applyHARDCLIP_BASES.

/**
     * Hard clip bases from read, from start to stop in base coordinates
     *
     * If start == 0, then we will clip from the front of the read, otherwise we clip
     * from the right.  If start == 0 and stop == 10, this would clip out the first
     * 10 bases of the read.
     *
     * Note that this function works with reads with negative alignment starts, in order to
     * allow us to hardClip reads that have had their soft clips reverted and so might have
     * negative alignment starts
     *
     * Works properly with reduced reads and insertion/deletion base qualities
     *
     * Note: this method does not assume that the read is directly modifiable
     * and makes a copy of it.
     *
     * @param read a non-null read
     * @param start a start >= 0 and < read.length
     * @param stop a stop >= 0 and < read.length.
     * @return a cloned version of read that has been properly trimmed down
     */
private GATKRead applyHARDCLIP_BASES(final GATKRead read, final int start, final int stop) {
    // If the read is unmapped there is no Cigar string and neither should we create a new cigar string
    //Get the cigar once to avoid multiple calls because each makes a copy of the cigar
    final Cigar cigar = read.getCigar();
    final CigarShift cigarShift = read.isUnmapped() ? new CigarShift(new Cigar(), 0, 0) : hardClipCigar(cigar, start, stop);
    // the cigar may force a shift left or right (or both) in case we are left with insertions
    // starting or ending the read after applying the hard clip on start/stop.
    final int newLength = read.getLength() - (stop - start + 1) - cigarShift.shiftFromStart - cigarShift.shiftFromEnd;
    final byte[] newBases = new byte[newLength];
    final byte[] newQuals = new byte[newLength];
    final int copyStart = (start == 0) ? stop + 1 + cigarShift.shiftFromStart : cigarShift.shiftFromStart;
    System.arraycopy(read.getBases(), copyStart, newBases, 0, newLength);
    System.arraycopy(read.getBaseQualities(), copyStart, newQuals, 0, newLength);
    final GATKRead hardClippedRead = read.copy();
    hardClippedRead.setBaseQualities(newQuals);
    hardClippedRead.setBases(newBases);
    hardClippedRead.setCigar(cigarShift.cigar);
    if (start == 0) {
        hardClippedRead.setPosition(read.getContig(), read.getStart() + calculateAlignmentStartShift(cigar, cigarShift.cigar));
    }
    if (hasBaseIndelQualities(read)) {
        final byte[] newBaseInsertionQuals = new byte[newLength];
        final byte[] newBaseDeletionQuals = new byte[newLength];
        System.arraycopy(getBaseInsertionQualities(read), copyStart, newBaseInsertionQuals, 0, newLength);
        System.arraycopy(getBaseDeletionQualities(read), copyStart, newBaseDeletionQuals, 0, newLength);
        setInsertionBaseQualities(hardClippedRead, newBaseInsertionQuals);
        setDeletionBaseQualities(hardClippedRead, newBaseDeletionQuals);
    }
    return hardClippedRead;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) Cigar(htsjdk.samtools.Cigar)

Example 14 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class ReadLikelihoods method removeSampleReads.

// Requires that the collection passed iterator can remove elements, and it can be modified.
public void removeSampleReads(final int sampleIndex, final Collection<GATKRead> readsToRemove, final int alleleCount) {
    final GATKRead[] sampleReads = readsBySampleIndex[sampleIndex];
    final int sampleReadCount = sampleReads.length;
    final Object2IntMap<GATKRead> indexByRead = readIndexBySampleIndex(sampleIndex);
    // Count how many we are going to remove, which ones (indexes) and remove entry from the read-index map.
    final boolean[] removeIndex = new boolean[sampleReadCount];
    // captures the number of deletions.
    int removeCount = 0;
    // captures the first position that was deleted.
    int firstDeleted = sampleReadCount;
    final Iterator<GATKRead> readsToRemoveIterator = readsToRemove.iterator();
    while (readsToRemoveIterator.hasNext()) {
        final GATKRead read = readsToRemoveIterator.next();
        if (indexByRead.containsKey(read)) {
            final int index = indexByRead.getInt(read);
            if (firstDeleted > index) {
                firstDeleted = index;
            }
            removeCount++;
            removeIndex[index] = true;
            readsToRemoveIterator.remove();
            indexByRead.remove(read);
        }
    }
    // Nothing to remove we just finish here.
    if (removeCount == 0) {
        return;
    }
    final int newSampleReadCount = sampleReadCount - removeCount;
    // Now we skim out the removed reads from the read array.
    final GATKRead[] oldSampleReads = readsBySampleIndex[sampleIndex];
    final GATKRead[] newSampleReads = new GATKRead[newSampleReadCount];
    System.arraycopy(oldSampleReads, 0, newSampleReads, 0, firstDeleted);
    Utils.skimArray(oldSampleReads, firstDeleted, newSampleReads, firstDeleted, removeIndex, firstDeleted);
    // Update the indices for the extant reads from the first deletion onwards.
    for (int r = firstDeleted; r < newSampleReadCount; r++) {
        indexByRead.put(newSampleReads[r], r);
    }
    // Then we skim out the likelihoods of the removed reads.
    final double[][] oldSampleValues = valuesBySampleIndex[sampleIndex];
    final double[][] newSampleValues = new double[alleleCount][newSampleReadCount];
    for (int a = 0; a < alleleCount; a++) {
        System.arraycopy(oldSampleValues[a], 0, newSampleValues[a], 0, firstDeleted);
        Utils.skimArray(oldSampleValues[a], firstDeleted, newSampleValues[a], firstDeleted, removeIndex, firstDeleted);
    }
    valuesBySampleIndex[sampleIndex] = newSampleValues;
    readsBySampleIndex[sampleIndex] = newSampleReads;
    // reset the unmodifiable list.
    readListBySampleIndex[sampleIndex] = null;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead)

Example 15 with GATKRead

use of org.broadinstitute.hellbender.utils.read.GATKRead in project gatk by broadinstitute.

the class ReadLikelihoods method overlappingReadIndicesBySampleIndex.

private int[][] overlappingReadIndicesBySampleIndex(final Locatable overlap) {
    if (overlap == null) {
        return null;
    }
    final int sampleCount = samples.numberOfSamples();
    final int[][] result = new int[sampleCount][];
    final IntArrayList buffer = new IntArrayList(200);
    final String contig = overlap.getContig();
    final int overlapStart = overlap.getStart();
    final int overlapEnd = overlap.getEnd();
    for (int s = 0; s < sampleCount; s++) {
        buffer.clear();
        final GATKRead[] sampleReads = readsBySampleIndex[s];
        final int sampleReadCount = sampleReads.length;
        buffer.ensureCapacity(sampleReadCount);
        for (int r = 0; r < sampleReadCount; r++) {
            if (unclippedReadOverlapsRegion(sampleReads[r], contig, overlapStart, overlapEnd)) {
                buffer.add(r);
            }
        }
        result[s] = buffer.toIntArray();
    }
    return result;
}
Also used : GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList)

Aggregations

GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)457 Test (org.testng.annotations.Test)286 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)163 SAMFileHeader (htsjdk.samtools.SAMFileHeader)87 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)59 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)40 ArrayList (java.util.ArrayList)34 Collectors (java.util.stream.Collectors)34 List (java.util.List)30 Cigar (htsjdk.samtools.Cigar)29 File (java.io.File)28 java.util (java.util)28 DataProvider (org.testng.annotations.DataProvider)28 JavaRDD (org.apache.spark.api.java.JavaRDD)26 Haplotype (org.broadinstitute.hellbender.utils.haplotype.Haplotype)26 Assert (org.testng.Assert)25 ReadPileup (org.broadinstitute.hellbender.utils.pileup.ReadPileup)24 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)22 Argument (org.broadinstitute.barclay.argparser.Argument)18 UserException (org.broadinstitute.hellbender.exceptions.UserException)18