use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class SVUtils method getRefRDD.
/**
* Create an RDD from the reference sequences.
* The reference sequences are transformed into a single, large collection of byte arrays. The collection is then
* parallelized into an RDD.
* Each contig that exceeds a size given by REF_RECORD_LEN is broken into a series of REF_RECORD_LEN chunks with a
* K-1 base overlap between successive chunks. (I.e., for K=63, the last 62 bases in chunk n match the first 62
* bases in chunk n+1) so that we don't miss any kmers due to the chunking -- we can just kmerize each record
* independently.
*/
public static JavaRDD<byte[]> getRefRDD(final JavaSparkContext ctx, final int kSize, final ReferenceMultiSource ref, final PipelineOptions options, final SAMSequenceDictionary readsDict, final int ref_record_len, final int ref_records_per_partition) {
final SAMSequenceDictionary dict = ref.getReferenceSequenceDictionary(readsDict);
if (dict == null)
throw new GATKException("No reference dictionary available");
final int effectiveRecLen = ref_record_len - kSize + 1;
final List<byte[]> sequenceChunks = new ArrayList<>();
for (final SAMSequenceRecord rec : dict.getSequences()) {
final String seqName = rec.getSequenceName();
final int seqLen = rec.getSequenceLength();
final SimpleInterval interval = new SimpleInterval(seqName, 1, seqLen);
try {
final byte[] bases = ref.getReferenceBases(options, interval).getBases();
for (int start = 0; start < seqLen; start += effectiveRecLen) {
sequenceChunks.add(Arrays.copyOfRange(bases, start, Math.min(start + ref_record_len, seqLen)));
}
} catch (final IOException ioe) {
throw new GATKException("Can't get reference sequence bases for " + interval, ioe);
}
}
return ctx.parallelize(sequenceChunks, sequenceChunks.size() / ref_records_per_partition + 1);
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class SVUtils method writeIntervalsFile.
/** Write intervals to a file. */
public static void writeIntervalsFile(final String intervalsFile, final Collection<SVInterval> intervals, final List<String> contigNames) {
try (final OutputStreamWriter writer = new OutputStreamWriter(new BufferedOutputStream(BucketUtils.createFile(intervalsFile)))) {
for (final SVInterval interval : intervals) {
final String seqName = contigNames.get(interval.getContig());
writer.write(seqName + "\t" + (interval.getStart() + 1) + "\t" + interval.getEnd() + "\n");
}
} catch (final IOException ioe) {
throw new GATKException("Can't write intervals file " + intervalsFile, ioe);
}
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class BreakpointComplications method initForInsDel.
private void initForInsDel(final ChimericAlignment chimericAlignment, final SimpleInterval leftReferenceInterval, final SimpleInterval rightReferenceInterval, final byte[] contigSeq) {
final AlignedAssembly.AlignmentInterval firstContigRegion = chimericAlignment.regionWithLowerCoordOnContig;
final AlignedAssembly.AlignmentInterval secondContigRegion = chimericAlignment.regionWithHigherCoordOnContig;
final int r1e = leftReferenceInterval.getEnd(), r2b = rightReferenceInterval.getStart(), c1e = firstContigRegion.endInAssembledContig, c2b = secondContigRegion.startInAssembledContig;
final int // distance-1 between the two regions on reference, denoted as d1 in the comments below
distBetweenAlignRegionsOnRef = r2b - r1e - 1, // distance-1 between the two regions on contig, denoted as d2 in the comments below
distBetweenAlignRegionsOnCtg = c2b - c1e - 1;
if (distBetweenAlignRegionsOnRef > 0) {
// Deletion:
resolveComplicationForSimpleDel(contigSeq, firstContigRegion, secondContigRegion, distBetweenAlignRegionsOnCtg);
} else if (distBetweenAlignRegionsOnRef == 0 && distBetweenAlignRegionsOnCtg > 0) {
// Insertion: simple insertion, inserted sequence is the sequence [c1e+1, c2b-1] on the contig
insertedSequenceForwardStrandRep = getInsertedSequence(firstContigRegion, secondContigRegion, contigSeq);
} else if (distBetweenAlignRegionsOnRef == 0 && distBetweenAlignRegionsOnCtg < 0) {
// Tandem repeat contraction: reference has two copies but one copy was deleted on the contig; duplicated sequence on reference are [r1e-|d2|+1, r1e] and [r2b, r2b+|d2|-1]
resolveComplicationForSimpleTandupContraction(leftReferenceInterval, contigSeq, firstContigRegion, secondContigRegion, r1e, c1e, c2b);
} else if (distBetweenAlignRegionsOnRef < 0 && distBetweenAlignRegionsOnCtg >= 0) {
// Tandem repeat expansion: reference bases [r1e-|d1|+1, r1e] to contig bases [c1e-|d1|+1, c1e] and [c2b, c2b+|d1|-1] with optional inserted sequence [c1e+1, c2b-1] in between the two intervals on contig
resolveComplicationForSimpleTandupExpansion(leftReferenceInterval, contigSeq, firstContigRegion, secondContigRegion, r1e, r2b, distBetweenAlignRegionsOnCtg);
} else if (distBetweenAlignRegionsOnRef < 0 && distBetweenAlignRegionsOnCtg < 0) {
// most complicated case, see below
// Deletion: duplication with repeat number N1 on reference, N2 on contig, such that N1 <= 2*N2 (and N2<N1);
// Insertion: duplication with repeat number N1 on reference, N2 on contig, such that N2 <= 2*N1 (and N1<N2);
// in both cases, the equal sign on the right can be taken only when there's pseudo-homology between starting bases of the duplicated sequence and starting bases of the right flanking region
// the reference system with a shorter overlap (i.e. with less-negative distance between regions) has a higher repeat number
resolveComplicationForComplexTandup(contigSeq, firstContigRegion, secondContigRegion, r1e, distBetweenAlignRegionsOnRef, distBetweenAlignRegionsOnCtg);
} else if (distBetweenAlignRegionsOnRef == 0 && distBetweenAlignRegionsOnCtg == 0) {
// SNP & indel
throw new GATKException("Detected badly parsed chimeric alignment for identifying SV breakpoints; no rearrangement found: " + chimericAlignment.onErrStringRep());
}
if (insertedSequenceForwardStrandRep.isEmpty()) {
if (dupSeqRepeatNumOnCtg != dupSeqRepeatNumOnRef && null == dupSeqRepeatUnitRefSpan)
throw new GATKException("An identified breakpoint pair seem to suggest insertion but the inserted sequence is empty: " + chimericAlignment.onErrStringRep());
}
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class ReadMetadata method writeMetadata.
public static void writeMetadata(final ReadMetadata readMetadata, final String filename) {
try (final Writer writer = new BufferedWriter(new OutputStreamWriter(BucketUtils.createFile(filename)))) {
writer.write("#reads:\t" + readMetadata.getNReads() + "\n");
writer.write("#partitions:\t" + readMetadata.getNPartitions() + "\n");
writer.write("max reads/partition:\t" + readMetadata.getMaxReadsInPartition() + "\n");
writer.write("coverage:\t" + readMetadata.getCoverage() + "\n");
for (final Map.Entry<String, ReadMetadata.ReadGroupFragmentStatistics> entry : readMetadata.getAllGroupStatistics().entrySet()) {
final ReadMetadata.ReadGroupFragmentStatistics stats = entry.getValue();
String name = entry.getKey();
if (name == null)
name = NO_GROUP;
writer.write("group " + name + ":\t" + stats.getMedianFragmentSize() + "-" + stats.getMedianNegativeDeviation() + "+" + stats.getMedianPositiveDeviation() + "\n");
}
} catch (final IOException ioe) {
throw new GATKException("Can't write metadata file.", ioe);
}
}
use of org.broadinstitute.hellbender.exceptions.GATKException in project gatk by broadinstitute.
the class ReadEndsForMarkDuplicatesCodec method encode.
@Override
public void encode(final ReadEndsForMarkDuplicates read) {
try {
this.out.writeShort(read.score);
this.out.writeShort(read.libraryId);
this.out.writeByte(read.orientation);
this.out.writeInt(read.read1ReferenceIndex);
this.out.writeInt(read.read1Coordinate);
this.out.writeLong(read.read1IndexInFile);
this.out.writeInt(read.read2ReferenceIndex);
if (read.orientation > ReadEnds.R) {
this.out.writeInt(read.read2Coordinate);
this.out.writeLong(read.read2IndexInFile);
}
this.out.writeShort(read.readGroup);
this.out.writeShort(read.tile);
this.out.writeShort(read.x);
this.out.writeShort(read.y);
this.out.writeByte(read.orientationForOpticalDuplicates);
} catch (final IOException ioe) {
throw new GATKException("Exception writing ReadEnds to file.", ioe);
}
}
Aggregations