use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ArtificialReadUtilsUnitTest method testCreateArtificialUnmappedRead.
@Test
public void testCreateArtificialUnmappedRead() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead unmappedRead = ArtificialReadUtils.createArtificialUnmappedRead(header, new byte[] { 'A' }, new byte[] { 30 });
    Assert.assertTrue(unmappedRead.isUnmapped());
    Assert.assertEquals(unmappedRead.getAssignedContig(), ReadConstants.UNSET_CONTIG);
    Assert.assertEquals(unmappedRead.getAssignedStart(), ReadConstants.UNSET_POSITION);
    Assert.assertEquals(unmappedRead.getBases(), new byte[] { 'A' });
    Assert.assertEquals(unmappedRead.getBaseQualities(), new byte[] { 30 });
}
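For comparison, here is a minimal sketch of how an unmapped read with these defaults can be built directly with htsjdk's SAMRecord API. This is illustrative, not ArtificialReadUtils' actual implementation; it assumes ReadConstants.UNSET_CONTIG and UNSET_POSITION mirror the SAM spec's "*" and 0 sentinels, and the read name is made up:

import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;

// Sketch: an unmapped SAMRecord with the spec's "unset" contig ("*") and start (0).
final SAMFileHeader header = new SAMFileHeader();
final SAMRecord record = new SAMRecord(header);
record.setReadName("unmapped1");                                // illustrative name
record.setReadUnmappedFlag(true);
record.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME); // "*"
record.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);         // 0
record.setReadBases(new byte[] { 'A' });
record.setBaseQualities(new byte[] { 30 });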
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ArtificialReadUtilsUnitTest method testCreateArtificialUnmappedReadWithAssignedPosition.
@Test
public void testCreateArtificialUnmappedReadWithAssignedPosition() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader();
    final GATKRead unmappedRead = ArtificialReadUtils.createArtificialUnmappedReadWithAssignedPosition(header, "1", 50, new byte[] { 'A' }, new byte[] { 30 });
    Assert.assertTrue(unmappedRead.isUnmapped());
    Assert.assertEquals(unmappedRead.getAssignedContig(), "1");
    Assert.assertEquals(unmappedRead.getAssignedStart(), 50);
    Assert.assertEquals(unmappedRead.getBases(), new byte[] { 'A' });
    Assert.assertEquals(unmappedRead.getBaseQualities(), new byte[] { 30 });
}
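The SAM format also allows a "placed" unmapped read: the unmapped flag is set, but a contig and start are assigned so the read sorts next to its mapped mate. A sketch of that state with plain htsjdk, assuming the factory above sets the equivalent fields:

// Sketch: a placed unmapped read keeps a position for sorting/mate placement,
// even though it has no alignment.
final SAMRecord placed = new SAMRecord(header);
placed.setReadUnmappedFlag(true);
placed.setReferenceName("1");
placed.setAlignmentStart(50);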
use of htsjdk.samtools.SAMFileHeader in project gatk-protected by broadinstitute.
the class GATKProtectedVariantContextUtilsUnitTest method testGetPileup.
@Test
public void testGetPileup() {
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 1000);
    final Locatable loc = new SimpleInterval("chr1", 10, 10);
    final int readLength = 3;
    // this read doesn't overlap {@code loc}
    final GATKRead read1 = ArtificialReadUtils.createArtificialRead(header, "read1", 0, 1, readLength);
    read1.setBases(Utils.dupBytes((byte) 'A', readLength));
    read1.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read1.setCigar("3M");
    // this read overlaps {@code loc} with a Q30 'A'
    final GATKRead read2 = ArtificialReadUtils.createArtificialRead(header, "read2", 0, 10, readLength);
    read2.setBases(Utils.dupBytes((byte) 'A', readLength));
    read2.setBaseQualities(Utils.dupBytes((byte) 30, readLength));
    read2.setCigar("3M");
    // this read overlaps {@code loc} with a Q20 'C' (due to the deletion)
    final GATKRead read3 = ArtificialReadUtils.createArtificialRead(header, "read3", 0, 7, readLength);
    read3.setBases(Utils.dupBytes((byte) 'C', readLength));
    read3.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read3.setCigar("1M1D2M");
    // this read doesn't overlap {@code loc} due to the deletion
    final GATKRead read4 = ArtificialReadUtils.createArtificialRead(header, "read4", 0, 7, readLength);
    read4.setBases(Utils.dupBytes((byte) 'C', readLength));
    read4.setBaseQualities(Utils.dupBytes((byte) 20, readLength));
    read4.setCigar("1M5D2M");
    final ReadPileup pileup = GATKProtectedVariantContextUtils.getPileup(loc, Arrays.asList(read1, read2, read3, read4));
    // the pileup should contain a Q30 'A' and a Q20 'C'
    final int[] counts = pileup.getBaseCounts();
    Assert.assertEquals(counts, new int[] { 1, 1, 0, 0 });
}
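Why read3 reaches position 10 while read4 does not follows from the reference span of each CIGAR, which htsjdk can confirm directly. A quick illustrative check (not part of the test; variable names are made up):

import htsjdk.samtools.Cigar;
import htsjdk.samtools.TextCigarCodec;

// 1M1D2M consumes 1 + 1 + 2 = 4 reference bases: starting at 7, it covers 7-10 and hits loc.
final Cigar shortDeletion = TextCigarCodec.decode("1M1D2M");
System.out.println(shortDeletion.getReferenceLength());   // 4

// 1M5D2M consumes 1 + 5 + 2 = 8 reference bases, but the matched bases fall on 7 and 13-14,
// so the 5-base deletion (8-12) skips right over loc at position 10.
final Cigar longDeletion = TextCigarCodec.decode("1M5D2M");
System.out.println(longDeletion.getReferenceLength());    // 8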
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadsSparkSink method writeReadsADAM.
private static void writeReadsADAM(final JavaSparkContext ctx, final String outputFile, final JavaRDD<SAMRecord> reads, final SAMFileHeader header) throws IOException {
    final SequenceDictionary seqDict = SequenceDictionary.fromSAMSequenceDictionary(header.getSequenceDictionary());
    final RecordGroupDictionary readGroups = RecordGroupDictionary.fromSAMHeader(header);
    final JavaPairRDD<Void, AlignmentRecord> rddAlignmentRecords = reads.map(read -> {
        read.setHeaderStrict(header);
        AlignmentRecord alignmentRecord = GATKReadToBDGAlignmentRecordConverter.convert(read, seqDict, readGroups);
        read.setHeaderStrict(null);
        return alignmentRecord;
    }).mapToPair(alignmentRecord -> new Tuple2<>(null, alignmentRecord));
    // instantiating a Job is necessary here in order to set the Hadoop Configuration...
    final Job job = Job.getInstance(ctx.hadoopConfiguration());
    // ...which sets a config property that the AvroParquetOutputFormat needs when writing data. Specifically,
    // we are writing the Avro schema to the Configuration as a JSON string. The AvroParquetOutputFormat class knows
    // how to translate objects in the Avro data model to the Parquet primitives that get written.
    AvroParquetOutputFormat.setSchema(job, AlignmentRecord.getClassSchema());
    deleteHadoopFile(outputFile, ctx.hadoopConfiguration());
    rddAlignmentRecords.saveAsNewAPIHadoopFile(outputFile, Void.class, AlignmentRecord.class, AvroParquetOutputFormat.class, job.getConfiguration());
}
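Reading the records back is the mirror image of this write path, using AvroParquetInputFormat. A sketch under the assumption that the same parquet-avro classes are on the classpath (readJob is an illustrative name, not part of ReadsSparkSink):

// Sketch: load the AlignmentRecords written above back into an RDD.
final Job readJob = Job.getInstance(ctx.hadoopConfiguration());
AvroParquetInputFormat.setAvroReadSchema(readJob, AlignmentRecord.getClassSchema());
final JavaRDD<AlignmentRecord> records = ctx.newAPIHadoopFile(outputFile, AvroParquetInputFormat.class, Void.class, AlignmentRecord.class, readJob.getConfiguration()).values();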
use of htsjdk.samtools.SAMFileHeader in project gatk by broadinstitute.
the class ReadPositionUnitTest method test.
@Test
public void test() {
    final SAMFileHeader SAM_HEADER = ArtificialReadUtils.createArtificialSamHeader(10, 0, 1000);
    final List<Allele> alleles = Arrays.asList(Allele.create((byte) 'A', true), Allele.create((byte) 'C', false));
    final AlleleList<Allele> alleleList = new IndexedAlleleList<>(alleles);
    final int chromosomeIndex = 5;
    // variant is a SNP at position 20
    final int variantSite = 20;
    final VariantContext vc = new VariantContextBuilder("source", Integer.toString(chromosomeIndex), variantSite, variantSite, alleles).make();
    final SampleList sampleList = new IndexedSampleList("SAMPLE");
    // 7 length-12 reads
    final Map<String, List<GATKRead>> readMap = new LinkedHashMap<>();
    final List<GATKRead> reads = new ArrayList<>();
    final int[] positionsOfSiteWithinReads = new int[] { 1, 2, 2, 3, 10, 10, 11 };
    final int[] alignmentStarts = Arrays.stream(positionsOfSiteWithinReads).map(n -> variantSite - n).toArray();
    for (int r = 0; r < positionsOfSiteWithinReads.length; r++) {
        final GATKRead read = ArtificialReadUtils.createArtificialRead(SAM_HEADER, "RRR00" + r, chromosomeIndex, alignmentStarts[r], "ACGTACGTACGT".getBytes(), new byte[] { 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30 }, "12M");
        read.setMappingQuality(60);
        reads.add(read);
    }
    readMap.put("SAMPLE", reads);
    final ReadLikelihoods<Allele> likelihoods = new ReadLikelihoods<>(sampleList, alleleList, readMap);
    // we will make the first four reads ref (median position = 2) and the last three alt (median position 10, hence
    // median distance from end = 1)
    final LikelihoodMatrix<Allele> matrix = likelihoods.sampleMatrix(0);
    // log likelihoods are initialized to 0, so we can "turn on" a read for a particular allele by setting the
    // (allele, read) entry to 10
    matrix.set(0, 0, 10);
    matrix.set(0, 1, 10);
    matrix.set(0, 2, 10);
    matrix.set(0, 3, 10);
    matrix.set(1, 4, 10);
    matrix.set(1, 5, 10);
    matrix.set(1, 6, 10);
    final ReadPosition rp = new ReadPosition();
    final GenotypeBuilder gb = new GenotypeBuilder(DUMMY_GENOTYPE);
    rp.annotate(null, vc, DUMMY_GENOTYPE, gb, likelihoods);
    final Genotype g = gb.make();
    final int[] medianRefAndAltPositions = (int[]) g.getExtendedAttribute(ReadPosition.KEY);
    Assert.assertEquals(medianRefAndAltPositions[0], 2);
    Assert.assertEquals(medianRefAndAltPositions[1], 1);
}
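The asserted values follow from arithmetic on the offsets above. A small illustrative check (commons-math3's Median is an assumed stand-in for whatever ReadPosition uses internally):

import org.apache.commons.math3.stat.descriptive.rank.Median;

// Ref reads sit at offsets {1, 2, 2, 3} from the read start -> median 2.
// Alt reads sit at offsets {10, 10, 11} in a 12-base read, so their distances
// from the read end are {1, 1, 0} -> median 1.
final double medianRefOffset = new Median().evaluate(new double[] { 1, 2, 2, 3 });        // 2.0
final double medianAltDistanceFromEnd = new Median().evaluate(new double[] { 1, 1, 0 });  // 1.0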