use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.
the class SampleCollectionUnitTest method testReadGroupIds.
/**
 * Checks that {@code SampleCollection.readGroups()} lists every read-group id of the header,
 * in the expected order: grouped by sample name (read groups without a sample go last) and,
 * within the same sample, by read-group id.
 *
 * @param header the header the sample collection is built from.
 */
@Test(dataProvider = "samFileHeaderData", dependsOnMethods = "testCreation")
public void testReadGroupIds(final SAMFileHeader header) {
    final List<String> actualIds = new SampleCollection(header).readGroups();
    Assert.assertEquals(actualIds.size(), header.getReadGroups().size());

    // Build the expected ordering independently from the header's read groups.
    final List<String> expectedIds = new ArrayList<>();
    for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
        expectedIds.add(readGroup.getId());
    }
    expectedIds.sort((leftId, rightId) -> {
        final SAMReadGroupRecord left = header.getReadGroup(leftId);
        final SAMReadGroupRecord right = header.getReadGroup(rightId);
        final String leftSample = left.getSample();
        final String rightSample = right.getSample();
        if (!Objects.equals(leftSample, rightSample)) {
            // Samples differ: a missing sample sorts after any named sample.
            if (leftSample == null) {
                return 1;
            }
            if (rightSample == null) {
                return -1;
            }
            return leftSample.compareTo(rightSample);
        }
        // Same sample (or both missing): fall back to the read-group id.
        return left.getId().compareTo(right.getId());
    });

    Assert.assertEquals(actualIds, expectedIds);
}
use of htsjdk.samtools.SAMReadGroupRecord in project gatk by broadinstitute.
the class LocusIteratorByStateUnitTest method testLIBS_NotHoldingTooManyReads.
/**
 * Iterates a {@code LocusIteratorByState} over an artificial read stream and checks that no
 * pileup exceeds the requested per-sample downsampling target.
 *
 * @param nReadsPerLocus number of reads the artificial iterator is asked to produce per locus.
 * @param downsampleTo   per-sample coverage target; {@code -1} means no downsampling.
 * @param payloadInBytes size of the payload passed to the artificial read iterator.
 */
@Test(enabled = true, dataProvider = "LIBS_NotHoldingTooManyReads")
public void testLIBS_NotHoldingTooManyReads(final int nReadsPerLocus, final int downsampleTo, final int payloadInBytes) {
    logger.warn(String.format("testLIBS_NotHoldingTooManyReads %d %d %d", nReadsPerLocus, downsampleTo, payloadInBytes));
    final int readLength = 10;
    final SAMFileHeader header = ArtificialReadUtils.createArtificialSamHeader(1, 1, 100000);

    // Register one read group per sample in the header.
    final int nSamples = 1;
    final List<String> samples = new ArrayList<>(nSamples);
    for (int i = 0; i < nSamples; i++) {
        final SAMReadGroupRecord rg = new SAMReadGroupRecord("rg" + i);
        final String sample = "sample" + i;
        samples.add(sample);
        rg.setSample(sample);
        rg.setPlatform(NGSPlatform.ILLUMINA.getDefaultPlatform());
        header.addReadGroup(rg);
    }

    final boolean downsample = downsampleTo != -1;
    final DownsamplingMethod downsampler = downsample
            ? new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsampleTo, null)
            : new DownsamplingMethod(DownsampleType.NONE, null, null);
    final WeakReadTrackingIterator iterator = new WeakReadTrackingIterator(nReadsPerLocus, readLength, payloadInBytes, header);
    final LocusIteratorByState li = new LocusIteratorByState(iterator, downsampler, false, samples, header, true);

    while (li.hasNext()) {
        final AlignmentContext next = li.next();
        // The size bound only makes sense when downsampling is active; with downsampleTo == -1
        // the comparison "size <= -1" could never hold and would fail the test spuriously.
        if (downsample) {
            Assert.assertTrue(next.getBasePileup().size() <= downsampleTo, "Too many elements in pileup " + next);
        }
        // TODO -- assert that there are <= X reads in memory after GC for some X
    }
}
use of htsjdk.samtools.SAMReadGroupRecord in project polyGembler by c-zhou.
the class SAMtools method run.
/**
 * Copies up to the first 100 records of {@code mySamFile} into {@code myOutput}, tagging each
 * record with the read group {@code cz1}.
 *
 * <p>The input header is stored in {@code samHeader}, marked as unsorted, and extended with a
 * {@code cz1} read group (sample {@code cz1}) before being used as the output header. If the
 * input holds fewer than 100 records, all of them are copied.
 */
@Override
public void run() {
    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
    final SamReader inputSam = factory.open(new File(mySamFile));
    try {
        samHeader = inputSam.getFileHeader();
        samHeader.setSortOrder(SortOrder.unsorted);
        final SAMRecordIterator iter = inputSam.iterator();
        try {
            final SAMReadGroupRecord rg = new SAMReadGroupRecord("cz1");
            rg.setSample("cz1");
            samHeader.addReadGroup(rg);

            final SAMFileWriter outSam = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, true, new File(myOutput));
            try {
                // Stop early when the input has fewer than 100 records instead of
                // letting iter.next() throw NoSuchElementException.
                for (int i = 0; i < 100 && iter.hasNext(); i++) {
                    final SAMRecord record = iter.next();
                    record.setAttribute("RG", "cz1");
                    outSam.addAlignment(record);
                }
            } finally {
                // Always close the writer, even if copying failed part-way.
                outSam.close();
            }
        } finally {
            iter.close();
        }
        myLogger.info("exit...");
    } finally {
        try {
            inputSam.close();
        } catch (IOException e) {
            // Failing to close the input is reported but does not abort the run.
            e.printStackTrace();
        }
    }
}
use of htsjdk.samtools.SAMReadGroupRecord in project polyGembler by c-zhou.
the class SamFileSplit method run.
/**
 * Splits every {@code .bam} file in {@code bam_in} into one BAM per file found in {@code bed_in}.
 *
 * <p>For each file in {@code bed_in}, an output directory named after it (with a trailing
 * {@code .bed} removed) is created under {@code bam_out}. Each input BAM is then processed on
 * the thread pool: the first whitespace-separated column of every line of each BED file is taken
 * as a sequence name, a per-BED header restricted to those sequences is built (keeping the
 * {@code VN}/{@code SO} attributes, read groups and program records of the input), and every
 * record whose reference name is listed is written to the matching output with its reference
 * index remapped to the reduced dictionary. A sequence name listed in several BED files is
 * routed to the last one that mentions it.
 *
 * <p>Any exception in a worker is reported, the executor is shut down and the JVM exits with
 * status 1.
 *
 * @throws IllegalArgumentException if {@code bed_in} or {@code bam_in} cannot be listed.
 */
@Override
public void run() {
    Utils.makeOutputDir(bam_out);
    final File[] beds = new File(bed_in).listFiles();
    if (beds == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        throw new IllegalArgumentException("Cannot list BED directory: " + bed_in);
    }
    final String[] out_prefix = new String[beds.length];
    for (int i = 0; i < beds.length; i++) {
        // The dot is escaped so that only a literal ".bed" suffix is stripped.
        out_prefix[i] = bam_out + "/" + beds[i].getName().replaceAll("\\.bed$", "");
        Utils.makeOutputDir(out_prefix[i]);
    }
    final File[] bams = new File(bam_in).listFiles(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return name.endsWith(".bam");
        }
    });
    if (bams == null) {
        throw new IllegalArgumentException("Cannot list BAM directory: " + bam_in);
    }
    this.initial_thread_pool();
    for (final File bam : bams) {
        executor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
                    final SamReader inputSam = factory.open(bam);
                    final SAMFileHeader header = inputSam.getFileHeader();
                    final SAMRecordIterator iter = inputSam.iterator();
                    final SAMSequenceDictionary seqdic = header.getSequenceDictionary();
                    final SAMFileWriter[] outputSam = new SAMFileWriter[beds.length];
                    final SAMSequenceDictionary[] seqdics = new SAMSequenceDictionary[beds.length];
                    // Sequence name -> index of the BED file (and output writer) it belongs to.
                    final Map<String, Integer> outMap = new HashMap<String, Integer>();
                    final String out = bam.getName();

                    for (int i = 0; i < beds.length; i++) {
                        // Collect the sequence names listed in the i-th BED file.
                        final Set<String> bed_seq = new HashSet<String>();
                        final BufferedReader br = new BufferedReader(new FileReader(beds[i]));
                        try {
                            String line;
                            while ((line = br.readLine()) != null) {
                                final String seqName = line.split("\\s+")[0];
                                bed_seq.add(seqName);
                                outMap.put(seqName, i);
                            }
                        } finally {
                            br.close();
                        }

                        // Build a header restricted to the sequences of this BED file.
                        final SAMFileHeader header_i = new SAMFileHeader();
                        final SAMSequenceDictionary seqdic_i = new SAMSequenceDictionary();
                        header_i.setAttribute("VN", header.getAttribute("VN"));
                        header_i.setAttribute("SO", header.getAttribute("SO"));
                        for (SAMSequenceRecord seq : seqdic.getSequences()) {
                            if (bed_seq.contains(seq.getSequenceName())) {
                                seqdic_i.addSequence(seq);
                            }
                        }
                        header_i.setSequenceDictionary(seqdic_i);
                        for (SAMReadGroupRecord rg : header.getReadGroups()) {
                            header_i.addReadGroup(rg);
                        }
                        for (SAMProgramRecord pg : header.getProgramRecords()) {
                            header_i.addProgramRecord(pg);
                        }
                        outputSam[i] = new SAMFileWriterFactory().makeSAMOrBAMWriter(header_i, true, new File(out_prefix[i] + "/" + out));
                        seqdics[i] = seqdic_i;
                    }

                    // Route each record to the output whose BED file lists its reference.
                    while (iter.hasNext()) {
                        final SAMRecord rec = iter.next();
                        final String ref = rec.getReferenceName();
                        final Integer f = outMap.get(ref);
                        if (f != null) {
                            // Re-index the reference against the reduced dictionary of that output.
                            rec.setReferenceIndex(seqdics[f].getSequenceIndex(ref));
                            outputSam[f].addAlignment(rec);
                        }
                    }
                    iter.close();
                    inputSam.close();
                    for (int i = 0; i < outputSam.length; i++) {
                        outputSam[i].close();
                    }
                    myLogger.info(out + " return true");
                } catch (Exception e) {
                    // Any failure is fatal: report it, stop the pool and terminate the JVM.
                    Thread t = Thread.currentThread();
                    t.getUncaughtExceptionHandler().uncaughtException(t, e);
                    e.printStackTrace();
                    executor.shutdown();
                    System.exit(1);
                }
            }
        });
    }
    this.waitFor();
}
use of htsjdk.samtools.SAMReadGroupRecord in project gridss by PapenfussLab.
the class TestHelper method getHeader.
/**
 * Builds a SAM file header for tests.
 *
 * @return a new header that uses the dictionary from {@code getSequenceDictionary()} and holds
 *         a single read group with id {@code "RG"} and sample {@code "sample"}.
 */
public static SAMFileHeader getHeader() {
    final SAMReadGroupRecord readGroup = new SAMReadGroupRecord("RG");
    readGroup.setSample("sample");

    final SAMFileHeader result = new SAMFileHeader();
    result.setSequenceDictionary(getSequenceDictionary());
    result.addReadGroup(readGroup);
    return result;
}
Aggregations