use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.
the class SoftClipAnnotator method initialize.
public void initialize() {
final Set<VCFHeaderLine> hInfo = new HashSet<>();
final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
for (final String sample : samples) {
this.sample2bam.put(sample, new ArrayList<>());
}
for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
if (isUniqueHeaderLine(line, hInfo))
hInfo.add(line);
}
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
VCFHeader header2 = new VCFHeader(vcfHeader);
header2.addMetaDataLine(this.numClipInGenotypeFormatHeaderLine);
header2.addMetaDataLine(this.filterHaveClipInVariant);
vcfWriter.writeHeader(header2);
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
for (final File samFilename : IOUtil.unrollFiles(this.samFilenames, ".bam")) {
logger.info("opening " + samFilename);
final SamReader r = srf.open(samFilename);
final Set<String> sampleset = new HashSet<>();
for (final SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
if (g.getSample() == null || !this.sample2bam.containsKey(g.getSample()))
continue;
sampleset.add(g.getSample());
}
if (sampleset.isEmpty()) {
logger.info("no VCF sample in " + samFilename);
CloserUtil.close(r);
continue;
}
this.samReaders.add(r);
for (final String sample : sampleset) {
if (!this.sample2bam.containsKey(sample))
continue;
this.sample2bam.get(sample).add(r);
}
}
}
use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.
the class ExtendReferenceWithReads method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("No REF defined");
return -1;
}
this.samReaders.clear();
PrintStream out = null;
try {
this.indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
if (dict == null) {
LOG.error("Reference file is missing a sequence dictionary (use picard)");
return -1;
}
final SamReaderFactory srf = super.createSamReaderFactory();
srf.setOption(Option.CACHE_FILE_BASED_INDEXES, true);
for (final String uri : IOUtils.unrollFiles(args)) {
LOG.info("opening BAM " + uri);
final SamReader sr = srf.open(SamInputResource.of(uri));
/* doesn't work with remote ?? */
if (!sr.hasIndex()) {
LOG.error("file " + uri + " is not indexed");
return -1;
}
final SAMFileHeader header = sr.getFileHeader();
if (!header.getSortOrder().equals(SortOrder.coordinate)) {
LOG.error("file " + uri + " not sorted on coordinate but " + header.getSortOrder());
return -1;
}
final SAMSequenceDictionary dict2 = header.getSequenceDictionary();
if (dict2 == null) {
LOG.error("file " + uri + " needs a sequence dictionary");
return -1;
}
samReaders.add(sr);
}
if (samReaders.isEmpty()) {
LOG.error("No BAM defined");
return -1;
}
out = super.openFileOrStdoutAsPrintStream(this.outputFile);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
for (final SAMSequenceRecord ssr : dict.getSequences()) {
final GenomicSequence genomic = new GenomicSequence(this.indexedFastaSequenceFile, ssr.getSequenceName());
int chromStart = 0;
int nPrinted = 0;
out.print(">");
out.print(ssr.getSequenceName());
for (final Rescue side : Rescue.values()) {
switch(side) {
case LEFT:
/* look before position 0 of chromosome */
{
final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, -1, -1, side);
int newstart = 0;
for (final Integer pos : pos2bases.keySet()) {
if (pos >= 0)
continue;
newstart = Math.min(newstart, pos);
}
while (newstart < 0) {
final Counter<Byte> count = pos2bases.get(newstart);
if (nPrinted % 60 == 0)
out.println();
out.print(consensus(count));
newstart++;
++nPrinted;
}
break;
}
case RIGHT:
/* look after position > length(chromosome) */
{
final Map<Integer, Counter<Byte>> pos2bases = this.scanRegion(ssr, -1, -1, side);
int newend = ssr.getSequenceLength();
for (final Integer pos : pos2bases.keySet()) {
if (pos < ssr.getSequenceLength())
continue;
newend = Math.max(newend, pos + 1);
}
for (int i = ssr.getSequenceLength(); i < newend; i++) {
final Counter<Byte> count = pos2bases.get(i);
if (nPrinted % 60 == 0)
out.println();
out.print(consensus(count));
++nPrinted;
}
break;
}
case CENTER:
/* 0 to chromosome-length */
{
chromStart = 0;
while (chromStart < genomic.length()) {
final char base = Character.toUpperCase(genomic.charAt(chromStart));
if (base != 'N') {
progress.watch(ssr.getSequenceName(), chromStart);
if (nPrinted % 60 == 0)
out.println();
out.print(base);
++chromStart;
++nPrinted;
continue;
}
int chromEnd = chromStart + 1;
while (chromEnd < genomic.length() && Character.toUpperCase(genomic.charAt(chromEnd)) == 'N') {
++chromEnd;
}
if (chromEnd - chromStart < this.minLenNNNNContig) {
while (chromStart < chromEnd) {
progress.watch(ssr.getSequenceName(), chromStart);
if (nPrinted % 60 == 0)
out.println();
out.print(base);
++chromStart;
++nPrinted;
}
continue;
}
final Map<Integer, Counter<Byte>> pos2bases = scanRegion(ssr, chromStart, chromEnd, side);
while (chromStart < chromEnd) {
final Counter<Byte> count = pos2bases.get(chromStart);
if (nPrinted % 60 == 0)
out.println();
if (count == null) {
out.print('N');
} else {
out.print(consensus(count));
}
++chromStart;
++nPrinted;
continue;
}
}
break;
}
}
// end switch type
}
out.println();
}
progress.finish();
out.flush();
out.close();
return RETURN_OK;
} catch (final Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(this.indexedFastaSequenceFile);
for (final SamReader r : samReaders) CloserUtil.close(r);
samReaders.clear();
}
}
use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.
the class BamFile method reOpen.
@Override
public BamFile reOpen() throws IOException {
final String url = this.getSource();
final SamReaderFactory srf = SamReaderFactory.makeDefault();
srf.validationStringency(ValidationStringency.LENIENT);
final SamInputResource sir;
if (IOUtil.isUrl(url)) {
sir = SamInputResource.of(new URL(url));
if (!this.indexFile.isPresent())
throw new IOException("Boum");
sir.index(this.indexFile.get());
} else {
sir = SamInputResource.of(new File(url));
}
final BamFile bf = new BamFile(url, srf.open(sir), this.indexFile);
bf.delete_index_on_close = false;
return bf;
}
use of htsjdk.samtools.SamReaderFactory in project jvarkit by lindenb.
the class LumpyMoreSamples method doWork.
@Override
public int doWork(final List<String> args) {
VcfIterator r = null;
VariantContextWriter vcw = null;
final Map<String, SamReader> sample2samreaders = new HashMap<>();
try {
r = super.openVcfIterator(oneFileOrNull(args));
final VCFHeader headerIn = r.getHeader();
final SAMSequenceDictionary dict = headerIn.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("input vcf"));
return -1;
}
final SamReaderFactory samReaderFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
IOUtil.slurpLines(this.bamFileList).stream().forEach(F -> {
if (F.trim().isEmpty())
return;
final SamReader sr = samReaderFactory.open(SamInputResource.of(F));
final SAMFileHeader samHeader = sr.getFileHeader();
final SAMSequenceDictionary dict2 = samHeader.getSequenceDictionary();
if (dict2 == null) {
throw new JvarkitException.BamDictionaryMissing(F);
}
if (!SequenceUtil.areSequenceDictionariesEqual(dict, dict2)) {
throw new JvarkitException.DictionariesAreNotTheSame(dict, dict2);
}
for (final SAMReadGroupRecord rg : samHeader.getReadGroups()) {
final String sample = rg.getSample();
if (StringUtil.isBlank(sample))
continue;
final SamReader reader = sample2samreaders.get(sample);
if (reader == null) {
sample2samreaders.put(sample, reader);
} else if (reader == sr) {
continue;
} else {
throw new JvarkitException.UserError("more than one sample per bam:" + F);
}
}
});
final Set<String> inVcfSampleNames = new HashSet<>(headerIn.getSampleNamesInOrder());
final Set<String> outVcfSampleNames = new HashSet<>(inVcfSampleNames);
outVcfSampleNames.addAll(sample2samreaders.keySet());
final VCFHeader headerOut = new VCFHeader(headerIn.getMetaDataInInputOrder(), outVcfSampleNames);
final VCFFormatHeaderLine SU2 = new VCFFormatHeaderLine("SU2", 1, VCFHeaderLineType.Integer, "Number of pieces of evidence supporting the variant");
final VCFFormatHeaderLine PE2 = new VCFFormatHeaderLine("PE2", 1, VCFHeaderLineType.Integer, "Number of split reads supporting the variant");
final VCFFormatHeaderLine SR2 = new VCFFormatHeaderLine("SR2", 1, VCFHeaderLineType.Integer, "Number of paired-end reads supporting the variant");
headerOut.addMetaDataLine(SU2);
headerOut.addMetaDataLine(PE2);
headerOut.addMetaDataLine(SR2);
vcw = super.openVariantContextWriter(this.outputFile);
vcw.writeHeader(headerOut);
while (r.hasNext()) {
final VariantContext ctx = r.next();
final StructuralVariantType sttype = ctx.getStructuralVariantType();
if (sttype == null)
continue;
final int tid = dict.getSequenceIndex(ctx.getContig());
final Map<String, Genotype> genotypeMap = new HashMap<>();
ctx.getGenotypes().stream().forEach(G -> genotypeMap.put(G.getSampleName(), G));
for (final String sample : sample2samreaders.keySet()) {
final SamReader samReader = sample2samreaders.get(sample);
final SupportingReads sr = new SupportingReads();
switch(sttype) {
case DEL:
{
int pos = ctx.getStart();
int[] ci = confidenceIntervalPos(ctx);
final QueryInterval left = new QueryInterval(tid, pos - ci[0], pos + ci[1]);
int end = ctx.getEnd();
ci = confidenceIntervalEnd(ctx);
final QueryInterval right = new QueryInterval(tid, end - ci[0], end + ci[1]);
for (final SAMRecord rec : extractSupportingReads(ctx, sample, samReader, new QueryInterval[] { left, right })) {
final Cigar cigar = rec.getCigar();
if (cigar.isLeftClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getUnclippedStart(), rec.getStart() - 1);
if (qi.overlaps(left)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
if (cigar.isRightClipped()) {
final QueryInterval qi = new QueryInterval(tid, rec.getEnd() + 1, rec.getUnclippedEnd());
if (qi.overlaps(right)) {
sr.splitReads++;
if (rec.getReadPairedFlag())
sr.pairedReads++;
}
}
}
break;
}
default:
break;
}
final GenotypeBuilder gb;
if (genotypeMap.containsKey(sample)) {
gb = new GenotypeBuilder(genotypeMap.get(sample));
} else {
gb = new GenotypeBuilder(sample, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL));
}
gb.attribute(SR2.getID(), sr.splitReads);
gb.attribute(PE2.getID(), sr.pairedReads);
gb.attribute(SU2.getID(), 0);
genotypeMap.put(sample, gb.make());
}
final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
// add missing samples.
for (final String sampleName : outVcfSampleNames) {
if (genotypeMap.containsKey(sampleName))
continue;
genotypeMap.put(sampleName, new GenotypeBuilder(sampleName, Arrays.asList(Allele.NO_CALL, Allele.NO_CALL)).make());
}
vcb.genotypes(genotypeMap.values());
vcw.add(vcb.make());
}
r.close();
r = null;
sample2samreaders.values().stream().forEach(R -> CloserUtil.close(R));
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(r);
}
}
use of htsjdk.samtools.SamReaderFactory in project gatk by broadinstitute.
the class CompareSAMs method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(samFiles.get(0));
IOUtil.assertFileIsReadable(samFiles.get(1));
SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(VALIDATION_STRINGENCY);
SamReader sam1 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(0));
SamReader sam2 = factory.referenceSequence(REFERENCE_SEQUENCE).open(samFiles.get(1));
SamComparison comparison = new SamComparison(sam1, sam2);
comparison.printReport();
if (comparison.areEqual()) {
System.out.println("Files match.");
} else {
System.out.println("Files differ.");
}
CloserUtil.close(sam1);
CloserUtil.close(sam2);
return comparison.areEqual();
}
Aggregations