use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class SoftClipAnnotator method initialize.
public void initialize() {
final Set<VCFHeaderLine> hInfo = new HashSet<>();
final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
for (final String sample : samples) {
this.sample2bam.put(sample, new ArrayList<>());
}
for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
if (isUniqueHeaderLine(line, hInfo))
hInfo.add(line);
}
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
VCFHeader header2 = new VCFHeader(vcfHeader);
header2.addMetaDataLine(this.numClipInGenotypeFormatHeaderLine);
header2.addMetaDataLine(this.filterHaveClipInVariant);
vcfWriter.writeHeader(header2);
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
for (final File samFilename : IOUtil.unrollFiles(this.samFilenames, ".bam")) {
logger.info("opening " + samFilename);
final SamReader r = srf.open(samFilename);
final Set<String> sampleset = new HashSet<>();
for (final SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
if (g.getSample() == null || !this.sample2bam.containsKey(g.getSample()))
continue;
sampleset.add(g.getSample());
}
if (sampleset.isEmpty()) {
logger.info("no VCF sample in " + samFilename);
CloserUtil.close(r);
continue;
}
this.samReaders.add(r);
for (final String sample : sampleset) {
if (!this.sample2bam.containsKey(sample))
continue;
this.sample2bam.get(sample).add(r);
}
}
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class MiniCaller method doWork.
@Override
public int doWork(final List<String> args) {
ConcatSam.ConcatSamIterator iter = null;
try {
if (this.fastaFile == null) {
LOG.error("no REF");
return -1;
}
/* load faid */
final ReferenceGenomeFactory referenceGenomeFactory = new ReferenceGenomeFactory();
this.referenceGenome = referenceGenomeFactory.openFastaFile(this.fastaFile);
this.dictionary = this.referenceGenome.getDictionary();
if (this.dictionary == null) {
LOG.error(JvarkitException.FastaDictionaryMissing.getMessage(this.fastaFile.getPath()));
}
/* create sam record iterator */
iter = new ConcatSam.Factory().addInterval(this.rgnStr).setEnableUnrollList(true).open(args);
final SAMFileHeader samFileheader = iter.getFileHeader();
final SAMSequenceDictionary dict = samFileheader.getSequenceDictionary();
if (dict == null) {
LOG.error(JvarkitException.BamDictionaryMissing.getMessage(String.join(", ", args)));
return -1;
}
if (!SequenceUtil.areSequenceDictionariesEqual(dict, this.dictionary)) {
LOG.error(JvarkitException.DictionariesAreNotTheSame.getMessage(dict, this.dictionary));
return -1;
}
final List<SAMReadGroupRecord> groups = samFileheader.getReadGroups();
if (groups == null || groups.isEmpty()) {
LOG.error("No group defined in input");
return -1;
}
final Set<String> sampleSet = groups.stream().map(srgr -> this.samRecordPartition.apply(srgr, samRecordPartition.name())).collect(Collectors.toSet());
/* create VCF metadata */
final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.GENOTYPE_KEY));
metaData.add(VCFStandardHeaderLines.getFormatLine(VCFConstants.DEPTH_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.DEPTH_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_COUNT_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_NUMBER_KEY));
metaData.add(VCFStandardHeaderLines.getInfoLine(VCFConstants.ALLELE_FREQUENCY_KEY));
metaData.add(new VCFFormatHeaderLine("DPG", // one value of each genotype
VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Depth for each allele"));
metaData.add(new VCFFormatHeaderLine("DP4", 4, VCFHeaderLineType.Integer, "Depth ReforAlt|Strand : RF,RR,AF,AR"));
metaData.add(new VCFInfoHeaderLine("INDEL", 1, VCFHeaderLineType.Flag, "Variant is indel"));
// addMetaData(metaData);
final VCFHeader vcfHeader = new VCFHeader(metaData, sampleSet);
vcfHeader.setSequenceDictionary(this.dictionary);
/* create variant context */
this.variantContextWriter = super.openVariantContextWriter(outputFile);
this.variantContextWriter.writeHeader(vcfHeader);
ReferenceContig genomicSeq = null;
SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(this.dictionary);
for (; ; ) {
SAMRecord rec = null;
if (iter.hasNext()) {
rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag())
continue;
if (this.readFilter.filterOut(rec))
continue;
/* flush buffer if needed */
while (!this.buffer.isEmpty() && (this.buffer.get(0).tid < rec.getReferenceIndex() || (this.buffer.get(0).tid == rec.getReferenceIndex() && (this.buffer.get(0).getEnd()) < rec.getAlignmentStart()))) {
this.buffer.remove(0).print();
}
/* get genomic sequence at this position */
if (genomicSeq == null || !genomicSeq.getContig().equals(rec.getContig())) {
genomicSeq = this.referenceGenome.getContig(rec.getContig());
}
final Cigar cigar = rec.getCigar();
if (cigar == null)
continue;
int readPos = 0;
// 0 based-reference
int refPos0 = rec.getAlignmentStart() - 1;
final byte[] bases = rec.getReadBases();
final byte[] quals = rec.getBaseQualities();
final String sampleName = this.samRecordPartition.getPartion(rec, samRecordPartition.name());
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
switch(op) {
case P:
break;
case H:
break;
case S:
readPos += ce.getLength();
break;
// go
case N:
case D:
{
if (// we need base before deletion
refPos0 > 0) {
char refBase = genomicSeq.charAt(refPos0 - 1);
/* we use base before deletion */
final StringBuilder sb = new StringBuilder(ce.getLength());
sb.append(refBase);
for (int i = 0; i < ce.getLength(); ++i) {
sb.append(genomicSeq.charAt(refPos0 + i));
}
findContext(rec.getReferenceIndex(), // we use base *before deletion */
refPos0 - 1, Allele.create(sb.toString(), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf(refBase), false)).incr(rec.getReadNegativeStrandFlag());
}
refPos0 += ce.getLength();
break;
}
case I:
{
if (refPos0 > 0) {
// float qual=0;
char refBase = Character.toUpperCase(genomicSeq.charAt(refPos0 - 1));
final StringBuilder sb = new StringBuilder(1 + ce.getLength());
sb.append(refBase);
for (int i = 0; i < ce.getLength(); ++i) {
sb.append((char) bases[readPos + i]);
// qual+=(readPos + i < quals.length?quals[ readPos + i]:0);
}
findContext(rec.getReferenceIndex(), // we use base *before deletion */
refPos0 - 1, Allele.create(String.valueOf(refBase), true)).getSample(sampleName).getAllele(Allele.create(sb.toString().toUpperCase(), false)).incr(rec.getReadNegativeStrandFlag());
}
readPos += ce.getLength();
break;
}
case EQ:
case M:
case X:
{
for (int i = 0; i < ce.getLength(); ++i) {
findContext(rec.getReferenceIndex(), refPos0 + i, Allele.create(String.valueOf(genomicSeq.charAt(refPos0 + i)), true)).getSample(sampleName).getAllele(Allele.create(String.valueOf((char) bases[readPos + i]), false)).incr(rec.getReadNegativeStrandFlag());
}
readPos += ce.getLength();
refPos0 += ce.getLength();
break;
}
default:
throw new IllegalStateException("Case statement didn't deal with cigar op: " + op);
}
}
} else {
break;
}
}
while (!buffer.isEmpty()) buffer.remove(0).print();
progress.finish();
iter.close();
iter = null;
this.variantContextWriter.close();
this.variantContextWriter = null;
return RETURN_OK;
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(this.referenceGenome);
CloserUtil.close(this.variantContextWriter);
}
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class BlastToSam method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null || !this.faidx.exists() || !this.faidx.isFile()) {
LOG.error("Option -R was not defined or dictionary missing");
return -1;
}
final boolean interleaved_input = this.EXPECTED_SIZE > 0;
final int maxRecordsInRam = 5000;
SAMFileWriter sfw = null;
XMLEventReader rx = null;
final SAMFileWriterFactory sfwf = new SAMFileWriterFactory();
sfwf.setCreateIndex(false);
sfwf.setMaxRecordsInRam(maxRecordsInRam);
sfwf.setCreateMd5File(false);
sfwf.setUseAsyncIo(false);
final SAMFileHeader header = new SAMFileHeader();
try {
LOG.info("opening " + faidx);
this.dictionary = SAMSequenceDictionaryExtractor.extractDictionary(faidx);
header.setSortOrder(SortOrder.unsorted);
header.setSequenceDictionary(this.dictionary);
final JAXBContext jc = JAXBContext.newInstance("gov.nih.nlm.ncbi.blast");
this.unmarshaller = jc.createUnmarshaller();
final XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();
xmlInputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, Boolean.FALSE);
xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, Boolean.TRUE);
xmlInputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);
xmlInputFactory.setXMLResolver(new XMLResolver() {
@Override
public Object resolveEntity(String arg0, String arg1, String arg2, String arg3) throws XMLStreamException {
LOG.info("resolveEntity:" + arg0 + "/" + arg1 + "/" + arg2);
return null;
}
});
final String inputName = oneFileOrNull(args);
if (inputName == null) {
LOG.info("Reading from stdin");
rx = xmlInputFactory.createXMLEventReader(stdin());
} else if (args.size() == 1) {
LOG.info("Reading from " + inputName);
rx = xmlInputFactory.createXMLEventReader(IOUtils.openURIForBufferedReading(inputName));
} else {
LOG.error("Illegal number of args");
return -1;
}
final SAMProgramRecord prg2 = header.createProgramRecord();
fillHeader(rx, prg2);
final SAMProgramRecord prg1 = header.createProgramRecord();
prg1.setCommandLine(getProgramCommandLine());
prg1.setProgramVersion(getVersion());
prg1.setProgramName(getProgramName());
prg1.setPreviousProgramGroupId(prg2.getId());
final SAMReadGroupRecord rg1 = new SAMReadGroupRecord("g1");
rg1.setLibrary("blast");
rg1.setSample("blast");
rg1.setDescription("blast");
header.addReadGroup(rg1);
sfw = this.writingBamArgs.openSAMFileWriter(outputFile, header, true);
if (interleaved_input) {
run_paired(sfw, rx, header);
} else {
run_single(sfw, rx, header);
}
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(rx);
}
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class BamStage method createReadGroupPane.
private Tab createReadGroupPane(final SAMFileHeader header) {
final TableView<SAMReadGroupRecord> table = new TableView<>(header == null ? FXCollections.observableArrayList() : FXCollections.observableArrayList(header.getReadGroups()));
table.getColumns().add(makeColumn("ID", G -> G.getId()));
table.getColumns().add(makeColumn("Sample", G -> G.getSample()));
table.getColumns().add(makeColumn("Center", G -> G.getSequencingCenter()));
table.getColumns().add(makeColumn("Platform", G -> G.getPlatform()));
table.getColumns().add(makeColumn("PlatformModel", G -> G.getPlatformModel()));
table.getColumns().add(makeColumn("PlatformUnit", G -> G.getPlatformUnit()));
table.getColumns().add(makeColumn("MedianInsertSize", G -> G.getPredictedMedianInsertSize()));
table.getColumns().add(makeColumn("Desc", G -> G.getDescription()));
table.getColumns().add(makeColumn("PU", G -> G.getPlatformUnit()));
table.getColumns().add(makeColumn("Lib", G -> G.getLibrary()));
table.getColumns().add(makeColumn("Run-Date", G -> G.getRunDate()));
final Tab tab = new Tab("ReadGroups", table);
tab.setClosable(false);
table.setPlaceholder(new Label("No BAM read-greoup."));
return tab;
}
use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.
the class SamScanSplitReads method analyseSamRecord.
private void analyseSamRecord(final SAMRecord rec) {
if (rec.getReadUnmappedFlag())
return;
if (rec.getReadFailsVendorQualityCheckFlag())
return;
if (rec.isSecondaryOrSupplementary())
return;
if (rec.getDuplicateReadFlag())
return;
final List<SAMRecord> others = SAMUtils.getOtherCanonicalAlignments(rec);
if (others.isEmpty())
return;
String sample = this.defaultSampleName;
final SAMReadGroupRecord g = rec.getReadGroup();
if (g != null) {
final String sa = g.getSample();
if (sa != null)
sample = sa;
}
IntervalTreeMap<Set<Arc>> database = this.sample2database.get(sample);
if (database == null) {
database = new IntervalTreeMap<Set<Arc>>();
this.sample2database.put(sample, database);
}
for (final SAMRecord other : others) {
analyseSamPair(database, rec, other);
}
}
Aggregations