use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class Biostar90204 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.suffix_length < 0) {
LOG.error("Bad value of suffix_length:" + this.suffix_length);
return -1;
}
if (this.record_per_file < 1L) {
LOG.error("Bad value of record_per_file:" + this.record_per_file);
return -1;
}
SAMFileWriter sfw = null;
SAMRecordIterator iter = null;
SamReader samFileReader = null;
PrintWriter manifest = new PrintWriter(new NullOuputStream());
try {
samFileReader = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader header = samFileReader.getFileHeader();
int split_file_number = 0;
long nReads = 0L;
iter = samFileReader.iterator();
if (this.manifestFile != null) {
manifest.close();
manifest = new PrintWriter(manifestFile);
}
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (this.samRecordFilter.filterOut(rec))
continue;
++nReads;
if (sfw == null) {
split_file_number++;
final String pathname = (this.prefix.isEmpty() ? "" : this.prefix + ".") + String.format("%0" + suffix_length + "d", split_file_number) + ".bam";
final File out = new File(pathname);
manifest.write(pathname);
manifest.write("\t" + (nReads) + "\t");
final SAMFileHeader header2 = header.clone();
header2.addComment("SPLIT:" + split_file_number);
header2.addComment("SPLIT:Starting from Read" + nReads);
sfw = this.writingBamArgs.openSAMFileWriter(out, header2, true);
}
sfw.addAlignment(rec);
if (nReads % record_per_file == 0) {
sfw.close();
manifest.write((nReads) + "\n");
sfw = null;
}
}
if (sfw != null) {
sfw.close();
manifest.write((nReads) + "\n");
}
manifest.flush();
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(manifest);
CloserUtil.close(sfw);
CloserUtil.close(iter);
CloserUtil.close(samFileReader);
}
return 0;
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class Biostar59647 method doWork.
@Override
public int doWork(final List<String> args) {
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
SamReader samFileReader = null;
PrintStream pout;
try {
GenomicSequence genomicSequence = null;
indexedFastaSequenceFile = new IndexedFastaSequenceFile(refFile);
samFileReader = null;
final String bamFile = oneFileOrNull(args);
samFileReader = super.openSamReader(bamFile);
if (!SequenceUtil.areSequenceDictionariesEqual(indexedFastaSequenceFile.getSequenceDictionary(), samFileReader.getFileHeader().getSequenceDictionary())) {
LOG.warning("Not the same sequence dictionaries");
}
final XMLOutputFactory xmlfactory = XMLOutputFactory.newInstance();
pout = (outputFile == null ? stdout() : new PrintStream(this.outputFile));
final XMLStreamWriter w = xmlfactory.createXMLStreamWriter(pout, "UTF-8");
w.writeStartDocument("UTF-8", "1.0");
w.writeStartElement("sam");
w.writeAttribute("bam", (bamFile == null ? "stdin" : bamFile));
w.writeAttribute("ref", refFile.getPath());
w.writeComment(getProgramCommandLine());
final SAMSequenceDictionaryProgress progess = new SAMSequenceDictionaryProgress(samFileReader.getFileHeader().getSequenceDictionary());
final SAMRecordIterator iter = samFileReader.iterator();
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
progess.watch(rec);
final byte[] readbases = rec.getReadBases();
w.writeStartElement("read");
w.writeStartElement("name");
w.writeCharacters(rec.getReadName());
w.writeEndElement();
w.writeStartElement("sequence");
w.writeCharacters(new String(readbases));
w.writeEndElement();
w.writeStartElement("flags");
for (SAMFlag f : SAMFlag.values()) {
w.writeAttribute(f.name(), String.valueOf(f.isSet(rec.getFlags())));
}
w.writeCharacters(String.valueOf(rec.getFlags()));
// flags
w.writeEndElement();
if (!rec.getReadUnmappedFlag()) {
w.writeStartElement("qual");
w.writeCharacters(String.valueOf(rec.getMappingQuality()));
w.writeEndElement();
w.writeStartElement("chrom");
w.writeAttribute("index", String.valueOf(rec.getReferenceIndex()));
w.writeCharacters(rec.getReferenceName());
w.writeEndElement();
w.writeStartElement("pos");
w.writeCharacters(String.valueOf(rec.getAlignmentStart()));
w.writeEndElement();
w.writeStartElement("cigar");
w.writeCharacters(rec.getCigarString());
w.writeEndElement();
}
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
w.writeStartElement("mate-chrom");
w.writeAttribute("index", String.valueOf(rec.getMateReferenceIndex()));
w.writeCharacters(rec.getMateReferenceName());
w.writeEndElement();
w.writeStartElement("mate-pos");
w.writeCharacters(String.valueOf(rec.getMateAlignmentStart()));
w.writeEndElement();
}
if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
if (genomicSequence == null || !genomicSequence.getChrom().equals(rec.getReferenceName())) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
w.writeStartElement("align");
int readIndex = 0;
int refIndex = rec.getAlignmentStart();
for (final CigarElement e : rec.getCigar().getCigarElements()) {
switch(e.getOperator()) {
// ignore hard clips
case H:
break;
// ignore pads
case P:
break;
// cont.
case I:
case S:
{
final int length = e.getLength();
for (int i = 0; i < length; ++i) {
w.writeEmptyElement(e.getOperator().name());
w.writeAttribute("read-index", String.valueOf(readIndex + 1));
if (readIndex >= 0 && readIndex < readbases.length) {
w.writeAttribute("read-base", String.valueOf((char) (readbases[readIndex])));
}
readIndex++;
}
break;
}
// cont. -- reference skip
case N:
case D:
{
final int length = e.getLength();
for (int i = 0; i < length; ++i) {
w.writeEmptyElement(e.getOperator().name());
w.writeAttribute("ref-index", String.valueOf(refIndex));
if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
w.writeAttribute("ref-base", String.valueOf(genomicSequence.charAt(refIndex - 1)));
}
refIndex++;
}
break;
}
case M:
case EQ:
case X:
{
final int length = e.getLength();
for (int i = 0; i < length; ++i) {
w.writeEmptyElement(e.getOperator().name());
char baseRead = '\0';
if (readIndex >= 0 && readIndex < readbases.length) {
baseRead = (char) (rec.getReadBases()[readIndex]);
w.writeAttribute("read-index", String.valueOf(readIndex + 1));
w.writeAttribute("read-base", String.valueOf(baseRead));
}
w.writeAttribute("ref-index", String.valueOf(refIndex));
if (refIndex >= 1 && refIndex <= genomicSequence.length()) {
char baseRef = genomicSequence.charAt(refIndex - 1);
w.writeAttribute("ref-base", String.valueOf(baseRef));
if (Character.toUpperCase(baseRef) != Character.toUpperCase(baseRead)) {
w.writeAttribute("mismatch", "true");
}
}
refIndex++;
readIndex++;
}
break;
}
default:
throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
}
}
w.writeEndElement();
}
w.writeEndElement();
}
iter.close();
w.writeEndElement();
w.writeEndDocument();
w.flush();
pout.flush();
CloserUtil.close(w);
CloserUtil.close(pout);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(samFileReader);
CloserUtil.close(indexedFastaSequenceFile);
}
return 0;
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class Biostar76892 method doWork.
@Override
public int doWork(final List<String> args) {
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
sfr = super.openSamReader(oneFileOrNull(args));
sfw = writingBamArgs.openSAMFileWriter(this.outputFile, sfr.getFileHeader(), true);
long nRecords = 0;
final List<SAMRecord> buffer = new ArrayList<SAMRecord>();
SAMRecordIterator iter = sfr.iterator();
for (; ; ) {
SAMRecord rec = null;
// get next record
if (iter.hasNext()) {
rec = iter.next();
++nRecords;
if (nRecords % 1000000 == 0)
LOG.info("records: " + nRecords);
if (!rec.getReadPairedFlag() || rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag() || rec.getProperPairFlag() || rec.getReferenceIndex() != rec.getMateReferenceIndex() || rec.getReadNegativeStrandFlag() == !rec.getMateNegativeStrandFlag()) {
if (!onlySaveFixed)
sfw.addAlignment(rec);
continue;
}
}
if (rec != null) {
int i = 0;
// cleanup buffer
int mate_index = -1;
while (i < buffer.size()) {
SAMRecord prev = buffer.get(i);
if (prev.getReferenceIndex() != rec.getReferenceIndex() || prev.getAlignmentEnd() + distance < rec.getAlignmentStart()) {
if (!onlySaveFixed)
sfw.addAlignment(prev);
buffer.remove(i);
} else if (prev.getReadName().equals(rec.getReadName()) && ((prev.getFirstOfPairFlag() && rec.getSecondOfPairFlag()) || (rec.getFirstOfPairFlag() && prev.getSecondOfPairFlag()))) {
mate_index = i;
++i;
} else {
++i;
}
}
if (mate_index == -1) {
buffer.add(rec);
} else {
final SAMRecord mate = buffer.get(mate_index);
buffer.remove(mate_index);
LOG.info("changing " + rec + " " + mate);
if (mate.getReadNegativeStrandFlag()) {
mate.setReadNegativeStrandFlag(false);
rec.setMateNegativeStrandFlag(mate.getReadNegativeStrandFlag());
} else {
rec.setReadNegativeStrandFlag(false);
mate.setMateNegativeStrandFlag(rec.getReadNegativeStrandFlag());
}
if (!mate.getReadFailsVendorQualityCheckFlag() && !rec.getReadFailsVendorQualityCheckFlag()) {
mate.setProperPairFlag(true);
rec.setProperPairFlag(true);
}
mate.setAttribute("rv", 1);
rec.setAttribute("rv", 1);
sfw.addAlignment(mate);
sfw.addAlignment(rec);
}
} else {
for (final SAMRecord r : buffer) {
if (!onlySaveFixed)
sfw.addAlignment(r);
}
break;
}
}
LOG.info("done");
sfw.close();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(sfr);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class Biostar78400 method doWork.
@Override
public int doWork(List<String> args) {
if (this.XML == null) {
LOG.error("XML file missing");
return -1;
}
final Map<String, Map<Integer, String>> flowcell2lane2id = new HashMap<String, Map<Integer, String>>();
SamReader sfr = null;
SAMFileWriter sfw = null;
try {
final Pattern readNameSignature = Pattern.compile(this.readNameSignatureStr);
final JAXBContext context = JAXBContext.newInstance(ReadGroup.class, ReadGroupList.class);
final Unmarshaller unmarshaller = context.createUnmarshaller();
final ReadGroupList rgl = unmarshaller.unmarshal(new StreamSource(XML), ReadGroupList.class).getValue();
if (rgl.flowcells.isEmpty()) {
LOG.error("empty XML " + XML);
return -1;
}
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header = sfr.getFileHeader().clone();
header.addComment("Processed with " + getProgramName());
final Set<String> seenids = new HashSet<String>();
final List<SAMReadGroupRecord> samReadGroupRecords = new ArrayList<SAMReadGroupRecord>();
for (final FlowCell fc : rgl.flowcells) {
final Map<Integer, String> lane2id = new HashMap<Integer, String>();
for (final Lane lane : fc.lanes) {
for (final ReadGroup rg : lane.readGroups) {
if (seenids.contains(rg.id)) {
LOG.error("Group id " + rg.id + " defined twice");
return -1;
}
seenids.add(rg.id);
// create the read group we'll be using
final SAMReadGroupRecord rgrec = new SAMReadGroupRecord(rg.id);
rgrec.setLibrary(rg.library);
rgrec.setPlatform(rg.platform);
rgrec.setSample(rg.sample);
rgrec.setPlatformUnit(rg.platformunit);
if (rg.center != null)
rgrec.setSequencingCenter(rg.center);
if (rg.description != null)
rgrec.setDescription(rg.description);
lane2id.put(lane.id, rg.id);
samReadGroupRecords.add(rgrec);
}
}
if (flowcell2lane2id.containsKey(fc.name)) {
LOG.error("FlowCell id " + fc.name + " defined twice in XML");
return -1;
}
flowcell2lane2id.put(fc.name, lane2id);
}
header.setReadGroups(samReadGroupRecords);
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final Matcher matcher = readNameSignature.matcher(rec.getReadName());
final String flowcellStr;
final String laneStr;
if (matcher.matches()) {
flowcellStr = matcher.group(1);
laneStr = matcher.group(2);
} else {
LOG.error("Read name " + rec.getReadName() + " doesn't match regular expression " + readNameSignature.pattern() + ". please check options");
return -1;
}
String RGID = null;
final Map<Integer, String> lane2id = flowcell2lane2id.get(flowcellStr);
if (lane2id == null)
throw new RuntimeException("Cannot get flowcell/readgroup for " + rec.getReadName());
try {
RGID = lane2id.get(Integer.parseInt(laneStr));
} catch (final Exception e) {
LOG.error("bad lane-Id in " + rec.getReadName());
return -1;
}
if (RGID == null) {
throw new RuntimeException("Cannot get RGID for " + rec.getReadName() + " flowcell:" + flowcellStr + " lane2id:" + laneStr + " dict:" + lane2id);
}
rec.setAttribute(SAMTag.RG.name(), RGID);
sfw.addAlignment(rec);
}
progress.finish();
iter.close();
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(sfr);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class SoftClipAnnotator method initialize.
public void initialize() {
final Set<VCFHeaderLine> hInfo = new HashSet<>();
final List<String> rodName = Arrays.asList(variantCollection.variants.getName());
final Set<String> samples = SampleUtils.getUniqueSamplesFromRods(getToolkit(), rodName);
for (final String sample : samples) {
this.sample2bam.put(sample, new ArrayList<>());
}
for (final VCFHeaderLine line : GATKVCFUtils.getHeaderFields(getToolkit(), rodName)) {
if (isUniqueHeaderLine(line, hInfo))
hInfo.add(line);
}
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
VCFHeader header2 = new VCFHeader(vcfHeader);
header2.addMetaDataLine(this.numClipInGenotypeFormatHeaderLine);
header2.addMetaDataLine(this.filterHaveClipInVariant);
vcfWriter.writeHeader(header2);
final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
for (final File samFilename : IOUtil.unrollFiles(this.samFilenames, ".bam")) {
logger.info("opening " + samFilename);
final SamReader r = srf.open(samFilename);
final Set<String> sampleset = new HashSet<>();
for (final SAMReadGroupRecord g : r.getFileHeader().getReadGroups()) {
if (g.getSample() == null || !this.sample2bam.containsKey(g.getSample()))
continue;
sampleset.add(g.getSample());
}
if (sampleset.isEmpty()) {
logger.info("no VCF sample in " + samFilename);
CloserUtil.close(r);
continue;
}
this.samReaders.add(r);
for (final String sample : sampleset) {
if (!this.sample2bam.containsKey(sample))
continue;
this.sample2bam.get(sample).add(r);
}
}
}
Aggregations