use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class NgsFilesScanner method readBam.
@Override
protected void readBam(final File f) {
if (!f.canRead())
return;
SamReader r = null;
try {
StringWriter sw = new StringWriter();
XMLOutputFactory xof = XMLOutputFactory.newFactory();
XMLStreamWriter out = xof.createXMLStreamWriter(new StreamResult(sw));
out.writeStartElement("bam");
writeFile(out, f);
r = super.openSamReader(f.getPath());
final SAMFileHeader h = r.getFileHeader();
out.writeStartElement("samples");
if (h != null && h.getReadGroups() != null) {
Set<String> seen = new HashSet<String>();
for (SAMReadGroupRecord rg : h.getReadGroups()) {
String sample = rg.getSample();
if (sample == null || sample.isEmpty() || seen.contains(sample))
continue;
seen.add(sample);
out.writeStartElement("sample");
out.writeCharacters(sample);
out.writeEndElement();
}
}
out.writeEndElement();
out.writeEndElement();
out.flush();
out.close();
sw.flush();
put(f, sw.toString());
} catch (Exception e) {
LOG.warning(e);
} finally {
CloserUtil.close(r);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class NgsFilesSummary method readBam.
@Override
protected void readBam(final File f) {
if (!f.canRead())
return;
SamReader r = null;
try {
r = super.openSamReader(f.getPath());
SAMFileHeader h = r.getFileHeader();
if (h != null && h.getReadGroups() != null && h.getReadGroups().isEmpty()) {
for (final SAMReadGroupRecord rg : h.getReadGroups()) {
String sample = rg.getSample();
if (StringUtil.isBlank(sample)) {
sample = "_NO_SAMPLE_RG_";
}
print(sample, InfoType.BAM, f);
}
} else {
print("_NO_READ_GROUP_", InfoType.BAM, f);
}
} catch (final Exception e) {
LOG.warning(e);
} finally {
CloserUtil.close(r);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class PcrSliceReads method doWork.
@Override
public int doWork(List<String> args) {
if (bedFile == null) {
LOG.error("undefined bed file");
return -1;
}
BufferedReader r = null;
SamReader samReader = null;
try {
samReader = super.openSamReader(oneFileOrNull(args));
final BedLineCodec codec = new BedLineCodec();
r = IOUtils.openFileForBufferedReading(bedFile);
String line;
while ((line = r.readLine()) != null) {
final BedLine bed = codec.decode(line);
if (bed == null)
continue;
final String chrom = bed.getContig();
int chromStart1 = bed.getStart();
int chromEnd1 = bed.getEnd();
if (chromStart1 < 1 || chromStart1 > chromEnd1) {
LOG.error("Bad bed line " + line);
return -1;
}
final String name = bed.get(3).trim();
if (name == null || name.isEmpty()) {
LOG.error("Bad bed line (name missing) in " + line);
return -1;
}
final Interval i = new Interval(chrom, chromStart1, chromEnd1, false, name);
this.bedIntervals.put(i, i);
}
return run(samReader);
} catch (Exception e) {
LOG.error(e);
return -1;
} finally {
CloserUtil.close(r);
CloserUtil.close(samReader);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class SamSlop method doWork.
@Override
public int doWork(List<String> args) {
if (this.faidx == null) {
LOG.error("Reference was not specified.");
return -1;
}
if (this.defaultQual.length() != 1) {
LOG.error("default quality should have length==1 " + this.defaultQual);
}
GenomicSequence genomicSequence = null;
SamReader sfr = null;
SAMFileWriter sfw = null;
IndexedFastaSequenceFile indexedFastaSequenceFile = null;
final char defaultQUAL = this.defaultQual.charAt(0);
try {
final String inputName = oneFileOrNull(args);
LOG.info("Loading reference");
indexedFastaSequenceFile = new IndexedFastaSequenceFile(faidx);
sfr = openSamReader(inputName);
final SAMFileHeader header = sfr.getFileHeader();
header.setSortOrder(SortOrder.unsorted);
sfw = writingBamArgs.openSAMFileWriter(outputFile, header, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
final Cigar cigar = rec.getCigar();
if (rec.getReadUnmappedFlag() || cigar == null || cigar.isEmpty() || rec.getReadBases() == SAMRecord.NULL_SEQUENCE || (this.extend5 <= 0 && this.extend3 <= 0)) {
sfw.addAlignment(rec);
continue;
}
final StringBuilder sbs = new StringBuilder(rec.getReadString());
final StringBuilder sbq = new StringBuilder(rec.getBaseQualityString());
if (genomicSequence == null || genomicSequence.getSAMSequenceRecord().getSequenceIndex() != rec.getReferenceIndex()) {
genomicSequence = new GenomicSequence(indexedFastaSequenceFile, rec.getReferenceName());
}
final int refPos1 = (this.removeClip ? rec.getAlignmentStart() : rec.getUnclippedStart());
final int endAlignmend1 = (this.removeClip ? rec.getAlignmentEnd() : rec.getUnclippedEnd());
final List<CigarElement> cl = new ArrayList<>(cigar.getCigarElements());
if (!this.removeClip) {
// replace clip S/H by match M
for (int i = 0; i < cl.size(); ++i) {
final CigarElement ce = cl.get(i);
if (ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H) {
cl.set(i, new CigarElement(ce.getLength(), CigarOperator.M));
}
}
}
if (this.extend5 > 0 && refPos1 > 1) {
if (this.removeClip) {
// /remove hard + soft clip 5'
while (!cl.isEmpty()) {
// first
final CigarElement ce = cl.get(0);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.replace(0, ce.getLength(), "");
sbq.replace(0, ce.getLength(), "");
}
// remove first
cl.remove(0);
}
}
final StringBuilder prefix = new StringBuilder(this.extend5);
// /append + soft clip 5'
for (int i = 0; i < this.extend5; ++i) {
int x1 = (refPos1 - 1) - i;
// break if out of genome
if (x1 < 1)
break;
prefix.insert(0, genomicSequence.charAt(x1 - 1));
}
sbs.insert(0, prefix.toString());
// preprend quality
for (int i = 0; i < prefix.length(); ++i) sbq.insert(0, defaultQUAL);
// prepend cigar
cl.add(0, new CigarElement(prefix.length(), CigarOperator.M));
// update start pos
rec.setAlignmentStart(refPos1 - prefix.length());
}
if (this.extend3 > 0 && rec.getAlignmentEnd() < genomicSequence.length()) {
if (this.removeClip) {
// /remove hard + soft clip 3'
while (!cl.isEmpty()) {
// last
final CigarElement ce = cl.get(cl.size() - 1);
// not a clip
if (!(ce.getOperator() == CigarOperator.S || ce.getOperator() == CigarOperator.H)) {
break;
}
if (ce.getOperator() == CigarOperator.S) {
sbs.setLength(sbs.length() - ce.getLength());
sbq.setLength(sbq.length() - ce.getLength());
}
// remove last
cl.remove(cl.size() - 1);
}
}
int extend = 0;
for (int pos1 = endAlignmend1 + 1; pos1 <= (endAlignmend1 + this.extend3) && pos1 <= genomicSequence.length(); ++pos1) {
sbs.append(genomicSequence.charAt(pos1 - 1));
sbq.append(defaultQUAL);
++extend;
}
// append cigar
cl.add(new CigarElement(extend, CigarOperator.M));
}
// simplify cigar
int idx = 0;
while (idx + 1 < cl.size()) {
final CigarElement ce1 = cl.get(idx);
final CigarElement ce2 = cl.get(idx + 1);
if (ce1.getOperator() == ce2.getOperator()) {
cl.set(idx, new CigarElement(ce1.getLength() + ce2.getLength(), ce1.getOperator()));
cl.remove(idx + 1);
} else {
idx++;
}
}
rec.setCigar(new Cigar(cl));
rec.setReadString(sbs.toString());
rec.setBaseQualityString(sbq.toString());
final List<SAMValidationError> errors = rec.isValid();
if (errors != null && !errors.isEmpty()) {
for (SAMValidationError err : errors) {
LOG.error(err.getMessage());
}
}
// info("changed "+rec.getCigarString()+" to "+newCigarStr+" "+rec.getReadName()+" "+rec.getReadString());
sfw.addAlignment(rec);
}
progress.finish();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(indexedFastaSequenceFile);
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SamReader in project jvarkit by lindenb.
the class SamToJson method call.
private int call(final String inputName) throws Exception {
PrintWriter out = null;
SamReader sfr = null;
final SamJsonWriterFactory factory = SamJsonWriterFactory.newInstance().printHeader(this.print_header).printReadName(!this.disable_readName).printAttributes(!this.disable_atts).expandFlag(this.expflag).expandCigar(this.excigar);
SAMFileWriter swf = null;
try {
sfr = super.openSamReader(inputName);
out = super.openFileOrStdoutAsPrintWriter(this.output);
swf = factory.open(sfr.getFileHeader(), out);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext() && !out.checkError()) {
swf.addAlignment(iter.next());
}
iter.close();
out.flush();
swf.close();
swf = null;
LOG.info("done");
return 0;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfr);
CloserUtil.close(swf);
}
}
Aggregations