use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class Biostar214299 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.positionFile == null) {
LOG.error("position File is not defined.");
return -1;
}
final String UNAFFECTED_SAMPLE = "UNAFFECTED";
final String AMBIGOUS_SAMPLE = "AMBIGOUS";
final String UNMAPPED = "UNMAPPED";
SamReader sfr = null;
SAMFileWriter sfw = null;
final IntervalTreeMap<Position> positionsTreeMap = new IntervalTreeMap<>();
final Set<String> samples = new HashSet<>();
try {
sfr = openSamReader(oneFileOrNull(args));
final SAMFileHeader header = sfr.getFileHeader();
final SAMSequenceDictionary dict = header.getSequenceDictionary();
if (dict == null) {
LOG.error("Dictionary missing in input sam");
return -1;
}
try (BufferedReader br = IOUtils.openFileForBufferedReading(this.positionFile)) {
String line;
while ((line = br.readLine()) != null) {
if (line.trim().isEmpty() || line.startsWith("#"))
continue;
final String[] tokens = line.split("[\t]");
if (tokens.length < 4) {
LOG.error("Not enough columns in " + line);
return -1;
}
final String contig = tokens[0];
if (dict.getSequence(contig) == null) {
LOG.error("No such contig in input's sam dictionary: " + contig);
return -1;
}
final int refpos = Integer.parseInt(tokens[1]);
final Interval interval = new Interval(contig, refpos, refpos);
Position position = positionsTreeMap.get(interval);
if (position == null) {
position = new Position();
// position.contig = contig;
position.refpos = refpos;
positionsTreeMap.put(interval, position);
}
final String bases = tokens[2].toUpperCase();
if (bases.length() != 1 || !bases.matches("[ATGC]")) {
LOG.error("in " + line + " bases should be one letter an ATGC");
return -1;
}
if (position.base2sample.containsKey(bases.charAt(0))) {
LOG.error("in " + line + " bases already defined for this position");
return -1;
}
final String sampleName = tokens[3].trim();
if (sampleName.isEmpty()) {
LOG.error("sample name cannot be empty");
return -1;
}
samples.add(sampleName);
position.base2sample.put(bases.charAt(0), sampleName);
}
} catch (final IOException err) {
LOG.error(err);
return -1;
}
if (samples.contains(UNAFFECTED_SAMPLE)) {
LOG.error("Sample cannot be named " + UNAFFECTED_SAMPLE);
return -1;
}
if (samples.contains(AMBIGOUS_SAMPLE)) {
LOG.error("Sample cannot be named " + AMBIGOUS_SAMPLE);
return -1;
}
if (samples.contains(UNMAPPED)) {
LOG.error("Sample cannot be named " + UNMAPPED);
return -1;
}
samples.add(UNAFFECTED_SAMPLE);
samples.add(AMBIGOUS_SAMPLE);
samples.add(UNMAPPED);
final SAMFileHeader newHeader = new SAMFileHeader();
newHeader.setSortOrder(header.getSortOrder());
newHeader.setSequenceDictionary(dict);
newHeader.addComment("generated with " + getProgramName() + " " + getVersion() + " Pierre Lindenbaum : " + getProgramCommandLine());
/* create groups */
for (final String sample : samples) {
final SAMReadGroupRecord rg = new SAMReadGroupRecord(sample);
rg.setSample(sample);
rg.setLibrary(sample);
newHeader.addReadGroup(rg);
}
sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, newHeader, true);
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
final SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
rec.setAttribute("RG", null);
if (rec.getReadUnmappedFlag()) {
rec.setAttribute("RG", UNMAPPED);
sfw.addAlignment(rec);
continue;
}
final Cigar cigar = rec.getCigar();
final Collection<Position> snps = positionsTreeMap.getContained(new Interval(rec.getContig(), rec.getUnclippedStart(), rec.getUnclippedEnd()));
if (snps == null || snps.isEmpty()) {
rec.setAttribute("RG", UNAFFECTED_SAMPLE);
sfw.addAlignment(rec);
continue;
}
final Map<Integer, Position> index2pos = snps.stream().collect(Collectors.toMap(P -> P.refpos, P -> P));
final Set<String> selectedSamples = new HashSet<>();
final byte[] bases = rec.getReadBases();
if (bases == null || bases.equals(SAMRecord.NULL_SEQUENCE)) {
LOG.error("Bases missing in read " + rec);
return -1;
}
int refPos1 = rec.getUnclippedStart();
int readPos0 = 0;
for (final CigarElement ce : cigar.getCigarElements()) {
final CigarOperator op = ce.getOperator();
final boolean consummeReadBaseOrSoftClip = op.consumesReadBases() || op.equals(CigarOperator.S);
if (op.consumesReferenceBases() && consummeReadBaseOrSoftClip) {
for (int i = 0; i < ce.getLength(); ++i) {
final int nowRefPos1 = (refPos1 + i);
final int nowReadPos0 = (readPos0 + i);
final Position position = index2pos.get(nowRefPos1);
if (position == null)
continue;
if (nowReadPos0 >= bases.length)
continue;
final char base = (char) Character.toUpperCase(bases[nowReadPos0]);
final String sample = position.base2sample.get(base);
if (sample == null)
continue;
selectedSamples.add(sample);
index2pos.remove(nowRefPos1);
if (index2pos.isEmpty())
break;
}
}
if (op.consumesReferenceBases())
refPos1 += ce.getLength();
if (consummeReadBaseOrSoftClip || op.equals(CigarOperator.H)) {
readPos0 += ce.getLength();
}
}
if (selectedSamples.isEmpty()) {
rec.setAttribute("RG", UNAFFECTED_SAMPLE);
} else if (selectedSamples.size() == 1) {
rec.setAttribute("RG", selectedSamples.iterator().next());
} else {
rec.setAttribute("RG", AMBIGOUS_SAMPLE);
}
sfw.addAlignment(rec);
}
progress.finish();
LOG.info("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfr);
CloserUtil.close(sfw);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class Biostar234081 method doWork.
@Override
public int doWork(java.util.List<String> args) {
SamReader in = null;
SAMFileWriter w = null;
SAMRecordIterator iter = null;
try {
in = super.openSamReader(oneFileOrNull(args));
w = this.writingBamArgs.openSAMFileWriter(this.outputFile, in.getFileHeader(), true);
iter = in.iterator();
while (iter.hasNext()) {
SAMRecord rec = iter.next();
if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
final Cigar cigar = rec.getCigar();
final List<CigarElement> elements = new ArrayList<>(cigar.numCigarElements());
for (int i = 0; i < cigar.numCigarElements(); ++i) {
CigarElement ce = cigar.getCigarElement(i);
switch(ce.getOperator()) {
case X:
case EQ:
ce = new CigarElement(ce.getLength(), CigarOperator.M);
break;
default:
break;
}
if (!elements.isEmpty() && elements.get(elements.size() - 1).getOperator() == ce.getOperator()) {
elements.set(elements.size() - 1, new CigarElement(ce.getLength() + elements.get(elements.size() - 1).getLength(), ce.getOperator()));
} else {
elements.add(ce);
}
}
rec.setCigar(new Cigar(elements));
}
w.addAlignment(rec);
}
LOG.debug("done");
return RETURN_OK;
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(in);
CloserUtil.close(w);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class Biostar234230 method doWork.
@Override
public int doWork(final List<String> args) {
if (this.windowSize <= 0) {
LOG.error("Bad window size.");
return -1;
}
if (this.windowShift <= 0) {
LOG.error("Bad window shift.");
return -1;
}
SamReader in = null;
PrintWriter out = null;
SAMRecordIterator iter = null;
final List<SlidingWindow> buffer = new ArrayList<>();
try {
in = super.openSamReader(oneFileOrNull(args));
final SAMFileHeader header = in.getFileHeader();
if (header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
LOG.error("Bam is not sorted on coordinate");
return -1;
}
out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
out.print("#contig");
out.print("\t");
out.print("start");
out.print("\t");
out.print("end");
out.print("\t");
out.print("pairs_in_window");
out.print("\t");
out.print("pairs_over_window");
out.print("\t");
out.print("pairs_partial_overlap");
out.println();
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
iter = in.iterator();
String prev_contig = null;
for (; ; ) {
final SAMRecord rec = (iter.hasNext() ? progress.watch(iter.next()) : null);
if (rec != null) {
if (rec.getReadUnmappedFlag())
continue;
if (this.filter.filterOut(rec))
continue;
if (rec.getReadUnmappedFlag())
continue;
if (!rec.getReadPairedFlag())
continue;
if (rec.getMateUnmappedFlag())
continue;
if (!rec.getProperPairFlag())
continue;
if (!rec.getReferenceName().equals(rec.getMateReferenceName()))
continue;
if (rec.getMateAlignmentStart() > rec.getAlignmentStart())
continue;
}
if (rec == null || !rec.getContig().equals(prev_contig)) {
for (final SlidingWindow w : buffer) {
w.print(out, prev_contig);
}
buffer.clear();
if (rec == null)
break;
prev_contig = rec.getContig();
}
final int fragStart = rec.getMateAlignmentStart();
final int fragEnd = rec.getAlignmentEnd();
while (!buffer.isEmpty() && buffer.get(0).end < fragStart) {
buffer.remove(0).print(out, rec.getContig());
;
}
final int winStart = fragStart - fragStart % this.windowSize;
final int winEnd = fragEnd - fragEnd % this.windowSize;
for (int winPos = winStart; winPos <= winEnd; winPos += this.windowShift) {
if (buffer.isEmpty() || buffer.get(buffer.size() - 1).start < winPos) {
buffer.add(new SlidingWindow(winPos, winPos + this.windowSize));
}
}
for (SlidingWindow w : buffer) {
w.watch(fragStart, fragEnd);
}
}
progress.finish();
out.flush();
return RETURN_OK;
} catch (Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(iter);
CloserUtil.close(in);
CloserUtil.close(out);
}
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class Biostar84452 method doWork.
@Override
public int doWork(final List<String> args) {
if (!StringUtil.isBlank(this.customTag)) {
if (customTag.length() != 2 || !customTag.startsWith("X")) {
LOG.error("Bad tag: expect length=2 && start with 'X'");
return -1;
}
}
SAMFileWriter sfw = null;
SamReader sfr = null;
try {
sfr = super.openSamReader(oneFileOrNull(args));
SAMFileHeader header = sfr.getFileHeader();
sfw = this.WritingBamArgs.openSAMFileWriter(outputFile, header, true);
long nChanged = 0L;
final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
SAMRecordIterator iter = sfr.iterator();
while (iter.hasNext()) {
final SAMRecord rec = progress.watch(iter.next());
if (rec.getReadUnmappedFlag()) {
sfw.addAlignment(rec);
continue;
}
final Cigar cigar = rec.getCigar();
if (cigar == null) {
sfw.addAlignment(rec);
continue;
}
final String originalCigarSting = rec.getCigarString();
final byte[] bases = rec.getReadBases();
if (bases == null) {
sfw.addAlignment(rec);
continue;
}
final ArrayList<CigarElement> L = new ArrayList<CigarElement>();
final ByteArrayOutputStream nseq = new ByteArrayOutputStream();
final ByteArrayOutputStream nqual = new ByteArrayOutputStream();
final byte[] quals = rec.getBaseQualities();
int indexBases = 0;
for (final CigarElement ce : cigar.getCigarElements()) {
switch(ce.getOperator()) {
case S:
indexBases += ce.getLength();
break;
// cont
case H:
// cont
case P:
// cont
case N:
case D:
{
L.add(ce);
break;
}
case I:
case EQ:
case X:
case M:
{
L.add(ce);
nseq.write(bases, indexBases, ce.getLength());
if (quals.length != 0)
nqual.write(quals, indexBases, ce.getLength());
indexBases += ce.getLength();
break;
}
default:
{
throw new SAMException("Unsupported Cigar opertator:" + ce.getOperator());
}
}
}
if (indexBases != bases.length) {
throw new SAMException("ERRROR " + rec.getCigarString());
}
if (L.size() == cigar.numCigarElements()) {
sfw.addAlignment(rec);
continue;
}
++nChanged;
if (!StringUtil.isBlank(this.customTag))
rec.setAttribute(this.customTag, originalCigarSting);
rec.setCigar(new Cigar(L));
rec.setReadBases(nseq.toByteArray());
if (quals.length != 0)
rec.setBaseQualities(nqual.toByteArray());
sfw.addAlignment(rec);
}
progress.finish();
iter.close();
LOG.info("Num records changed:" + nChanged);
} catch (final Exception err) {
LOG.error(err);
return -1;
} finally {
CloserUtil.close(sfw);
CloserUtil.close(sfr);
}
return 0;
}
use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.
the class Bam2Raster method scan.
private void scan(final SamReader r) {
final SAMRecordIterator iter = r.query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
while (iter.hasNext()) {
final SAMRecord rec = iter.next();
if (rec.getReadUnmappedFlag())
continue;
if (this.samRecordFilter.filterOut(rec))
continue;
if (!this.interval.getContig().equals(rec.getReferenceName()))
continue;
if (super.readRight.apply(rec) < this.interval.getStart()) {
continue;
}
if (super.readLeft.apply(rec) > this.interval.getEnd()) {
break;
}
final String group = this.groupBy.apply(rec.getReadGroup());
if (group == null)
continue;
PartitionImage partition = this.key2partition.get(group);
if (partition == null) {
partition = new PartitionImage(group);
this.key2partition.put(group, partition);
}
partition.add(rec);
}
iter.close();
}
Aggregations