Search in sources :

Example 96 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar214299 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.positionFile == null) {
        LOG.error("position File is not defined.");
        return -1;
    }
    final String UNAFFECTED_SAMPLE = "UNAFFECTED";
    final String AMBIGOUS_SAMPLE = "AMBIGOUS";
    final String UNMAPPED = "UNMAPPED";
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    final IntervalTreeMap<Position> positionsTreeMap = new IntervalTreeMap<>();
    final Set<String> samples = new HashSet<>();
    try {
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader();
        final SAMSequenceDictionary dict = header.getSequenceDictionary();
        if (dict == null) {
            LOG.error("Dictionary missing in input sam");
            return -1;
        }
        try (BufferedReader br = IOUtils.openFileForBufferedReading(this.positionFile)) {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.trim().isEmpty() || line.startsWith("#"))
                    continue;
                final String[] tokens = line.split("[\t]");
                if (tokens.length < 4) {
                    LOG.error("Not enough columns in " + line);
                    return -1;
                }
                final String contig = tokens[0];
                if (dict.getSequence(contig) == null) {
                    LOG.error("No such contig in input's sam dictionary: " + contig);
                    return -1;
                }
                final int refpos = Integer.parseInt(tokens[1]);
                final Interval interval = new Interval(contig, refpos, refpos);
                Position position = positionsTreeMap.get(interval);
                if (position == null) {
                    position = new Position();
                    // position.contig = contig;
                    position.refpos = refpos;
                    positionsTreeMap.put(interval, position);
                }
                final String bases = tokens[2].toUpperCase();
                if (bases.length() != 1 || !bases.matches("[ATGC]")) {
                    LOG.error("in " + line + " bases should be one letter an ATGC");
                    return -1;
                }
                if (position.base2sample.containsKey(bases.charAt(0))) {
                    LOG.error("in " + line + " bases already defined for this position");
                    return -1;
                }
                final String sampleName = tokens[3].trim();
                if (sampleName.isEmpty()) {
                    LOG.error("sample name cannot be empty");
                    return -1;
                }
                samples.add(sampleName);
                position.base2sample.put(bases.charAt(0), sampleName);
            }
        } catch (final IOException err) {
            LOG.error(err);
            return -1;
        }
        if (samples.contains(UNAFFECTED_SAMPLE)) {
            LOG.error("Sample cannot be named " + UNAFFECTED_SAMPLE);
            return -1;
        }
        if (samples.contains(AMBIGOUS_SAMPLE)) {
            LOG.error("Sample cannot be named " + AMBIGOUS_SAMPLE);
            return -1;
        }
        if (samples.contains(UNMAPPED)) {
            LOG.error("Sample cannot be named " + UNMAPPED);
            return -1;
        }
        samples.add(UNAFFECTED_SAMPLE);
        samples.add(AMBIGOUS_SAMPLE);
        samples.add(UNMAPPED);
        final SAMFileHeader newHeader = new SAMFileHeader();
        newHeader.setSortOrder(header.getSortOrder());
        newHeader.setSequenceDictionary(dict);
        newHeader.addComment("generated with " + getProgramName() + " " + getVersion() + " Pierre Lindenbaum : " + getProgramCommandLine());
        /* create groups */
        for (final String sample : samples) {
            final SAMReadGroupRecord rg = new SAMReadGroupRecord(sample);
            rg.setSample(sample);
            rg.setLibrary(sample);
            newHeader.addReadGroup(rg);
        }
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, newHeader, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            rec.setAttribute("RG", null);
            if (rec.getReadUnmappedFlag()) {
                rec.setAttribute("RG", UNMAPPED);
                sfw.addAlignment(rec);
                continue;
            }
            final Cigar cigar = rec.getCigar();
            final Collection<Position> snps = positionsTreeMap.getContained(new Interval(rec.getContig(), rec.getUnclippedStart(), rec.getUnclippedEnd()));
            if (snps == null || snps.isEmpty()) {
                rec.setAttribute("RG", UNAFFECTED_SAMPLE);
                sfw.addAlignment(rec);
                continue;
            }
            final Map<Integer, Position> index2pos = snps.stream().collect(Collectors.toMap(P -> P.refpos, P -> P));
            final Set<String> selectedSamples = new HashSet<>();
            final byte[] bases = rec.getReadBases();
            if (bases == null || bases.equals(SAMRecord.NULL_SEQUENCE)) {
                LOG.error("Bases missing in read " + rec);
                return -1;
            }
            int refPos1 = rec.getUnclippedStart();
            int readPos0 = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                final CigarOperator op = ce.getOperator();
                final boolean consummeReadBaseOrSoftClip = op.consumesReadBases() || op.equals(CigarOperator.S);
                if (op.consumesReferenceBases() && consummeReadBaseOrSoftClip) {
                    for (int i = 0; i < ce.getLength(); ++i) {
                        final int nowRefPos1 = (refPos1 + i);
                        final int nowReadPos0 = (readPos0 + i);
                        final Position position = index2pos.get(nowRefPos1);
                        if (position == null)
                            continue;
                        if (nowReadPos0 >= bases.length)
                            continue;
                        final char base = (char) Character.toUpperCase(bases[nowReadPos0]);
                        final String sample = position.base2sample.get(base);
                        if (sample == null)
                            continue;
                        selectedSamples.add(sample);
                        index2pos.remove(nowRefPos1);
                        if (index2pos.isEmpty())
                            break;
                    }
                }
                if (op.consumesReferenceBases())
                    refPos1 += ce.getLength();
                if (consummeReadBaseOrSoftClip || op.equals(CigarOperator.H)) {
                    readPos0 += ce.getLength();
                }
            }
            if (selectedSamples.isEmpty()) {
                rec.setAttribute("RG", UNAFFECTED_SAMPLE);
            } else if (selectedSamples.size() == 1) {
                rec.setAttribute("RG", selectedSamples.iterator().next());
            } else {
                rec.setAttribute("RG", AMBIGOUS_SAMPLE);
            }
            sfw.addAlignment(rec);
        }
        progress.finish();
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : Cigar(htsjdk.samtools.Cigar) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) CigarElement(htsjdk.samtools.CigarElement) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) CigarOperator(htsjdk.samtools.CigarOperator) HashMap(java.util.HashMap) Term(com.github.lindenb.semontology.Term) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ParametersDelegate(com.beust.jcommander.ParametersDelegate) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) Map(java.util.Map) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) CloserUtil(htsjdk.samtools.util.CloserUtil) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Collection(java.util.Collection) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) IOException(java.io.IOException) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SamReader(htsjdk.samtools.SamReader) Collectors(java.util.stream.Collectors) File(java.io.File) SAMRecord(htsjdk.samtools.SAMRecord) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) BufferedReader(java.io.BufferedReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) IOException(java.io.IOException) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) BufferedReader(java.io.BufferedReader) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval)

Example 97 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar234081 method doWork.

@Override
public int doWork(java.util.List<String> args) {
    SamReader in = null;
    SAMFileWriter w = null;
    SAMRecordIterator iter = null;
    try {
        in = super.openSamReader(oneFileOrNull(args));
        w = this.writingBamArgs.openSAMFileWriter(this.outputFile, in.getFileHeader(), true);
        iter = in.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            if (!rec.getReadUnmappedFlag() && rec.getCigar() != null) {
                final Cigar cigar = rec.getCigar();
                final List<CigarElement> elements = new ArrayList<>(cigar.numCigarElements());
                for (int i = 0; i < cigar.numCigarElements(); ++i) {
                    CigarElement ce = cigar.getCigarElement(i);
                    switch(ce.getOperator()) {
                        case X:
                        case EQ:
                            ce = new CigarElement(ce.getLength(), CigarOperator.M);
                            break;
                        default:
                            break;
                    }
                    if (!elements.isEmpty() && elements.get(elements.size() - 1).getOperator() == ce.getOperator()) {
                        elements.set(elements.size() - 1, new CigarElement(ce.getLength() + elements.get(elements.size() - 1).getLength(), ce.getOperator()));
                    } else {
                        elements.add(ce);
                    }
                }
                rec.setCigar(new Cigar(elements));
            }
            w.addAlignment(rec);
        }
        LOG.debug("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(in);
        CloserUtil.close(w);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) Cigar(htsjdk.samtools.Cigar) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) CigarElement(htsjdk.samtools.CigarElement)

Example 98 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar234230 method doWork.

@Override
public int doWork(final List<String> args) {
    if (this.windowSize <= 0) {
        LOG.error("Bad window size.");
        return -1;
    }
    if (this.windowShift <= 0) {
        LOG.error("Bad window shift.");
        return -1;
    }
    SamReader in = null;
    PrintWriter out = null;
    SAMRecordIterator iter = null;
    final List<SlidingWindow> buffer = new ArrayList<>();
    try {
        in = super.openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = in.getFileHeader();
        if (header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
            LOG.error("Bam is not sorted on coordinate");
            return -1;
        }
        out = super.openFileOrStdoutAsPrintWriter(this.outputFile);
        out.print("#contig");
        out.print("\t");
        out.print("start");
        out.print("\t");
        out.print("end");
        out.print("\t");
        out.print("pairs_in_window");
        out.print("\t");
        out.print("pairs_over_window");
        out.print("\t");
        out.print("pairs_partial_overlap");
        out.println();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        iter = in.iterator();
        String prev_contig = null;
        for (; ; ) {
            final SAMRecord rec = (iter.hasNext() ? progress.watch(iter.next()) : null);
            if (rec != null) {
                if (rec.getReadUnmappedFlag())
                    continue;
                if (this.filter.filterOut(rec))
                    continue;
                if (rec.getReadUnmappedFlag())
                    continue;
                if (!rec.getReadPairedFlag())
                    continue;
                if (rec.getMateUnmappedFlag())
                    continue;
                if (!rec.getProperPairFlag())
                    continue;
                if (!rec.getReferenceName().equals(rec.getMateReferenceName()))
                    continue;
                if (rec.getMateAlignmentStart() > rec.getAlignmentStart())
                    continue;
            }
            if (rec == null || !rec.getContig().equals(prev_contig)) {
                for (final SlidingWindow w : buffer) {
                    w.print(out, prev_contig);
                }
                buffer.clear();
                if (rec == null)
                    break;
                prev_contig = rec.getContig();
            }
            final int fragStart = rec.getMateAlignmentStart();
            final int fragEnd = rec.getAlignmentEnd();
            while (!buffer.isEmpty() && buffer.get(0).end < fragStart) {
                buffer.remove(0).print(out, rec.getContig());
                ;
            }
            final int winStart = fragStart - fragStart % this.windowSize;
            final int winEnd = fragEnd - fragEnd % this.windowSize;
            for (int winPos = winStart; winPos <= winEnd; winPos += this.windowShift) {
                if (buffer.isEmpty() || buffer.get(buffer.size() - 1).start < winPos) {
                    buffer.add(new SlidingWindow(winPos, winPos + this.windowSize));
                }
            }
            for (SlidingWindow w : buffer) {
                w.watch(fragStart, fragEnd);
            }
        }
        progress.finish();
        out.flush();
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(in);
        CloserUtil.close(out);
    }
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecord(htsjdk.samtools.SAMRecord) ArrayList(java.util.ArrayList) SAMFileHeader(htsjdk.samtools.SAMFileHeader) PrintWriter(java.io.PrintWriter)

Example 99 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Biostar84452 method doWork.

@Override
public int doWork(final List<String> args) {
    if (!StringUtil.isBlank(this.customTag)) {
        if (customTag.length() != 2 || !customTag.startsWith("X")) {
            LOG.error("Bad tag: expect length=2 && start with 'X'");
            return -1;
        }
    }
    SAMFileWriter sfw = null;
    SamReader sfr = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader();
        sfw = this.WritingBamArgs.openSAMFileWriter(outputFile, header, true);
        long nChanged = 0L;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            if (rec.getReadUnmappedFlag()) {
                sfw.addAlignment(rec);
                continue;
            }
            final Cigar cigar = rec.getCigar();
            if (cigar == null) {
                sfw.addAlignment(rec);
                continue;
            }
            final String originalCigarSting = rec.getCigarString();
            final byte[] bases = rec.getReadBases();
            if (bases == null) {
                sfw.addAlignment(rec);
                continue;
            }
            final ArrayList<CigarElement> L = new ArrayList<CigarElement>();
            final ByteArrayOutputStream nseq = new ByteArrayOutputStream();
            final ByteArrayOutputStream nqual = new ByteArrayOutputStream();
            final byte[] quals = rec.getBaseQualities();
            int indexBases = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                switch(ce.getOperator()) {
                    case S:
                        indexBases += ce.getLength();
                        break;
                    // cont
                    case H:
                    // cont
                    case P:
                    // cont
                    case N:
                    case D:
                        {
                            L.add(ce);
                            break;
                        }
                    case I:
                    case EQ:
                    case X:
                    case M:
                        {
                            L.add(ce);
                            nseq.write(bases, indexBases, ce.getLength());
                            if (quals.length != 0)
                                nqual.write(quals, indexBases, ce.getLength());
                            indexBases += ce.getLength();
                            break;
                        }
                    default:
                        {
                            throw new SAMException("Unsupported Cigar opertator:" + ce.getOperator());
                        }
                }
            }
            if (indexBases != bases.length) {
                throw new SAMException("ERRROR " + rec.getCigarString());
            }
            if (L.size() == cigar.numCigarElements()) {
                sfw.addAlignment(rec);
                continue;
            }
            ++nChanged;
            if (!StringUtil.isBlank(this.customTag))
                rec.setAttribute(this.customTag, originalCigarSting);
            rec.setCigar(new Cigar(L));
            rec.setReadBases(nseq.toByteArray());
            if (quals.length != 0)
                rec.setBaseQualities(nqual.toByteArray());
            sfw.addAlignment(rec);
        }
        progress.finish();
        iter.close();
        LOG.info("Num records changed:" + nChanged);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) CigarElement(htsjdk.samtools.CigarElement) SAMException(htsjdk.samtools.SAMException) SamReader(htsjdk.samtools.SamReader) SAMException(htsjdk.samtools.SAMException) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 100 with SAMRecordIterator

use of htsjdk.samtools.SAMRecordIterator in project jvarkit by lindenb.

the class Bam2Raster method scan.

private void scan(final SamReader r) {
    final SAMRecordIterator iter = r.query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
    while (iter.hasNext()) {
        final SAMRecord rec = iter.next();
        if (rec.getReadUnmappedFlag())
            continue;
        if (this.samRecordFilter.filterOut(rec))
            continue;
        if (!this.interval.getContig().equals(rec.getReferenceName()))
            continue;
        if (super.readRight.apply(rec) < this.interval.getStart()) {
            continue;
        }
        if (super.readLeft.apply(rec) > this.interval.getEnd()) {
            break;
        }
        final String group = this.groupBy.apply(rec.getReadGroup());
        if (group == null)
            continue;
        PartitionImage partition = this.key2partition.get(group);
        if (partition == null) {
            partition = new PartitionImage(group);
            this.key2partition.put(group, partition);
        }
        partition.add(rec);
    }
    iter.close();
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMRecord(htsjdk.samtools.SAMRecord)

Aggregations

SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)107 SAMRecord (htsjdk.samtools.SAMRecord)92 SamReader (htsjdk.samtools.SamReader)83 SAMFileHeader (htsjdk.samtools.SAMFileHeader)49 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)47 File (java.io.File)47 SAMFileWriter (htsjdk.samtools.SAMFileWriter)45 IOException (java.io.IOException)41 ArrayList (java.util.ArrayList)34 CigarElement (htsjdk.samtools.CigarElement)30 Cigar (htsjdk.samtools.Cigar)26 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)24 SamReaderFactory (htsjdk.samtools.SamReaderFactory)21 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)18 CigarOperator (htsjdk.samtools.CigarOperator)16 Interval (htsjdk.samtools.util.Interval)16 PrintWriter (java.io.PrintWriter)15 HashMap (java.util.HashMap)15 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)14 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)14