Search in sources :

Example 71 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class Biostar84452 method doWork.

@Override
public int doWork(final List<String> args) {
    if (!StringUtil.isBlank(this.customTag)) {
        if (customTag.length() != 2 || !customTag.startsWith("X")) {
            LOG.error("Bad tag: expect length=2 && start with 'X'");
            return -1;
        }
    }
    SAMFileWriter sfw = null;
    SamReader sfr = null;
    try {
        sfr = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader();
        sfw = this.WritingBamArgs.openSAMFileWriter(outputFile, header, true);
        long nChanged = 0L;
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header).logger(LOG);
        SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            if (rec.getReadUnmappedFlag()) {
                sfw.addAlignment(rec);
                continue;
            }
            final Cigar cigar = rec.getCigar();
            if (cigar == null) {
                sfw.addAlignment(rec);
                continue;
            }
            final String originalCigarSting = rec.getCigarString();
            final byte[] bases = rec.getReadBases();
            if (bases == null) {
                sfw.addAlignment(rec);
                continue;
            }
            final ArrayList<CigarElement> L = new ArrayList<CigarElement>();
            final ByteArrayOutputStream nseq = new ByteArrayOutputStream();
            final ByteArrayOutputStream nqual = new ByteArrayOutputStream();
            final byte[] quals = rec.getBaseQualities();
            int indexBases = 0;
            for (final CigarElement ce : cigar.getCigarElements()) {
                switch(ce.getOperator()) {
                    case S:
                        indexBases += ce.getLength();
                        break;
                    // cont
                    case H:
                    // cont
                    case P:
                    // cont
                    case N:
                    case D:
                        {
                            L.add(ce);
                            break;
                        }
                    case I:
                    case EQ:
                    case X:
                    case M:
                        {
                            L.add(ce);
                            nseq.write(bases, indexBases, ce.getLength());
                            if (quals.length != 0)
                                nqual.write(quals, indexBases, ce.getLength());
                            indexBases += ce.getLength();
                            break;
                        }
                    default:
                        {
                            throw new SAMException("Unsupported Cigar opertator:" + ce.getOperator());
                        }
                }
            }
            if (indexBases != bases.length) {
                throw new SAMException("ERRROR " + rec.getCigarString());
            }
            if (L.size() == cigar.numCigarElements()) {
                sfw.addAlignment(rec);
                continue;
            }
            ++nChanged;
            if (!StringUtil.isBlank(this.customTag))
                rec.setAttribute(this.customTag, originalCigarSting);
            rec.setCigar(new Cigar(L));
            rec.setReadBases(nseq.toByteArray());
            if (quals.length != 0)
                rec.setBaseQualities(nqual.toByteArray());
            sfw.addAlignment(rec);
        }
        progress.finish();
        iter.close();
        LOG.info("Num records changed:" + nChanged);
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
    return 0;
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) ByteArrayOutputStream(java.io.ByteArrayOutputStream) CigarElement(htsjdk.samtools.CigarElement) SAMException(htsjdk.samtools.SAMException) SamReader(htsjdk.samtools.SamReader) SAMException(htsjdk.samtools.SAMException) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 72 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class SamJmx method doWork.

private int doWork(SamReader in) throws IOException {
    String name = this.projectName;
    if (name == null || name.trim().isEmpty()) {
        name = "undefined";
    }
    MBeanServer mbeanServer = ManagementFactory.getPlatformMBeanServer();
    LocatableStreamInfo dynamicMBean = new LocatableStreamInfo(name);
    LocatableStreamInfo.Status status = LocatableStreamInfo.Status.IDLE;
    ObjectName objectMBean = null;
    SAMFileWriter out = null;
    SAMRecordIterator iter = null;
    try {
        out = this.writingBamArgs.openSAMFileWriter(output, in.getFileHeader(), true);
        objectMBean = new ObjectName(dynamicMBean.getClass().getPackage().getName() + ":type=" + dynamicMBean.getClass().getSimpleName());
        mbeanServer.registerMBean(dynamicMBean, objectMBean);
        iter = in.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            out.addAlignment(rec);
            status = dynamicMBean.watch(rec);
            if (status == LocatableStreamInfo.Status.ABORT || status == LocatableStreamInfo.Status.BREAK) {
                LOG.error("#### Process \"" + name + "\" received message " + status.name());
                break;
            }
        }
        if (status == LocatableStreamInfo.Status.ABORT) {
            LOG.error("#### Process \"" + name + "\" : Exit failure");
            mbeanServer.unregisterMBean(objectMBean);
            if (this.output != null) {
                CloserUtil.close(out);
                out = null;
                this.output.delete();
            }
            System.exit(-1);
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(out);
        if (objectMBean != null) {
            try {
                mbeanServer.unregisterMBean(objectMBean);
            } catch (Exception err2) {
            }
        }
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) IOException(java.io.IOException) MBeanServer(javax.management.MBeanServer) ObjectName(javax.management.ObjectName)

Example 73 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class Biostar78400Test method test01.

@Test
public void test01() throws IOException {
    final String flowcell = "HS20001259127";
    final String lane = "1";
    final File in = createTmpFile(".bam");
    SAMFileHeader header = new SAMFileHeader();
    header.setSortOrder(SortOrder.unsorted);
    SAMFileWriter sfw = new SAMFileWriterFactory().makeBAMWriter(header, true, in);
    DefaultSAMRecordFactory recfactory = new DefaultSAMRecordFactory();
    SAMRecord rec = recfactory.createSAMRecord(header);
    rec.setReadName(flowcell + ":" + lane + ":1210:15640:52255");
    rec.setReadString("GAATTC");
    rec.setBaseQualityString("222222");
    SAMUtils.makeReadUnmapped(rec);
    sfw.addAlignment(rec);
    sfw.close();
    assertIsValidBam(in);
    final File xml = createTmpFile(".xml");
    PrintWriter pw = new PrintWriter(xml);
    pw.println("<?xml version=\"1.0\"?><read-groups>" + "<flowcell name=\"" + flowcell + "\"><lane index=\"" + lane + "\">" + "<group ID=\"X1\"><library>L1</library><platform>P1</platform>" + "<sample>S1</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group>" + "</lane></flowcell><flowcell name=\"HS20001259128\">" + "<lane index=\"2\"><group ID=\"x2\"><library>L2</library>" + "<platform>P2</platform><sample>S2</sample><platformunit>PU1</platformunit>" + "<center>C1</center><description>blabla</description></group></lane>" + "</flowcell></read-groups>");
    pw.flush();
    pw.close();
    assertIsXml(xml);
    final File out = createTmpFile(".bam");
    Assert.assertEquals(new Biostar78400().instanceMain(newCmd().add("-o").add(out).add("-x").add(xml).add(in).make()), 0);
    SamReader r = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT).open(out);
    Assert.assertTrue(r.getFileHeader() != null);
    Assert.assertTrue(r.getFileHeader().getReadGroups() != null);
    Assert.assertFalse(r.getFileHeader().getReadGroups().isEmpty());
    SAMRecordIterator iter = r.iterator();
    Assert.assertTrue(iter.hasNext());
    rec = iter.next();
    SAMReadGroupRecord rg = rec.getReadGroup();
    Assert.assertNotNull(rg);
    Assert.assertEquals(rg.getId(), "X1");
    Assert.assertEquals(rg.getSample(), "S1");
    Assert.assertFalse(iter.hasNext());
    iter.close();
    r.close();
}
Also used : SamReader(htsjdk.samtools.SamReader) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) SAMRecord(htsjdk.samtools.SAMRecord) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) PrintWriter(java.io.PrintWriter) Test(org.testng.annotations.Test)

Example 74 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class Biostar139647 method doWork.

@Override
public int doWork(final List<String> args) {
    SAMFileWriter w = null;
    LineIterator r = null;
    try {
        final String filename = super.oneFileOrNull(args);
        if (filename == null) {
            LOG.info("Reading from stdin");
            r = IOUtils.openStreamForLineIterator(stdin());
        } else {
            LOG.info("Reading from " + filename);
            r = IOUtils.openURIForLineIterator(filename);
        }
        Format format = Format.None;
        while (r.hasNext() && format == Format.None) {
            String line = r.peek();
            if (line.trim().isEmpty()) {
                r.next();
                continue;
            }
            if (line.startsWith("CLUSTAL")) {
                format = Format.Clustal;
                // consume
                r.next();
                break;
            } else if (line.startsWith(">")) {
                format = Format.Fasta;
                break;
            } else {
                LOG.error("MSA format not recognized in " + line);
                return -1;
            }
        }
        LOG.info("format : " + format);
        if (Format.Fasta.equals(format)) {
            // this.consensus=new FastaConsensus();
            AlignSequence curr = null;
            while (r.hasNext()) {
                String line = r.next();
                if (line.startsWith(">")) {
                    curr = new AlignSequence();
                    curr.name = line.substring(1).trim();
                    if (sample2sequence.containsKey(curr.name)) {
                        LOG.error("Sequence ID " + curr.name + " defined twice");
                        return -1;
                    }
                    sample2sequence.put(curr.name, curr);
                } else if (curr != null) {
                    curr.seq.append(line.trim());
                    this.align_length = Math.max(this.align_length, curr.seq.length());
                }
            }
        } else if (Format.Clustal.equals(format)) {
            AlignSequence curr = null;
            int columnStart = -1;
            while (r.hasNext()) {
                String line = r.next();
                if (line.trim().isEmpty() || line.startsWith("CLUSTAL W")) {
                    columnStart = -1;
                    continue;
                }
                if (line.charAt(0) == ' ') {
                    if (columnStart == -1) {
                        LOG.error("illegal consensus line for " + line);
                        return -1;
                    }
                } else {
                    if (columnStart == -1) {
                        columnStart = line.indexOf(' ');
                        if (columnStart == -1) {
                            LOG.error("no whithespace in " + line);
                            return -1;
                        }
                        while (columnStart < line.length() && line.charAt(columnStart) == ' ') {
                            columnStart++;
                        }
                    }
                    String seqname = line.substring(0, columnStart).trim();
                    curr = this.sample2sequence.get(seqname);
                    if (curr == null) {
                        curr = new AlignSequence();
                        curr.name = seqname;
                        this.sample2sequence.put(curr.name, curr);
                    }
                    curr.seq.append(line.substring(columnStart));
                    this.align_length = Math.max(align_length, curr.seq.length());
                }
            }
        } else {
            LOG.error("Undefined input format");
            return -1;
        }
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        dict.addSequence(new SAMSequenceRecord(REF, this.align_length));
        header.setSortOrder(SortOrder.unsorted);
        header.setSequenceDictionary(dict);
        final SAMProgramRecord pgr = header.createProgramRecord();
        pgr.setProgramName(getProgramName());
        pgr.setProgramVersion(getVersion());
        if (getProgramCommandLine().trim().isEmpty()) {
            pgr.setCommandLine("(empty)");
        } else {
            pgr.setCommandLine(getProgramCommandLine());
        }
        w = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, false);
        final DefaultSAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
        for (final String seqName : this.sample2sequence.keySet()) {
            final AlignSequence shortRead = this.sample2sequence.get(seqName);
            final SAMRecord rec = samRecordFactory.createSAMRecord(header);
            rec.setReadName(seqName);
            rec.setReadString(shortRead.seq.toString().replaceAll("[\\*\\- ]+", ""));
            int start = 0;
            while (start < shortRead.seq.length() && !Character.isLetter(shortRead.at(start))) {
                start++;
            }
            rec.setAlignmentStart(start + 1);
            int end = shortRead.seq.length() - 1;
            while (end > 0 && !Character.isLetter(shortRead.at(end))) {
                --end;
            }
            // rec.setAlignmentEnd(end+1); not supported
            int n = start;
            StringBuilder dna = new StringBuilder();
            final List<CigarElement> cigars = new ArrayList<>();
            while (n <= end) {
                if (!Character.isLetter(shortRead.at(n))) {
                    cigars.add(new CigarElement(1, CigarOperator.D));
                } else {
                    cigars.add(new CigarElement(1, CigarOperator.M));
                    dna.append(shortRead.at(n));
                }
                n++;
            }
            // simplify cigar string
            n = 0;
            while (n + 1 < cigars.size()) {
                CigarElement c1 = cigars.get(n);
                CigarElement c2 = cigars.get(n + 1);
                if (c1.getOperator().equals(c2.getOperator())) {
                    cigars.set(n, new CigarElement(c1.getLength() + c2.getLength(), c1.getOperator()));
                    cigars.remove(n + 1);
                } else {
                    ++n;
                }
            }
            rec.setReadPairedFlag(false);
            rec.setMappingQuality(60);
            rec.setReferenceName(REF);
            rec.setReadString(dna.toString());
            rec.setCigar(new Cigar(cigars));
            rec.setAttribute("PG", pgr.getId());
            w.addAlignment(rec);
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(w);
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) LineIterator(htsjdk.tribble.readers.LineIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Example 75 with SAMFileWriter

use of htsjdk.samtools.SAMFileWriter in project jvarkit by lindenb.

the class Biostar145820 method doWork.

@Override
public int doWork(final List<String> args) {
    SamReader samReader = null;
    SAMRecordIterator iter = null;
    SAMFileWriter samWriter = null;
    Random random = new Random();
    CloseableIterator<RandSamRecord> iter2 = null;
    try {
        final String input = oneFileOrNull(args);
        samReader = super.openSamReader(input);
        final SAMFileHeader header = samReader.getFileHeader().clone();
        header.setSortOrder(SortOrder.unsorted);
        header.addComment("Processed with " + getProgramName() + " : " + getProgramCommandLine());
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(samReader.getFileHeader()).logger(LOG);
        iter = samReader.iterator();
        final SortingCollection<RandSamRecord> sorter = SortingCollection.newInstance(RandSamRecord.class, new RandSamRecordCodec(header), new RandSamRecordComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
        sorter.setDestructiveIteration(true);
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            if (this.filter.filterOut(rec)) {
                continue;
            }
            final RandSamRecord r = new RandSamRecord();
            r.rand_index = random.nextInt();
            r.samRecord = progress.watch(rec);
            sorter.add(r);
        }
        iter.close();
        iter = null;
        sorter.doneAdding();
        iter2 = sorter.iterator();
        samWriter = writingBamArgs.openSAMFileWriter(outputFile, header, true);
        final SAMFileWriter finalw = samWriter;
        iter2.stream().limit(this.count < 0L ? Long.MAX_VALUE - 1 : this.count).map(R -> R.samRecord).forEach(R -> finalw.addAlignment(R));
        iter2.close();
        iter2 = null;
        sorter.cleanup();
        progress.finish();
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(iter2);
        CloserUtil.close(samReader);
        CloserUtil.close(samWriter);
    }
    return 0;
}
Also used : CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) Random(java.util.Random) SAMFileHeader(htsjdk.samtools.SAMFileHeader) SAMRecordQueryNameComparator(htsjdk.samtools.SAMRecordQueryNameComparator) ParametersDelegate(com.beust.jcommander.ParametersDelegate) SortOrder(htsjdk.samtools.SAMFileHeader.SortOrder) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) BAMRecordCodec(htsjdk.samtools.BAMRecordCodec) CloserUtil(htsjdk.samtools.util.CloserUtil) OutputStream(java.io.OutputStream) SortingCollection(htsjdk.samtools.util.SortingCollection) BinaryCodec(htsjdk.samtools.util.BinaryCodec) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) SAMFileWriter(htsjdk.samtools.SAMFileWriter) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) File(java.io.File) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) List(java.util.List) SamRecordJEXLFilter(com.github.lindenb.jvarkit.util.samtools.SamRecordJEXLFilter) Comparator(java.util.Comparator) InputStream(java.io.InputStream) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) Random(java.util.Random) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

SAMFileWriter (htsjdk.samtools.SAMFileWriter)76 SAMRecord (htsjdk.samtools.SAMRecord)63 SAMFileHeader (htsjdk.samtools.SAMFileHeader)55 SamReader (htsjdk.samtools.SamReader)55 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)46 File (java.io.File)40 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)27 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)25 IOException (java.io.IOException)22 ArrayList (java.util.ArrayList)20 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)14 Cigar (htsjdk.samtools.Cigar)13 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)13 CigarElement (htsjdk.samtools.CigarElement)12 SamReaderFactory (htsjdk.samtools.SamReaderFactory)12 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)10 Interval (htsjdk.samtools.util.Interval)9 PrintWriter (java.io.PrintWriter)9 List (java.util.List)9 HashMap (java.util.HashMap)8