Search in sources :

Example 11 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.

the class FindMyVirus method doWork.

@Override
public int doWork(List<String> args) {
    if (virusNames.isEmpty()) {
        LOG.error("no virus name");
        return -1;
    }
    SamReader sfr = null;
    SAMFileWriter[] sfwArray = new SAMFileWriter[CAT.values().length];
    try {
        sfr = openSamReader(oneFileOrNull(args));
        SAMFileHeader header = sfr.getFileHeader();
        for (CAT category : CAT.values()) {
            LOG.info("Opening " + category);
            SAMFileHeader header2 = header.clone();
            header2.addComment("Category:" + category.name());
            header2.addComment("Description:" + category.getDescription());
            SAMProgramRecord rec = header2.createProgramRecord();
            rec.setCommandLine(this.getProgramCommandLine());
            rec.setProgramName(getProgramName());
            rec.setProgramVersion(getVersion());
            rec.setAttribute("CAT", category.name());
            File outputFile = new File(this.outputFile.getParentFile(), this.outputFile.getName() + "." + category.name() + ".bam");
            LOG.info("Opening " + outputFile);
            File countFile = new File(outputFile.getParentFile(), outputFile.getName() + "." + category.name() + ".count.txt");
            SAMFileWriter sfw = writingBamArgs.openSAMFileWriter(outputFile, header2, true);
            sfw = new SAMFileWriterCount(sfw, countFile, category);
            sfwArray[category.ordinal()] = sfw;
        }
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header.getSequenceDictionary());
        OtherCanonicalAlignFactory xpAlignFactory = new OtherCanonicalAlignFactory(header);
        SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            SAMRecord rec = iter.next();
            progress.watch(rec);
            CAT category = null;
            List<OtherCanonicalAlign> xpList = Collections.emptyList();
            if (category == null && !rec.getReadPairedFlag()) {
                category = CAT.unpaired;
            }
            if (category == null && rec.isSecondaryOrSupplementary()) {
                category = CAT.secondary;
            }
            if (category == null && rec.getReadFailsVendorQualityCheckFlag()) {
                category = CAT.failsqual;
            }
            if (category == null && rec.getDuplicateReadFlag()) {
                category = CAT.duplicate;
            }
            if (category == null && rec.getReadUnmappedFlag()) {
                category = CAT.unmapped;
            }
            if (category == null) {
                xpList = xpAlignFactory.getXPAligns(rec);
            }
            boolean xp_containsVirus = false;
            boolean xp_containsChrom = false;
            for (OtherCanonicalAlign xpa : xpList) {
                if (virusNames.contains(xpa.getReferenceName())) {
                    xp_containsVirus = true;
                } else {
                    xp_containsChrom = true;
                }
            }
            /* both reads mapped on ref */
            if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) {
                if (!xp_containsVirus) {
                    category = CAT.both_ref;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            /*  pair(unmapped,mapped on reference) */
            if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && !virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && !virusNames.contains(rec.getMateReferenceName())))) {
                if (!xp_containsVirus) {
                    category = CAT.ref_orphan;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            /* both reads mapped on virus */
            if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())) {
                if (!xp_containsChrom) {
                    category = CAT.both_virus;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            if (category == null && ((!rec.getReadUnmappedFlag() && rec.getMateUnmappedFlag() && virusNames.contains(rec.getReferenceName())) || (rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && virusNames.contains(rec.getMateReferenceName())))) {
                if (!xp_containsChrom) {
                    category = CAT.virus_orphan;
                } else {
                    category = CAT.ref_and_virus_spliced;
                }
            }
            if (category == null && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag() && ((virusNames.contains(rec.getReferenceName()) && !virusNames.contains(rec.getMateReferenceName())) || (!virusNames.contains(rec.getReferenceName()) && virusNames.contains(rec.getMateReferenceName())))) {
                category = CAT.ref_and_virus;
            }
            /*dispatch */
            if (category == null) {
                LOG.warning("Not handled: " + rec);
                category = CAT.undetermined;
            }
            sfwArray[category.ordinal()].addAlignment(rec);
        }
        for (SAMFileWriter sfw : sfwArray) {
            LOG.info("Closing " + sfw);
            sfw.close();
        }
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        LOG.info("Closing");
        CloserUtil.close(sfr);
        CloserUtil.close(sfwArray);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) OtherCanonicalAlign(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlign) SAMFileWriter(htsjdk.samtools.SAMFileWriter) OtherCanonicalAlignFactory(com.github.lindenb.jvarkit.util.picard.OtherCanonicalAlignFactory) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 12 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.

the class BamStage method createProgramRecordPane.

private Tab createProgramRecordPane(final SAMFileHeader header) {
    final TableView<SAMProgramRecord> table = new TableView<>(header == null ? FXCollections.observableArrayList() : FXCollections.observableArrayList(header.getProgramRecords()));
    table.getColumns().add(makeColumn("ID", G -> G.getId()));
    table.getColumns().add(makeColumn("PG-ID", G -> G.getProgramGroupId()));
    table.getColumns().add(makeColumn("Prev-PG-ID", G -> G.getPreviousProgramGroupId()));
    table.getColumns().add(makeColumn("Version", G -> G.getProgramVersion()));
    table.getColumns().add(makeColumn("Command", G -> G.getCommandLine()));
    final Tab tab = new Tab("PG", table);
    tab.setClosable(false);
    table.setPlaceholder(new Label("No Program-Group."));
    return tab;
}
Also used : Arrays(java.util.Arrays) VCFHeader(htsjdk.variant.vcf.VCFHeader) CigarOperator(htsjdk.samtools.CigarOperator) ChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ChartFactory) VariantContextChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.VariantContextChartFactory) ScrollPane(javafx.scene.control.ScrollPane) TabPane(javafx.scene.control.TabPane) Map(java.util.Map) Hershey(com.github.lindenb.jvarkit.util.Hershey) CigarOpPerPositionChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.CigarOpPerPositionChartFactory) ScriptException(javax.script.ScriptException) CloserUtil(htsjdk.samtools.util.CloserUtil) Rectangle2D(javafx.geometry.Rectangle2D) SplitPane(javafx.scene.control.SplitPane) SAMTagUtil(htsjdk.samtools.SAMTagUtil) GraphicsContext(javafx.scene.canvas.GraphicsContext) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) Event(javafx.event.Event) Set(java.util.Set) SAMFileWriter(htsjdk.samtools.SAMFileWriter) Screen(javafx.stage.Screen) SAMTagAndValue(htsjdk.samtools.SAMRecord.SAMTagAndValue) FastqRecord(htsjdk.samtools.fastq.FastqRecord) Platform(javafx.application.Platform) Separator(javafx.scene.control.Separator) Stream(java.util.stream.Stream) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) FlowPane(javafx.scene.layout.FlowPane) SAMFlag(htsjdk.samtools.SAMFlag) BorderPane(javafx.scene.layout.BorderPane) SamFlagsChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.SamFlagsChartFactory) CloseableIterator(htsjdk.samtools.util.CloseableIterator) FXCollections(javafx.collections.FXCollections) LogCloseableIterator(com.github.lindenb.jvarkit.tools.vcfviewgui.NgsStage.LogCloseableIterator) TextFlow(javafx.scene.text.TextFlow) Supplier(java.util.function.Supplier) ArrayList(java.util.ArrayList) Color(javafx.scene.paint.Color) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) StringWriter(java.io.StringWriter) CheckBox(javafx.scene.control.CheckBox) IOException(java.io.IOException) File(java.io.File) Menu(javafx.scene.control.Menu) FileChooser(javafx.stage.FileChooser) SamInputResource(htsjdk.samtools.SamInputResource) TreeMap(java.util.TreeMap) Tab(javafx.scene.control.Tab) CompiledScript(javax.script.CompiledScript) ObservableValue(javafx.beans.value.ObservableValue) EventHandler(javafx.event.EventHandler) Button(javafx.scene.control.Button) CigarElement(htsjdk.samtools.CigarElement) CheckMenuItem(javafx.scene.control.CheckMenuItem) OverrunStyle(javafx.scene.control.OverrunStyle) VBox(javafx.scene.layout.VBox) SAMFileHeader(htsjdk.samtools.SAMFileHeader) BasesPerPositionChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.BasesPerPositionChartFactory) ComboBox(javafx.scene.control.ComboBox) AlertType(javafx.scene.control.Alert.AlertType) ContextMenu(javafx.scene.control.ContextMenu) WindowEvent(javafx.stage.WindowEvent) TableView(javafx.scene.control.TableView) ReadLengthChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ReadLengthChartFactory) Orientation(javafx.geometry.Orientation) Alert(javafx.scene.control.Alert) MenuItem(javafx.scene.control.MenuItem) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Font(javafx.scene.text.Font) Chart(javafx.scene.chart.Chart) Collectors(java.util.stream.Collectors) SAMRecord(htsjdk.samtools.SAMRecord) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Text(javafx.scene.text.Text) SimpleBindings(javax.script.SimpleBindings) QualityPerPositionChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.QualityPerPositionChartFactory) List(java.util.List) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Optional(java.util.Optional) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) SamReaderFactory(htsjdk.samtools.SamReaderFactory) FastqReader(htsjdk.samtools.fastq.FastqReader) ReadQualityChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ReadQualityChartFactory) Cigar(htsjdk.samtools.Cigar) Scene(javafx.scene.Scene) SequenceUtil(htsjdk.samtools.util.SequenceUtil) SAMUtils(htsjdk.samtools.SAMUtils) TextArea(javafx.scene.control.TextArea) ButtonType(javafx.scene.control.ButtonType) HashMap(java.util.HashMap) GCPercentChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.GCPercentChartFactory) MapqChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.MapqChartFactory) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) SAMTextHeaderCodec(htsjdk.samtools.SAMTextHeaderCodec) TableColumn(javafx.scene.control.TableColumn) HashSet(java.util.HashSet) Interval(htsjdk.samtools.util.Interval) TableCell(javafx.scene.control.TableCell) Insets(javafx.geometry.Insets) Tooltip(javafx.scene.control.Tooltip) Locatable(htsjdk.samtools.util.Locatable) Label(javafx.scene.control.Label) MenuBar(javafx.scene.control.MenuBar) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ReadChartFactory(com.github.lindenb.jvarkit.tools.vcfviewgui.chart.ReadChartFactory) SamReader(htsjdk.samtools.SamReader) ScrollEvent(javafx.scene.input.ScrollEvent) ActionEvent(javafx.event.ActionEvent) Stage(javafx.stage.Stage) SpinnerValueFactory(javafx.scene.control.SpinnerValueFactory) ChangeListener(javafx.beans.value.ChangeListener) ExtensionFilter(javafx.stage.FileChooser.ExtensionFilter) Collections(java.util.Collections) ScrollBar(javafx.scene.control.ScrollBar) Tab(javafx.scene.control.Tab) Label(javafx.scene.control.Label) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) TableView(javafx.scene.control.TableView)

Example 13 with SAMProgramRecord

use of htsjdk.samtools.SAMProgramRecord in project jvarkit by lindenb.

the class Biostar139647 method doWork.

@Override
public int doWork(final List<String> args) {
    SAMFileWriter w = null;
    LineIterator r = null;
    try {
        final String filename = super.oneFileOrNull(args);
        if (filename == null) {
            LOG.info("Reading from stdin");
            r = IOUtils.openStreamForLineIterator(stdin());
        } else {
            LOG.info("Reading from " + filename);
            r = IOUtils.openURIForLineIterator(filename);
        }
        Format format = Format.None;
        while (r.hasNext() && format == Format.None) {
            String line = r.peek();
            if (line.trim().isEmpty()) {
                r.next();
                continue;
            }
            if (line.startsWith("CLUSTAL")) {
                format = Format.Clustal;
                // consume
                r.next();
                break;
            } else if (line.startsWith(">")) {
                format = Format.Fasta;
                break;
            } else {
                LOG.error("MSA format not recognized in " + line);
                return -1;
            }
        }
        LOG.info("format : " + format);
        if (Format.Fasta.equals(format)) {
            // this.consensus=new FastaConsensus();
            AlignSequence curr = null;
            while (r.hasNext()) {
                String line = r.next();
                if (line.startsWith(">")) {
                    curr = new AlignSequence();
                    curr.name = line.substring(1).trim();
                    if (sample2sequence.containsKey(curr.name)) {
                        LOG.error("Sequence ID " + curr.name + " defined twice");
                        return -1;
                    }
                    sample2sequence.put(curr.name, curr);
                } else if (curr != null) {
                    curr.seq.append(line.trim());
                    this.align_length = Math.max(this.align_length, curr.seq.length());
                }
            }
        } else if (Format.Clustal.equals(format)) {
            AlignSequence curr = null;
            int columnStart = -1;
            while (r.hasNext()) {
                String line = r.next();
                if (line.trim().isEmpty() || line.startsWith("CLUSTAL W")) {
                    columnStart = -1;
                    continue;
                }
                if (line.charAt(0) == ' ') {
                    if (columnStart == -1) {
                        LOG.error("illegal consensus line for " + line);
                        return -1;
                    }
                } else {
                    if (columnStart == -1) {
                        columnStart = line.indexOf(' ');
                        if (columnStart == -1) {
                            LOG.error("no whithespace in " + line);
                            return -1;
                        }
                        while (columnStart < line.length() && line.charAt(columnStart) == ' ') {
                            columnStart++;
                        }
                    }
                    String seqname = line.substring(0, columnStart).trim();
                    curr = this.sample2sequence.get(seqname);
                    if (curr == null) {
                        curr = new AlignSequence();
                        curr.name = seqname;
                        this.sample2sequence.put(curr.name, curr);
                    }
                    curr.seq.append(line.substring(columnStart));
                    this.align_length = Math.max(align_length, curr.seq.length());
                }
            }
        } else {
            LOG.error("Undefined input format");
            return -1;
        }
        final SAMFileHeader header = new SAMFileHeader();
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        dict.addSequence(new SAMSequenceRecord(REF, this.align_length));
        header.setSortOrder(SortOrder.unsorted);
        header.setSequenceDictionary(dict);
        final SAMProgramRecord pgr = header.createProgramRecord();
        pgr.setProgramName(getProgramName());
        pgr.setProgramVersion(getVersion());
        if (getProgramCommandLine().trim().isEmpty()) {
            pgr.setCommandLine("(empty)");
        } else {
            pgr.setCommandLine(getProgramCommandLine());
        }
        w = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, false);
        final DefaultSAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
        for (final String seqName : this.sample2sequence.keySet()) {
            final AlignSequence shortRead = this.sample2sequence.get(seqName);
            final SAMRecord rec = samRecordFactory.createSAMRecord(header);
            rec.setReadName(seqName);
            rec.setReadString(shortRead.seq.toString().replaceAll("[\\*\\- ]+", ""));
            int start = 0;
            while (start < shortRead.seq.length() && !Character.isLetter(shortRead.at(start))) {
                start++;
            }
            rec.setAlignmentStart(start + 1);
            int end = shortRead.seq.length() - 1;
            while (end > 0 && !Character.isLetter(shortRead.at(end))) {
                --end;
            }
            // rec.setAlignmentEnd(end+1); not supported
            int n = start;
            StringBuilder dna = new StringBuilder();
            final List<CigarElement> cigars = new ArrayList<>();
            while (n <= end) {
                if (!Character.isLetter(shortRead.at(n))) {
                    cigars.add(new CigarElement(1, CigarOperator.D));
                } else {
                    cigars.add(new CigarElement(1, CigarOperator.M));
                    dna.append(shortRead.at(n));
                }
                n++;
            }
            // simplify cigar string
            n = 0;
            while (n + 1 < cigars.size()) {
                CigarElement c1 = cigars.get(n);
                CigarElement c2 = cigars.get(n + 1);
                if (c1.getOperator().equals(c2.getOperator())) {
                    cigars.set(n, new CigarElement(c1.getLength() + c2.getLength(), c1.getOperator()));
                    cigars.remove(n + 1);
                } else {
                    ++n;
                }
            }
            rec.setReadPairedFlag(false);
            rec.setMappingQuality(60);
            rec.setReferenceName(REF);
            rec.setReadString(dna.toString());
            rec.setCigar(new Cigar(cigars));
            rec.setAttribute("PG", pgr.getId());
            w.addAlignment(rec);
        }
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(r);
        CloserUtil.close(w);
    }
}
Also used : SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) LineIterator(htsjdk.tribble.readers.LineIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DefaultSAMRecordFactory(htsjdk.samtools.DefaultSAMRecordFactory) CigarElement(htsjdk.samtools.CigarElement) SAMProgramRecord(htsjdk.samtools.SAMProgramRecord) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader)

Aggregations

SAMProgramRecord (htsjdk.samtools.SAMProgramRecord)13 SAMFileHeader (htsjdk.samtools.SAMFileHeader)11 SAMFileWriter (htsjdk.samtools.SAMFileWriter)8 SAMRecord (htsjdk.samtools.SAMRecord)7 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)6 SamReader (htsjdk.samtools.SamReader)6 File (java.io.File)5 SAMFileWriterFactory (htsjdk.samtools.SAMFileWriterFactory)4 SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)4 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)4 Interval (htsjdk.samtools.util.Interval)4 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)3 Cigar (htsjdk.samtools.Cigar)3 CigarElement (htsjdk.samtools.CigarElement)3 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)3 SamReaderFactory (htsjdk.samtools.SamReaderFactory)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 HashSet (java.util.HashSet)3 FastqReader (htsjdk.samtools.fastq.FastqReader)2