Search in sources :

Example 21 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project polyGembler by c-zhou.

the class SamToTaxa method distributeSortedBam.

private void distributeSortedBam() {
    // TODO Auto-generated method stub
    long start = System.currentTimeMillis();
    try {
        File indexFile = new File(myIndexFileName);
        myLogger.info("Using the following index file:");
        myLogger.info(indexFile.getAbsolutePath());
        File samFile = new File(mySamFileName);
        myLogger.info("Using the following BAM file:");
        myLogger.info(samFile.getAbsolutePath());
        final SamReaderFactory factory = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS).validationStringency(ValidationStringency.SILENT);
        final SamReader inputSam = factory.open(samFile);
        header_sam = inputSam.getFileHeader();
        header_sam.setSortOrder(SortOrder.unsorted);
        indexReader = Utils.getBufferedReader(indexFile, 65536);
        String line = indexReader.readLine();
        String[] samples = line.split("\\s+");
        for (int i = 1; i < samples.length; i++) {
            String taxa = samples[i];
            SAMFileHeader header = header_sam.clone();
            SAMReadGroupRecord rg = new SAMReadGroupRecord(taxa);
            rg.setSample(taxa);
            header.addReadGroup(rg);
            bam_writers.put(taxa, new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, new File(myOutputDir + System.getProperty("file.separator") + taxa + ".bam")));
            locks.put(taxa, new Object());
        }
        SAMRecordIterator iter = inputSam.iterator();
        cache();
        this.initial_thread_pool();
        final int block = 10000;
        int bS = 0;
        SAMRecord[] Qs = new SAMRecord[block];
        SAMRecord temp = iter.next();
        int k = 0;
        allReads = 0;
        long tag;
        while (temp != null) {
            allReads++;
            Qs[k] = temp;
            temp = iter.hasNext() ? iter.next() : null;
            k++;
            tag = temp == null ? 0 : Long.parseLong(temp.getReadName());
            if (k == block || temp == null || tag >= cursor) {
                executor.submit(new Runnable() {

                    private SAMRecord[] sam;

                    private int block_i;

                    @Override
                    public void run() {
                        // TODO Auto-generated method stub
                        long tag;
                        Tag tagObj;
                        String taxa;
                        for (int i = 0; i < sam.length; i++) {
                            try {
                                if (sam[i] == null)
                                    break;
                                tag = Long.parseLong(sam[i].getReadName());
                                tagObj = indexMap.get(tag);
                                int l = tagObj.taxa.length;
                                long cov = 0;
                                for (int t = 0; t < l; t++) cov += tagObj.count[t];
                                if (cov > maxCov) {
                                    myLogger.info("?tag id, " + tag + "::tag sequence, " + sam[i].getReadString() + "::aligned to " + sam[i].getReferenceName() + ":" + sam[i].getAlignmentStart() + "-" + sam[i].getAlignmentEnd() + "::omitted due to abnormal high coverage, " + cov);
                                    continue;
                                }
                                for (int t = 0; t < l; t++) {
                                    taxa = tagObj.taxa[t];
                                    sam[i].setAttribute("RG", taxa);
                                    int p = tagObj.count[t];
                                    synchronized (locks.get(taxa)) {
                                        for (int r = 0; r < p; r++) bam_writers.get(taxa).addAlignment(sam[i]);
                                    }
                                }
                            } catch (Exception e) {
                                Thread t = Thread.currentThread();
                                t.getUncaughtExceptionHandler().uncaughtException(t, e);
                                e.printStackTrace();
                                executor.shutdown();
                                System.exit(1);
                            }
                        }
                        if (block * (block_i + 1) % 1000000 == 0)
                            myLogger.info("Tag processed: " + block * (block_i + 1));
                    }

                    public Runnable init(SAMRecord[] sam, int bS) {
                        this.sam = sam;
                        this.block_i = bS;
                        return (this);
                    }
                }.init(Qs, bS));
                k = 0;
                Qs = new SAMRecord[block];
                bS++;
                if (temp == null || tag >= cursor) {
                    this.waitFor();
                    if (temp != null) {
                        cache();
                        this.initial_thread_pool();
                    }
                }
            }
        }
        iter.close();
        inputSam.close();
        indexReader.close();
        // executor.awaitTermination(365, TimeUnit.DAYS);
        for (String key : bam_writers.keySet()) bam_writers.get(key).close();
        myLogger.info("Total number of reads in lane=" + allReads);
        myLogger.info("Process took " + (System.currentTimeMillis() - start) / 1000 + " seconds.");
    } catch (Exception e) {
        e.printStackTrace();
    }
}
Also used : SamReaderFactory(htsjdk.samtools.SamReaderFactory) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File)

Example 22 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class BamStats02 method run.

private void run(String filename, SamReader r, PrintWriter out) {
    final Counter<Category> counter = new Counter<>();
    SAMRecordIterator iter = null;
    try {
        iter = r.iterator();
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(r.getFileHeader().getSequenceDictionary());
        while (iter.hasNext()) {
            final SAMRecord record = progress.watch(iter.next());
            final Category cat = new Category();
            cat.set(STRING_PROPS.filename, filename);
            cat.flag = record.getFlags();
            cat.set(INT_PROPS.inTarget, -1);
            final SAMReadGroupRecord g = record.getReadGroup();
            if (g != null) {
                cat.set(STRING_PROPS.samplename, g.getSample());
                cat.set(STRING_PROPS.platform, g.getPlatform());
                cat.set(STRING_PROPS.platformUnit, g.getPlatformUnit());
                cat.set(STRING_PROPS.library, g.getLibrary());
            }
            final ShortReadName readName = ShortReadName.parse(record);
            if (readName.isValid()) {
                cat.set(STRING_PROPS.instrument, readName.getInstrumentName());
                cat.set(STRING_PROPS.flowcell, readName.getFlowCellId());
                cat.set(INT_PROPS.lane, readName.getFlowCellLane());
                cat.set(INT_PROPS.run, readName.getRunId());
            }
            if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
                cat.set(STRING_PROPS.mate_chromosome, record.getMateReferenceName());
            }
            if (!record.getReadUnmappedFlag()) {
                cat.set(INT_PROPS.mapq, (int) (Math.ceil(record.getMappingQuality() / 10.0) * 10));
                cat.set(STRING_PROPS.chromosome, record.getReferenceName());
                if (this.intervals != null) {
                    if (this.intervals.containsOverlapping(new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd()))) {
                        cat.set(INT_PROPS.inTarget, 1);
                    } else {
                        cat.set(INT_PROPS.inTarget, 0);
                    }
                }
            }
            counter.incr(cat);
        }
        progress.finish();
        for (final Category cat : counter.keySetDecreasing()) {
            cat.print(out);
            out.print("\t");
            out.print(counter.count(cat));
            out.println();
        }
        out.flush();
    } finally {
        CloserUtil.close(iter);
    }
}
Also used : Counter(com.github.lindenb.jvarkit.util.Counter) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ShortReadName(com.github.lindenb.jvarkit.util.illumina.ShortReadName) SAMRecord(htsjdk.samtools.SAMRecord) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) Interval(htsjdk.samtools.util.Interval)

Example 23 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class BamStats05 method doWork.

protected int doWork(final PrintWriter pw, final Map<String, List<Interval>> gene2interval, final String filename, final SamReader IN) throws Exception {
    try {
        LOG.info("Scanning " + filename);
        final SAMFileHeader header = IN.getFileHeader();
        final List<SAMReadGroupRecord> rgs = header.getReadGroups();
        if (rgs == null || rgs.isEmpty())
            throw new IOException("No read groups in " + filename);
        final Set<String> groupNames = this.groupBy.getPartitions(rgs);
        for (final String partition : groupNames) {
            if (partition.isEmpty())
                throw new IOException("Empty " + groupBy.name());
            for (final String gene : gene2interval.keySet()) {
                int geneStart = Integer.MAX_VALUE;
                int geneEnd = 0;
                final List<Integer> counts = new ArrayList<>();
                for (final Interval interval : gene2interval.get(gene)) {
                    geneStart = Math.min(geneStart, interval.getStart() - 1);
                    geneEnd = Math.max(geneEnd, interval.getEnd());
                    /* picard javadoc:  - Sequence name - Start position (1-based) - End position (1-based, end inclusive)  */
                    int[] interval_counts = new int[interval.getEnd() - interval.getStart() + 1];
                    if (interval_counts.length == 0)
                        continue;
                    Arrays.fill(interval_counts, 0);
                    if (IN.getFileHeader().getSequenceIndex(interval.getContig()) == -1) {
                        throw new IllegalArgumentException("NO DICT FOR \"" + interval.getContig() + "\"");
                    }
                    /**
                     *     start - 1-based, inclusive start of interval of interest. Zero implies start of the reference sequence.
                     *	   end - 1-based, inclusive end of interval of interest. Zero implies end of the reference sequence.
                     */
                    SAMRecordIterator r = IN.query(new QueryInterval[] { new QueryInterval(header.getSequenceIndex(interval.getContig()), interval.getStart(), interval.getEnd()) }, false);
                    while (r.hasNext()) {
                        final SAMRecord rec = r.next();
                        if (rec.getReadUnmappedFlag())
                            continue;
                        if (filter.filterOut(rec))
                            continue;
                        if (!rec.getReferenceName().equals(interval.getContig()))
                            continue;
                        final SAMReadGroupRecord rg = rec.getReadGroup();
                        if (rg == null || !partition.equals(this.groupBy.apply(rg)))
                            continue;
                        final Cigar cigar = rec.getCigar();
                        if (cigar == null)
                            continue;
                        int refpos1 = rec.getAlignmentStart();
                        for (final CigarElement ce : cigar.getCigarElements()) {
                            final CigarOperator op = ce.getOperator();
                            if (!op.consumesReferenceBases())
                                continue;
                            if (op.consumesReadBases()) {
                                for (int i = 0; i < ce.getLength(); ++i) {
                                    if (refpos1 + i >= interval.getStart() && refpos1 + i <= interval.getEnd()) {
                                        interval_counts[refpos1 + i - interval.getStart()]++;
                                    }
                                }
                            }
                            refpos1 += ce.getLength();
                        }
                    }
                    /* end while r */
                    r.close();
                    for (int d : interval_counts) {
                        counts.add(d);
                    }
                }
                /* end interval */
                Collections.sort(counts);
                int count_no_coverage = 0;
                double mean = 0;
                for (int cov : counts) {
                    if (cov <= MIN_COVERAGE)
                        ++count_no_coverage;
                    mean += cov;
                }
                mean /= counts.size();
                pw.println(gene2interval.get(gene).get(0).getContig() + "\t" + geneStart + "\t" + geneEnd + "\t" + gene + "\t" + partition + "\t" + counts.size() + "\t" + counts.get(0) + "\t" + counts.get(counts.size() - 1) + "\t" + mean + "\t" + count_no_coverage + "\t" + (int) (((counts.size() - count_no_coverage) / (double) counts.size()) * 100.0));
            }
        // end gene
        }
        // end sample
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(IN);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) QueryInterval(htsjdk.samtools.QueryInterval) IOException(java.io.IOException) CigarOperator(htsjdk.samtools.CigarOperator) CigarElement(htsjdk.samtools.CigarElement) IOException(java.io.IOException) Cigar(htsjdk.samtools.Cigar) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Interval(htsjdk.samtools.util.Interval) QueryInterval(htsjdk.samtools.QueryInterval)

Example 24 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class IgvReview method start.

@Override
public void start(final Stage stage) throws Exception {
    stage.setTitle(getClass().getSimpleName());
    Predicate<VariantContext> ctxFilter;
    Map<String, String> params = super.getParameters().getNamed();
    if (params.containsKey("--filter")) {
        ctxFilter = JexlVariantPredicate.create(params.get("--filter"));
    } else {
        ctxFilter = V -> true;
    }
    final List<String> args = super.getParameters().getUnnamed();
    final File configFile;
    if (args.isEmpty()) {
        final FileChooser fc = new FileChooser();
        final String lastDirStr = preferences.get(LAST_USED_DIR_KEY, null);
        if (lastDirStr != null && !lastDirStr.isEmpty()) {
            fc.setInitialDirectory(new File(lastDirStr));
        }
        fc.getExtensionFilters().addAll(Collections.singletonList(new FileChooser.ExtensionFilter("Config file", "*.config", "*.cfg", "*.list")));
        configFile = fc.showOpenDialog(stage);
    } else if (args.size() == 1) {
        configFile = new File(args.get(0));
    } else {
        configFile = null;
    }
    if (configFile == null || !configFile.exists()) {
        JfxUtils.dialog().cause("Illegal number of arguments or file doesn't exists.").show(stage);
        Platform.exit();
        return;
    }
    if (configFile.isFile() && configFile.getParentFile() != null) {
        this.preferences.put(LAST_USED_DIR_KEY, configFile.getParentFile().getPath());
    }
    final List<String> configLines = Files.readAllLines(configFile.toPath());
    final Predicate<String> ignoreComment = (S) -> !S.startsWith("#");
    final Predicate<String> predVcf = S -> S.endsWith(".vcf") || S.endsWith(".vcf.gz");
    if (configLines.stream().filter(ignoreComment).filter(predVcf).count() != 1) {
        JfxUtils.dialog().cause("Found more than one vcf file in " + configFile).show(stage);
        Platform.exit();
        return;
    }
    final File vcfFile = configLines.stream().filter(ignoreComment).filter(predVcf).map(S -> new File(S)).findFirst().get();
    LOG.info("Opening vcf file and loading in memory");
    VCFFileReader vfr = null;
    CloseableIterator<VariantContext> iter = null;
    final Set<String> sampleNames;
    try {
        this.variants.clear();
        vfr = new VCFFileReader(vcfFile, false);
        this.vcfHeader = vfr.getFileHeader();
        sampleNames = new HashSet<>(this.vcfHeader.getSampleNamesInOrder());
        if (sampleNames.isEmpty()) {
            JfxUtils.dialog().cause("No Genotypes in " + vcfFile).show(stage);
            Platform.exit();
            return;
        }
        iter = vfr.iterator();
        this.variants.addAll(iter.stream().filter(ctxFilter).filter(CTX -> CTX.isVariant()).collect(Collectors.toList()));
    } catch (final Exception err) {
        JfxUtils.dialog().cause(err).show(stage);
        Platform.exit();
        return;
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(vfr);
    }
    if (this.variants.isEmpty()) {
        JfxUtils.dialog().cause("No Variants").show(stage);
        Platform.exit();
        return;
    }
    final SAMSequenceDictionary dict = this.vcfHeader.getSequenceDictionary();
    if (dict == null || dict.isEmpty()) {
        JfxUtils.dialog().cause(JvarkitException.VcfDictionaryMissing.getMessage(vcfFile.getPath())).show(stage);
        Platform.exit();
        return;
    }
    final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
    configLines.stream().filter(ignoreComment).filter(S -> S.endsWith(".bam")).map(S -> new File(S)).forEach(F -> {
        final SamReader samIn = srf.open(F);
        final SAMFileHeader header = samIn.getFileHeader();
        CloserUtil.close(samIn);
        String sample = null;
        for (final SAMReadGroupRecord rg : header.getReadGroups()) {
            String s = rg.getSample();
            if (s == null)
                continue;
            if (sample == null) {
                sample = s;
            } else if (!sample.equals(s)) {
                JfxUtils.dialog().cause("Two samples in " + F).show(stage);
                Platform.exit();
                return;
            }
        }
        if (sample == null) {
            JfxUtils.dialog().cause("No sample in " + F + ". Ignoring").show(stage);
            return;
        }
        if (!sampleNames.contains(sample)) {
            JfxUtils.dialog().cause("Not in VCF header " + sample + " / " + F + ". Ignoring").show(stage);
            return;
        }
        this.sample2bamFile.put(sample, F);
    });
    if (this.sample2bamFile.isEmpty()) {
        JfxUtils.dialog().cause("No valid bam file in " + configFile).show(stage);
        return;
    }
    sampleNames.retainAll(this.sample2bamFile.keySet());
    if (sampleNames.isEmpty()) {
        JfxUtils.dialog().cause("No Sample associated to bam").show(stage);
        return;
    }
    ObservableList<VariantAndGenotype> genotypes = FXCollections.observableArrayList(this.variants.stream().flatMap(CTX -> CTX.getGenotypes().stream().filter(G -> sampleNames.contains(G.getSampleName())).map(G -> new VariantAndGenotype(CTX, G))).collect(Collectors.toList()));
    if (genotypes.isEmpty()) {
        JfxUtils.dialog().cause("No Genotype to show").show(stage);
        return;
    }
    Menu menu = new Menu("File");
    MenuItem menuItem = new MenuItem("Save as...");
    menuItem.setOnAction(AE -> {
        saveVariantsAs(stage);
    });
    menu.getItems().add(menuItem);
    menuItem = new MenuItem("Save");
    menuItem.setOnAction(AE -> {
        if (this.saveAsFile != null) {
            saveVariants(stage, this.saveAsFile);
        } else {
            saveVariantsAs(stage);
        }
    });
    menu.getItems().add(menuItem);
    menu.getItems().add(new SeparatorMenuItem());
    menuItem = new MenuItem("Quit");
    menuItem.setOnAction(AE -> {
        Platform.exit();
    });
    menu.getItems().add(menuItem);
    MenuBar bar = new MenuBar(menu);
    this.genotypeTable = new TableView<>(genotypes);
    this.genotypeTable.getColumns().add(makeColumn("CHROM", G -> G.ctx.getContig()));
    this.genotypeTable.getColumns().add(makeColumn("POS", G -> G.ctx.getStart()));
    this.genotypeTable.getColumns().add(makeColumn("ID", G -> G.ctx.getID()));
    this.genotypeTable.getColumns().add(makeColumn("REF", G -> G.ctx.getReference().getDisplayString()));
    this.genotypeTable.getColumns().add(makeColumn("ALT", G -> G.ctx.getAlternateAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
    this.genotypeTable.getColumns().add(makeColumn("Sample", G -> G.g.getSampleName()));
    this.genotypeTable.getColumns().add(makeColumn("Type", G -> G.g.getType().name()));
    this.genotypeTable.getColumns().add(makeColumn("Alleles", G -> G.g.getAlleles().stream().map(A -> A.getDisplayString()).collect(Collectors.joining(","))));
    TableColumn<VariantAndGenotype, String> reviewCol = new TableColumn<>("Review");
    reviewCol.setCellValueFactory(C -> C.getValue().getReviewProperty());
    reviewCol.setCellFactory(TextFieldTableCell.forTableColumn());
    reviewCol.setOnEditCommit((E) -> {
        int y = E.getTablePosition().getRow();
        this.genotypeTable.getItems().get(y).setReview(E.getNewValue());
    });
    reviewCol.setEditable(true);
    this.genotypeTable.getColumns().add(reviewCol);
    this.genotypeTable.getSelectionModel().cellSelectionEnabledProperty().set(true);
    this.genotypeTable.setEditable(true);
    final ContextMenu cm = new ContextMenu();
    MenuItem mi1 = new MenuItem("Menu 1");
    cm.getItems().add(mi1);
    MenuItem mi2 = new MenuItem("Menu 2");
    cm.getItems().add(mi2);
    this.genotypeTable.setOnMousePressed(event -> {
        if (event.isPrimaryButtonDown() && (event.getClickCount() == 3 || event.isShiftDown())) {
            moveIgvTo(stage, genotypeTable.getSelectionModel().getSelectedItem());
        } else if (event.isSecondaryButtonDown()) {
            cm.show(genotypeTable, event.getScreenX(), event.getScreenY());
        }
    });
    final BorderPane pane2 = new BorderPane(this.genotypeTable);
    pane2.setPadding(new Insets(10, 10, 10, 10));
    VBox vbox1 = new VBox(bar, pane2);
    final Scene scene = new Scene(vbox1, 500, 300);
    stage.setScene(scene);
    stage.show();
}
Also used : JexlVariantPredicate(com.github.lindenb.jvarkit.util.vcf.JexlVariantPredicate) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) VBox(javafx.scene.layout.VBox) SAMFileHeader(htsjdk.samtools.SAMFileHeader) Application(javafx.application.Application) Vector(java.util.Vector) ContextMenu(javafx.scene.control.ContextMenu) IgvConstants(com.github.lindenb.jvarkit.util.igv.IgvConstants) Map(java.util.Map) TableView(javafx.scene.control.TableView) CloserUtil(htsjdk.samtools.util.CloserUtil) PrintWriter(java.io.PrintWriter) MenuItem(javafx.scene.control.MenuItem) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Predicate(java.util.function.Predicate) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Platform(javafx.application.Platform) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) List(java.util.List) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) ObservableList(javafx.collections.ObservableList) BorderPane(javafx.scene.layout.BorderPane) SamReaderFactory(htsjdk.samtools.SamReaderFactory) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) Genotype(htsjdk.variant.variantcontext.Genotype) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Scene(javafx.scene.Scene) Socket(java.net.Socket) SimpleStringProperty(javafx.beans.property.SimpleStringProperty) FXCollections(javafx.collections.FXCollections) HashMap(java.util.HashMap) BackingStoreException(java.util.prefs.BackingStoreException) TextFieldTableCell(javafx.scene.control.cell.TextFieldTableCell) Function(java.util.function.Function) ValidationStringency(htsjdk.samtools.ValidationStringency) ArrayList(java.util.ArrayList) TableColumn(javafx.scene.control.TableColumn) HashSet(java.util.HashSet) Insets(javafx.geometry.Insets) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) MenuBar(javafx.scene.control.MenuBar) Files(java.nio.file.Files) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) SamReader(htsjdk.samtools.SamReader) InputStreamReader(java.io.InputStreamReader) File(java.io.File) Preferences(java.util.prefs.Preferences) Menu(javafx.scene.control.Menu) FileChooser(javafx.stage.FileChooser) Stage(javafx.stage.Stage) Closeable(java.io.Closeable) JfxUtils(com.github.lindenb.jvarkit.jfx.components.JfxUtils) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Window(javafx.stage.Window) BufferedReader(java.io.BufferedReader) Collections(java.util.Collections) BorderPane(javafx.scene.layout.BorderPane) Insets(javafx.geometry.Insets) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) MenuBar(javafx.scene.control.MenuBar) ContextMenu(javafx.scene.control.ContextMenu) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) FileChooser(javafx.stage.FileChooser) ContextMenu(javafx.scene.control.ContextMenu) Menu(javafx.scene.control.Menu) SamReaderFactory(htsjdk.samtools.SamReaderFactory) MenuItem(javafx.scene.control.MenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) SeparatorMenuItem(javafx.scene.control.SeparatorMenuItem) Scene(javafx.scene.Scene) TableColumn(javafx.scene.control.TableColumn) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) BackingStoreException(java.util.prefs.BackingStoreException) IOException(java.io.IOException) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) VBox(javafx.scene.layout.VBox)

Example 25 with SAMReadGroupRecord

use of htsjdk.samtools.SAMReadGroupRecord in project jvarkit by lindenb.

the class Biostar78400 method doWork.

@Override
public int doWork(List<String> args) {
    if (this.XML == null) {
        LOG.error("XML file missing");
        return -1;
    }
    final Map<String, Map<Integer, String>> flowcell2lane2id = new HashMap<String, Map<Integer, String>>();
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    try {
        final Pattern readNameSignature = Pattern.compile(this.readNameSignatureStr);
        final JAXBContext context = JAXBContext.newInstance(ReadGroup.class, ReadGroupList.class);
        final Unmarshaller unmarshaller = context.createUnmarshaller();
        final ReadGroupList rgl = unmarshaller.unmarshal(new StreamSource(XML), ReadGroupList.class).getValue();
        if (rgl.flowcells.isEmpty()) {
            LOG.error("empty XML " + XML);
            return -1;
        }
        sfr = openSamReader(oneFileOrNull(args));
        final SAMFileHeader header = sfr.getFileHeader().clone();
        header.addComment("Processed with " + getProgramName());
        final Set<String> seenids = new HashSet<String>();
        final List<SAMReadGroupRecord> samReadGroupRecords = new ArrayList<SAMReadGroupRecord>();
        for (final FlowCell fc : rgl.flowcells) {
            final Map<Integer, String> lane2id = new HashMap<Integer, String>();
            for (final Lane lane : fc.lanes) {
                for (final ReadGroup rg : lane.readGroups) {
                    if (seenids.contains(rg.id)) {
                        LOG.error("Group id " + rg.id + " defined twice");
                        return -1;
                    }
                    seenids.add(rg.id);
                    // create the read group we'll be using
                    final SAMReadGroupRecord rgrec = new SAMReadGroupRecord(rg.id);
                    rgrec.setLibrary(rg.library);
                    rgrec.setPlatform(rg.platform);
                    rgrec.setSample(rg.sample);
                    rgrec.setPlatformUnit(rg.platformunit);
                    if (rg.center != null)
                        rgrec.setSequencingCenter(rg.center);
                    if (rg.description != null)
                        rgrec.setDescription(rg.description);
                    lane2id.put(lane.id, rg.id);
                    samReadGroupRecords.add(rgrec);
                }
            }
            if (flowcell2lane2id.containsKey(fc.name)) {
                LOG.error("FlowCell id " + fc.name + " defined twice in XML");
                return -1;
            }
            flowcell2lane2id.put(fc.name, lane2id);
        }
        header.setReadGroups(samReadGroupRecords);
        sfw = this.writingBamArgs.openSAMFileWriter(this.outputFile, header, true);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        final SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            final SAMRecord rec = progress.watch(iter.next());
            final Matcher matcher = readNameSignature.matcher(rec.getReadName());
            final String flowcellStr;
            final String laneStr;
            if (matcher.matches()) {
                flowcellStr = matcher.group(1);
                laneStr = matcher.group(2);
            } else {
                LOG.error("Read name " + rec.getReadName() + " doesn't match regular expression " + readNameSignature.pattern() + ". please check options");
                return -1;
            }
            String RGID = null;
            final Map<Integer, String> lane2id = flowcell2lane2id.get(flowcellStr);
            if (lane2id == null)
                throw new RuntimeException("Cannot get flowcell/readgroup for " + rec.getReadName());
            try {
                RGID = lane2id.get(Integer.parseInt(laneStr));
            } catch (final Exception e) {
                LOG.error("bad lane-Id in " + rec.getReadName());
                return -1;
            }
            if (RGID == null) {
                throw new RuntimeException("Cannot get RGID for " + rec.getReadName() + " flowcell:" + flowcellStr + " lane2id:" + laneStr + " dict:" + lane2id);
            }
            rec.setAttribute(SAMTag.RG.name(), RGID);
            sfw.addAlignment(rec);
        }
        progress.finish();
        iter.close();
        LOG.info("done");
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfw);
        CloserUtil.close(sfr);
    }
}
Also used : SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) HashMap(java.util.HashMap) Matcher(java.util.regex.Matcher) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) ArrayList(java.util.ArrayList) JAXBContext(javax.xml.bind.JAXBContext) SamReader(htsjdk.samtools.SamReader) Unmarshaller(javax.xml.bind.Unmarshaller) HashSet(java.util.HashSet) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMFileWriter(htsjdk.samtools.SAMFileWriter) StreamSource(javax.xml.transform.stream.StreamSource) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) HashMap(java.util.HashMap) Map(java.util.Map)

Aggregations

SAMReadGroupRecord (htsjdk.samtools.SAMReadGroupRecord)81 SAMFileHeader (htsjdk.samtools.SAMFileHeader)48 SAMRecord (htsjdk.samtools.SAMRecord)33 Test (org.testng.annotations.Test)31 SamReader (htsjdk.samtools.SamReader)29 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)26 File (java.io.File)23 ArrayList (java.util.ArrayList)22 SAMRecordIterator (htsjdk.samtools.SAMRecordIterator)20 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)20 HashMap (java.util.HashMap)18 CigarElement (htsjdk.samtools.CigarElement)17 Cigar (htsjdk.samtools.Cigar)16 HashSet (java.util.HashSet)16 SAMFileWriter (htsjdk.samtools.SAMFileWriter)15 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)15 CigarOperator (htsjdk.samtools.CigarOperator)14 IOException (java.io.IOException)14 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)13 List (java.util.List)12