Search in sources :

Example 1 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class VCFPredictions method doVcfToVcf.

@Override
protected int doVcfToVcf(final String inputName, final VcfIterator r, VariantContextWriter w) {
    ReferenceContig genomicSequence = null;
    try {
        LOG.info("opening REF:" + this.referenceGenomeSource);
        this.referenceGenome = new ReferenceGenomeFactory().open(this.referenceGenomeSource);
        loadKnownGenesFromUri();
        final VCFHeader header = (VCFHeader) r.getHeader();
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(this.referenceGenome.getDictionary());
        contigNameConverter.setOnNotFound(OnNotFound.SKIP);
        final VCFHeader h2 = new VCFHeader(header);
        addMetaData(h2);
        switch(this.outputSyntax) {
            case Vep:
                {
                    h2.addMetaDataLine(new VCFInfoHeaderLine("CSQ", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Consequence type as predicted by VEP" + ". Format: Allele|Feature|Feature_type|Consequence|CDS_position|Protein_position|Amino_acids|Codons"));
                    break;
                }
            case SnpEff:
                {
                    h2.addMetaDataLine(new VCFInfoHeaderLine("ANN", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Functional annotations: 'Allele | Annotation | Annotation_Impact | Gene_Name | Gene_ID | Feature_Type | Feature_ID | Transcript_BioType | Rank | HGVS.c | HGVS.p | cDNA.pos / cDNA.length | CDS.pos / CDS.length | AA.pos / AA.length | Distance | ERRORS / WARNINGS / INFO'"));
                    break;
                }
            default:
                {
                    final StringBuilder format = new StringBuilder();
                    for (FORMAT1 f : FORMAT1.values()) {
                        if (format.length() > 0)
                            format.append("|");
                        format.append(f.name());
                    }
                    h2.addMetaDataLine(new VCFInfoHeaderLine(TAG, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Prediction from " + getClass().getSimpleName() + ". Format: " + format));
                    break;
                }
        }
        w.writeHeader(h2);
        final SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();
        final SequenceOntologyTree.Term so_intron = soTree.getTermByAcn("SO:0001627");
        final SequenceOntologyTree.Term so_exon = soTree.getTermByAcn("SO:0001791");
        final SequenceOntologyTree.Term so_splice_donor = soTree.getTermByAcn("SO:0001575");
        final SequenceOntologyTree.Term so_splice_acceptor = soTree.getTermByAcn("SO:0001574");
        final SequenceOntologyTree.Term so_5_prime_UTR_variant = soTree.getTermByAcn("SO:0001623");
        final SequenceOntologyTree.Term so_3_prime_UTR_variant = soTree.getTermByAcn("SO:0001624");
        final SequenceOntologyTree.Term so_splicing_variant = soTree.getTermByAcn("SO:0001568");
        final SequenceOntologyTree.Term so_stop_lost = soTree.getTermByAcn("SO:0001578");
        final SequenceOntologyTree.Term so_stop_gained = soTree.getTermByAcn("SO:0001587");
        final SequenceOntologyTree.Term so_coding_synonymous = soTree.getTermByAcn("SO:0001819");
        final SequenceOntologyTree.Term so_coding_non_synonymous = soTree.getTermByAcn("SO:0001583");
        final SequenceOntologyTree.Term so_intergenic = soTree.getTermByAcn("SO:0001628");
        final SequenceOntologyTree.Term so_nc_transcript_variant = soTree.getTermByAcn("SO:0001619");
        final SequenceOntologyTree.Term so_non_coding_exon_variant = soTree.getTermByAcn("SO:0001792");
        final SequenceOntologyTree.Term _2KB_upstream_variant = soTree.getTermByAcn("SO:0001636");
        final SequenceOntologyTree.Term _5KB_upstream_variant = soTree.getTermByAcn("SO:0001635");
        final SequenceOntologyTree.Term _5KB_downstream_variant = soTree.getTermByAcn("SO:0001633");
        final SequenceOntologyTree.Term _500bp_downstream_variant = soTree.getTermByAcn("SO:0001634");
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
        while (r.hasNext()) {
            final VariantContext ctx = progress.watch(r.next());
            final String normalizedContig = contigNameConverter.apply(ctx.getContig());
            final List<KnownGene> genes = new ArrayList<>();
            if (!StringUtil.isBlank(normalizedContig)) {
                for (final List<KnownGene> l2 : this.knownGenes.getOverlapping(new Interval(normalizedContig, ctx.getStart(), // 1-based
                ctx.getEnd()))) {
                    genes.addAll(l2);
                }
            }
            final List<Annotation> ctx_annotations = new ArrayList<Annotation>();
            if (genes == null || genes.isEmpty()) {
                // intergenic
                Annotation a = new Annotation();
                a.seqont.add(so_intergenic);
                ctx_annotations.add(a);
            } else {
                if (genomicSequence == null || !genomicSequence.hasName(normalizedContig)) {
                    LOG.info("getting genomic Sequence for " + normalizedContig);
                    genomicSequence = this.referenceGenome.getContig(normalizedContig);
                    if (genomicSequence == null)
                        throw new JvarkitException.ContigNotFoundInDictionary(normalizedContig, this.referenceGenome.getDictionary());
                }
                for (final KnownGene gene : genes) {
                    final GeneticCode geneticCode = GeneticCode.getStandard();
                    for (final Allele alt2 : ctx.getAlternateAlleles()) {
                        if (alt2.isNoCall())
                            continue;
                        if (alt2.isSymbolic()) {
                            LOG.warn("symbolic allele are not handled... " + alt2.getDisplayString());
                            continue;
                        }
                        if (alt2.isReference())
                            continue;
                        final Annotation annotations = new Annotation();
                        annotations.kg = gene;
                        annotations.alt2 = alt2;
                        if (gene.isNonCoding()) {
                            annotations.seqont.add(so_nc_transcript_variant);
                            continue;
                        }
                        ctx_annotations.add(annotations);
                        StringBuilder wildRNA = null;
                        ProteinCharSequence wildProt = null;
                        ProteinCharSequence mutProt = null;
                        MutedSequence mutRNA = null;
                        int position_in_cds = -1;
                        final int position = ctx.getStart() - 1;
                        if (!String.valueOf(genomicSequence.charAt(position)).equalsIgnoreCase(ctx.getReference().getBaseString())) {
                            if (isSimpleBase(ctx.getReference())) {
                                LOG.warn("Warning REF!=GENOMIC SEQ!!! at " + position + "/" + ctx.getReference());
                            }
                            continue;
                        }
                        if (gene.isPositiveStrand()) {
                            if (position < gene.getTxStart() - 2000) {
                                annotations.seqont.add(_5KB_upstream_variant);
                            } else if (position < gene.getTxStart()) {
                                annotations.seqont.add(_2KB_upstream_variant);
                            } else if (position >= gene.getTxEnd() + 500) {
                                annotations.seqont.add(_5KB_downstream_variant);
                            } else if (position >= gene.getTxEnd()) {
                                annotations.seqont.add(_500bp_downstream_variant);
                            } else if (position < gene.getCdsStart()) {
                                // UTR5
                                annotations.seqont.add(so_5_prime_UTR_variant);
                            } else if (gene.getCdsEnd() <= position) {
                                annotations.seqont.add(so_3_prime_UTR_variant);
                            } else {
                                int exon_index = 0;
                                while (exon_index < gene.getExonCount()) {
                                    final KnownGene.Exon exon = gene.getExon(exon_index);
                                    for (int i = exon.getStart(); i < exon.getEnd(); ++i) {
                                        if (i == position) {
                                            annotations.exon_name = exon.getName();
                                            if (exon.isNonCoding()) {
                                                annotations.seqont.add(so_non_coding_exon_variant);
                                            }
                                        }
                                        if (i < gene.getTxStart())
                                            continue;
                                        if (i < gene.getCdsStart())
                                            continue;
                                        if (i >= gene.getCdsEnd())
                                            break;
                                        if (wildRNA == null) {
                                            wildRNA = new StringBuilder();
                                            mutRNA = new MutedSequence(wildRNA);
                                        }
                                        if (i == position) {
                                            annotations.seqont.add(so_exon);
                                            annotations.exon_name = exon.getName();
                                            position_in_cds = wildRNA.length();
                                            annotations.position_cds = position_in_cds;
                                            // in splicing ?
                                            if (exon.isSplicing(position)) {
                                                if (exon.isSplicingAcceptor(position)) {
                                                    // SPLICING_ACCEPTOR
                                                    annotations.seqont.add(so_splice_acceptor);
                                                } else if (exon.isSplicingDonor(position)) {
                                                    // SPLICING_DONOR
                                                    annotations.seqont.add(so_splice_donor);
                                                } else // ??
                                                {
                                                    annotations.seqont.add(so_splicing_variant);
                                                }
                                            }
                                        }
                                        wildRNA.append(genomicSequence.charAt(i));
                                        if (i == position && isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
                                            mutRNA.put(position_in_cds, alt2.getBaseString().charAt(0));
                                        }
                                        if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
                                            wildProt = new ProteinCharSequence(geneticCode, wildRNA);
                                            mutProt = new ProteinCharSequence(geneticCode, mutRNA);
                                        }
                                    }
                                    final KnownGene.Intron intron = exon.getNextIntron();
                                    if (intron != null && intron.contains(position)) {
                                        annotations.intron_name = intron.getName();
                                        annotations.seqont.add(so_intron);
                                        if (intron.isSplicing(position)) {
                                            if (intron.isSplicingAcceptor(position)) {
                                                annotations.seqont.add(so_splice_acceptor);
                                            } else if (intron.isSplicingDonor(position)) {
                                                annotations.seqont.add(so_splice_donor);
                                            } else // ???
                                            {
                                                annotations.seqont.add(so_splicing_variant);
                                            }
                                        }
                                    }
                                    ++exon_index;
                                }
                            }
                        } else // reverse orientation
                        {
                            if (position >= gene.getTxEnd() + 2000) {
                                annotations.seqont.add(_5KB_upstream_variant);
                            } else if (position >= gene.getTxEnd()) {
                                annotations.seqont.add(_2KB_upstream_variant);
                            } else if (position < gene.getTxStart() - 500) {
                                annotations.seqont.add(_5KB_downstream_variant);
                            } else if (position < gene.getTxStart()) {
                                annotations.seqont.add(_500bp_downstream_variant);
                            } else if (position < gene.getCdsStart()) {
                                annotations.seqont.add(so_3_prime_UTR_variant);
                            } else if (gene.getCdsEnd() <= position) {
                                annotations.seqont.add(so_5_prime_UTR_variant);
                            } else {
                                int exon_index = gene.getExonCount() - 1;
                                while (exon_index >= 0) {
                                    final KnownGene.Exon exon = gene.getExon(exon_index);
                                    for (int i = exon.getEnd() - 1; i >= exon.getStart(); --i) {
                                        if (i == position) {
                                            annotations.exon_name = exon.getName();
                                            if (exon.isNonCoding()) {
                                                annotations.seqont.add(so_non_coding_exon_variant);
                                            }
                                        }
                                        if (i >= gene.getCdsEnd())
                                            continue;
                                        if (i < gene.getCdsStart())
                                            break;
                                        if (wildRNA == null) {
                                            wildRNA = new StringBuilder();
                                            mutRNA = new MutedSequence(wildRNA);
                                        }
                                        if (i == position) {
                                            annotations.seqont.add(so_exon);
                                            position_in_cds = wildRNA.length();
                                            annotations.position_cds = position_in_cds;
                                            // in splicing ?
                                            if (exon.isSplicing(position)) {
                                                if (exon.isSplicingAcceptor(position)) {
                                                    annotations.seqont.add(so_splice_acceptor);
                                                } else if (exon.isSplicingDonor(position)) {
                                                    annotations.seqont.add(so_splice_donor);
                                                } else // ?
                                                {
                                                    annotations.seqont.add(so_splicing_variant);
                                                }
                                            }
                                            if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference())) {
                                                mutRNA.put(position_in_cds, AcidNucleics.complement(alt2.getBaseString().charAt(0)));
                                            }
                                        }
                                        wildRNA.append(AcidNucleics.complement(genomicSequence.charAt(i)));
                                        if (wildRNA.length() % 3 == 0 && wildRNA.length() > 0 && wildProt == null) {
                                            wildProt = new ProteinCharSequence(geneticCode, wildRNA);
                                            mutProt = new ProteinCharSequence(geneticCode, mutRNA);
                                        }
                                    }
                                    final KnownGene.Intron intron = exon.getPrevIntron();
                                    if (intron != null && intron.contains(position)) {
                                        annotations.intron_name = intron.getName();
                                        annotations.seqont.add(so_intron);
                                        if (intron.isSplicing(position)) {
                                            if (intron.isSplicingAcceptor(position)) {
                                                annotations.seqont.add(so_splice_acceptor);
                                            } else if (intron.isSplicingDonor(position)) {
                                                annotations.seqont.add(so_splice_donor);
                                            } else // ?
                                            {
                                                annotations.seqont.add(so_splicing_variant);
                                            }
                                        }
                                    }
                                    --exon_index;
                                }
                            }
                        }
                        if (isSimpleBase(alt2) && isSimpleBase(ctx.getReference()) && wildProt != null && mutProt != null && position_in_cds >= 0) {
                            final int pos_aa = position_in_cds / 3;
                            final int mod = position_in_cds % 3;
                            annotations.wildCodon = ("" + wildRNA.charAt(position_in_cds - mod + 0) + wildRNA.charAt(position_in_cds - mod + 1) + wildRNA.charAt(position_in_cds - mod + 2));
                            annotations.mutCodon = ("" + mutRNA.charAt(position_in_cds - mod + 0) + mutRNA.charAt(position_in_cds - mod + 1) + mutRNA.charAt(position_in_cds - mod + 2));
                            annotations.position_protein = (pos_aa + 1);
                            annotations.wildAA = String.valueOf(wildProt.charAt(pos_aa));
                            annotations.mutAA = (String.valueOf(mutProt.charAt(pos_aa)));
                            annotations.seqont.remove(so_exon);
                            if (isStop(wildProt.charAt(pos_aa)) && !isStop(mutProt.charAt(pos_aa))) {
                                annotations.seqont.add(so_stop_lost);
                            } else if (!isStop(wildProt.charAt(pos_aa)) && isStop(mutProt.charAt(pos_aa))) {
                                annotations.seqont.add(so_stop_gained);
                            } else if (wildProt.charAt(pos_aa) == mutProt.charAt(pos_aa)) {
                                annotations.seqont.add(so_coding_synonymous);
                            } else {
                                annotations.seqont.add(so_coding_non_synonymous);
                            }
                        }
                    }
                }
            }
            final Set<String> info = new HashSet<String>(ctx_annotations.size());
            for (final Annotation a : ctx_annotations) {
                info.add(a.toString());
            }
            final VariantContextBuilder vb = new VariantContextBuilder(ctx);
            final String thetag;
            switch(this.outputSyntax) {
                case Vep:
                    thetag = "CSQ";
                    break;
                case SnpEff:
                    thetag = "ANN";
                    break;
                default:
                    thetag = TAG;
                    break;
            }
            vb.attribute(thetag, info.toArray());
            w.add(vb.make());
        }
        return RETURN_OK;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(this.referenceGenome);
    }
}
Also used : ReferenceContig(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceContig) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ReferenceGenomeFactory(com.github.lindenb.jvarkit.util.bio.fasta.ReferenceGenomeFactory) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SequenceOntologyTree(com.github.lindenb.jvarkit.util.so.SequenceOntologyTree) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) GeneticCode(com.github.lindenb.jvarkit.util.bio.GeneticCode) Interval(htsjdk.samtools.util.Interval)

Example 2 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class VCFPredictions method loadKnownGenesFromUri.

private void loadKnownGenesFromUri() throws IOException {
    BufferedReader in = null;
    try {
        if (this.referenceGenome.getDictionary() == null) {
            throw new JvarkitException.FastaDictionaryMissing(this.referenceGenomeSource);
        }
        int n_ignored = 0;
        int n_genes = 0;
        this.knownGenes = new IntervalTreeMap<>();
        LOG.info("loading genes");
        final ContigNameConverter contigNameConverter = ContigNameConverter.fromOneDictionary(this.referenceGenome.getDictionary());
        contigNameConverter.setOnNotFound(OnNotFound.SKIP);
        in = IOUtils.openURIForBufferedReading(this.kgURI);
        String line;
        final Pattern tab = Pattern.compile("[\t]");
        while ((line = in.readLine()) != null) {
            if (StringUtil.isBlank(line))
                continue;
            final String[] tokens = tab.split(line);
            final KnownGene g = new KnownGene(tokens);
            final String normalizedContig = contigNameConverter.apply(g.getContig());
            if (StringUtil.isBlank(normalizedContig)) {
                ++n_ignored;
                continue;
            }
            if (this.referenceGenome.getDictionary().getSequence(normalizedContig) == null) {
                ++n_ignored;
                continue;
            }
            // because we want to set
            final int extend_gene_search = 5000;
            // SO:5KB_upstream_variant
            final Interval interval = new Interval(normalizedContig, Math.max(1, g.getTxStart() + 1 - extend_gene_search), g.getTxEnd() + extend_gene_search);
            List<KnownGene> L = this.knownGenes.get(interval);
            if (L == null) {
                L = new ArrayList<>(2);
                this.knownGenes.put(interval, L);
            }
            ++n_genes;
            L.add(g);
        }
        in.close();
        in = null;
        LOG.info("genes:" + n_genes + " (ignored: " + n_ignored + ")");
    } finally {
        CloserUtil.close(in);
    }
}
Also used : Pattern(java.util.regex.Pattern) BufferedReader(java.io.BufferedReader) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) Interval(htsjdk.samtools.util.Interval)

Example 3 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class FindAllCoverageAtPosition method getReferenceAt.

private char getReferenceAt(final String contig, int pos1) {
    if (this.indexedFastaSequenceFile == null)
        return '.';
    if (this.genomicSequence == null || !this.genomicSequence.getChrom().equals(contig)) {
        final SAMSequenceDictionary dict = this.indexedFastaSequenceFile.getSequenceDictionary();
        if (dict == null)
            return '.';
        final ContigNameConverter converter = ContigNameConverter.fromOneDictionary(dict);
        converter.setOnNotFound(ContigNameConverter.OnNotFound.SKIP);
        final String newctg = converter.apply(contig);
        final SAMSequenceRecord rec = (newctg == null ? null : dict.getSequence(newctg));
        if (rec != null)
            genomicSequence = new GenomicSequence(this.indexedFastaSequenceFile, newctg);
    }
    if (genomicSequence == null || pos1 < 0 || pos1 > genomicSequence.length())
        return '.';
    return genomicSequence.charAt(pos1 - 1);
}
Also used : GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)

Example 4 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class ConvertVcfChromosomes method doVcfToVcf.

@Override
protected int doVcfToVcf(String inputName, VcfIterator iterin, VariantContextWriter out) {
    final ContigNameConverter customMapping = ContigNameConverter.fromFile(mappingFile);
    customMapping.setOnNotFound(this.onNotFound);
    final Set<String> unseen = new HashSet<>();
    final VCFHeader header1 = iterin.getHeader();
    final VCFHeader header2 = new VCFHeader(header1.getMetaDataInInputOrder().stream().filter(L -> !L.getKey().equals(VCFHeader.CONTIG_KEY)).collect(Collectors.toSet()), header1.getSampleNamesInOrder());
    if (header1.getSequenceDictionary() != null) {
        header2.setSequenceDictionary(customMapping.convertDictionary(header1.getSequenceDictionary()));
    }
    out.writeHeader(header2);
    while (iterin.hasNext()) {
        final VariantContext ctx = iterin.next();
        final String newName = customMapping.apply(ctx.getContig());
        if (newName == null) {
            if (unseen.size() < 1000 && !unseen.contains(ctx.getContig())) {
                LOG.warn("Cannot find contig for " + ctx.getContig());
                unseen.add(ctx.getContig());
            }
            // skip unknown chromosomes
            continue;
        }
        final VariantContextBuilder vcb = new VariantContextBuilder(ctx);
        vcb.chr(newName);
        out.add(vcb.make());
    }
    return 0;
}
Also used : VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFHeader(htsjdk.variant.vcf.VCFHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Example 5 with ContigNameConverter

use of com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter in project jvarkit by lindenb.

the class ConvertBamChromosomes method doWork.

@Override
public int doWork(List<String> args) {
    SamReader sfr = null;
    SAMFileWriter sfw = null;
    final Set<String> unmappedChromosomes = new HashSet<>();
    try {
        final ContigNameConverter customMapping = ContigNameConverter.fromFile(mappingFile);
        customMapping.setOnNotFound(this.onNotFound);
        sfr = super.openSamReader(oneFileOrNull(args));
        SAMFileHeader header1 = sfr.getFileHeader();
        if (header1 == null) {
            LOG.error("File header missing");
            return -1;
        }
        SAMFileHeader header2 = header1.clone();
        // create new sequence dict
        final SAMSequenceDictionary dict1 = header1.getSequenceDictionary();
        if (dict1 == null) {
            LOG.error("Sequence dict missing");
            return -1;
        }
        final List<SAMSequenceRecord> ssrs = new ArrayList<SAMSequenceRecord>(dict1.size());
        for (int i = 0; i < dict1.size(); ++i) {
            SAMSequenceRecord ssr = dict1.getSequence(i);
            String newName = customMapping.apply(ssr.getSequenceName());
            if (newName == null) {
                // skip unknown chromosomes
                continue;
            }
            ssr = new SAMSequenceRecord(newName, ssr.getSequenceLength());
            ssrs.add(ssr);
        }
        header2.setSequenceDictionary(new SAMSequenceDictionary(ssrs));
        SAMSequenceDictionary dict2 = new SAMSequenceDictionary(ssrs);
        header2.setSequenceDictionary(dict2);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict1);
        sfw = this.writingBamArgs.openSAMFileWriter(output, header2, true);
        long num_ignored = 0L;
        SAMRecordIterator iter = sfr.iterator();
        while (iter.hasNext()) {
            SAMRecord rec1 = iter.next();
            progress.watch(rec1);
            String newName1 = null;
            String newName2 = null;
            if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getReferenceName())) {
                newName1 = customMapping.apply(rec1.getReferenceName());
            }
            if (rec1.getReadPairedFlag() && !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getMateReferenceName())) {
                newName2 = customMapping.apply(rec1.getMateReferenceName());
            }
            rec1.setHeader(header2);
            if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getReferenceName())) {
                if (newName1 == null) {
                    ++num_ignored;
                    continue;
                }
                rec1.setReferenceName(newName1);
            }
            if (rec1.getReadPairedFlag() && !SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec1.getMateReferenceName())) {
                if (newName2 == null) {
                    ++num_ignored;
                    continue;
                }
                rec1.setMateReferenceName(newName2);
            }
            sfw.addAlignment(rec1);
        }
        if (!unmappedChromosomes.isEmpty()) {
            LOG.warning("Unmapped chromosomes: " + unmappedChromosomes);
        }
        LOG.warning("num ignored read:" + num_ignored);
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(sfr);
        CloserUtil.close(sfw);
    }
}
Also used : SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) SAMRecordIterator(htsjdk.samtools.SAMRecordIterator) SAMFileWriter(htsjdk.samtools.SAMFileWriter) ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) ContigNameConverter(com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter) HashSet(java.util.HashSet)

Aggregations

ContigNameConverter (com.github.lindenb.jvarkit.util.bio.fasta.ContigNameConverter)8 HashSet (java.util.HashSet)4 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)3 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)3 VariantContext (htsjdk.variant.variantcontext.VariantContext)3 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)3 IOException (java.io.IOException)3 ArrayList (java.util.ArrayList)3 Parameter (com.beust.jcommander.Parameter)2 JvarkitException (com.github.lindenb.jvarkit.lang.JvarkitException)2 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)2 Program (com.github.lindenb.jvarkit.util.jcommander.Program)2 Logger (com.github.lindenb.jvarkit.util.log.Logger)2 KnownGene (com.github.lindenb.jvarkit.util.ucsc.KnownGene)2 SAMSequenceRecord (htsjdk.samtools.SAMSequenceRecord)2 CloserUtil (htsjdk.samtools.util.CloserUtil)2 Interval (htsjdk.samtools.util.Interval)2 Allele (htsjdk.variant.variantcontext.Allele)2 VCFHeader (htsjdk.variant.vcf.VCFHeader)2 File (java.io.File)2