Search in sources :

Example 21 with KnownGene

use of com.github.lindenb.jvarkit.util.ucsc.KnownGene in project jvarkit by lindenb.

the class BackLocate method run.

/**
 * Reads tab-delimited {@code GENE<TAB>MUTATION} lines and hands each parsed
 * mutation to {@code backLocate} for every known gene mapped to the gene name.
 *
 * <p>Lines starting with {@code #} and blank lines are skipped. The mutation
 * must match {@code [A-Za-z*][0-9]+[A-Za-z*]}: one symbol, a non-zero
 * position, one symbol. Both symbols are upper-cased before use. Gene names
 * with no entry in {@code geneSymbol2kg} are logged as a warning and skipped;
 * ids with no entry in {@code knwonGenes} are silently skipped.
 *
 * @param out stream passed through to {@code backLocate}
 * @param in  source of input lines
 * @throws IOException if a line has no tab (or starts with one), has an empty
 *                     gene name, a malformed mutation, or a position that is
 *                     zero or does not fit in an {@code int}
 */
private void run(PrintStream out, LineIterator in) throws IOException {
    while (in.hasNext()) {
        final String line = in.next();
        // Comments and blank lines carry no mutation.
        if (line.startsWith("#") || line.trim().isEmpty()) {
            continue;
        }
        final int tab = line.indexOf('\t');
        if (tab == 0 || tab == -1) {
            throw new IOException("Bad line. No tab found in " + line);
        }
        final String geneName = line.substring(0, tab).trim();
        if (geneName.isEmpty()) {
            // Report the whole line: geneName is empty here and would say nothing.
            throw new IOException("Bad line. No gene in " + line);
        }
        final String mut = line.substring(tab + 1).trim();
        if (!mut.matches("[A-Za-z\\*][0-9]+[A-Za-z\\*]")) {
            throw new IOException("Bad mutation  in " + line);
        }
        // Character.toUpperCase is locale-independent, unlike String.toUpperCase().
        final char aa1 = Character.toUpperCase(mut.charAt(0));
        final char aa2 = Character.toUpperCase(mut.charAt(mut.length() - 1));
        int position1;
        try {
            position1 = Integer.parseInt(mut.substring(1, mut.length() - 1));
        } catch (final NumberFormatException e) {
            // The regex guarantees digits only, so this is an int overflow.
            throw new IOException("Bad position  in " + line, e);
        }
        if (position1 == 0) {
            throw new IOException("Bad position  in " + line);
        }
        final Set<String> kgIds = this.geneSymbol2kg.get(geneName.toUpperCase());
        if (kgIds == null || kgIds.isEmpty()) {
            LOG.warn("No kgXref found for " + geneName);
            continue;
        }
        for (final String kgId : kgIds) {
            final KnownGene kg = this.knwonGenes.get(kgId);
            if (kg == null) {
                continue;
            }
            backLocate(out, kg, geneName, aa1, aa2, position1);
        }
    }
}
Also used : KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene) IOException(java.io.IOException)

Example 22 with KnownGene

use of com.github.lindenb.jvarkit.util.ucsc.KnownGene in project jvarkit by lindenb.

the class VcfDoest method overlap.

/**
 * Collects the known genes returned by {@code knownGenesTabix} for an interval.
 *
 * <p>A gene reporting {@code isNonCoding()} is kept only when
 * {@code keepNonCoding} is set; every other gene is always kept.
 *
 * @param interval the region passed to {@code knownGenesTabix.iterator}
 * @return a new list of the retained genes, in iteration order; never {@code null}
 */
private List<KnownGene> overlap(final Interval interval) {
    final List<KnownGene> retained = new ArrayList<>();
    for (final Iterator<KnownGene> it = this.knownGenesTabix.iterator(interval); it.hasNext();) {
        final KnownGene gene = it.next();
        if (!gene.isNonCoding() || this.keepNonCoding) {
            retained.add(gene);
        }
    }
    return retained;
}
Also used : ArrayList(java.util.ArrayList) KnownGene(com.github.lindenb.jvarkit.util.ucsc.KnownGene)

Aggregations

KnownGene (com.github.lindenb.jvarkit.util.ucsc.KnownGene): 22, Interval (htsjdk.samtools.util.Interval): 11, ArrayList (java.util.ArrayList): 9, BufferedReader (java.io.BufferedReader): 8, Pattern (java.util.regex.Pattern): 8, IOException (java.io.IOException): 7, VariantContext (htsjdk.variant.variantcontext.VariantContext): 6, VCFHeader (htsjdk.variant.vcf.VCFHeader): 5, File (java.io.File): 5, HashSet (java.util.HashSet): 5, SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary): 4, List (java.util.List): 4, Parameter (com.beust.jcommander.Parameter): 3, IOUtils (com.github.lindenb.jvarkit.io.IOUtils): 3, GeneticCode (com.github.lindenb.jvarkit.util.bio.GeneticCode): 3, Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher): 3, Logger (com.github.lindenb.jvarkit.util.log.Logger): 3, GenomicSequence (com.github.lindenb.jvarkit.util.picard.GenomicSequence): 3, SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress): 3, VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator): 3