Search in sources :

Example 1 with IntervalTreeMap

use of htsjdk.samtools.util.IntervalTreeMap in project jvarkit by lindenb.

the class KnimeVariantHelper method parseBedAsBooleanIntervalTreeMap.

public IntervalTreeMap<Boolean> parseBedAsBooleanIntervalTreeMap(final String bedUri) throws IOException {
    if (bedUri == null || bedUri.isEmpty())
        throw new IllegalArgumentException("bad bed uri");
    BufferedReader r = null;
    final IntervalTreeMap<Boolean> treeMap = new IntervalTreeMap<>();
    try {
        r = IOUtils.openURIForBufferedReading(bedUri);
        final BedLineCodec codec = new BedLineCodec();
        r.lines().forEach(L -> {
            final BedLine bedline = codec.decode(L);
            if (bedline == null) {
                LOG.warn("Ignoring line in BED (doesn't look like a BED line):" + L);
                return;
            }
            treeMap.put(bedline.toInterval(), Boolean.TRUE);
        });
        return treeMap;
    } finally {
        CloserUtil.close(r);
    }
}
Also used : BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine)

Example 2 with IntervalTreeMap

use of htsjdk.samtools.util.IntervalTreeMap in project jvarkit by lindenb.

the class Launcher method readBedFileAsBooleanIntervalTreeMap.

/**
 * reads a Bed file and convert it to a IntervalTreeMap<Boolean>
 */
protected IntervalTreeMap<Boolean> readBedFileAsBooleanIntervalTreeMap(final java.io.File file) throws java.io.IOException {
    java.io.BufferedReader r = null;
    try {
        final IntervalTreeMap<Boolean> intervals = new IntervalTreeMap<Boolean>();
        r = com.github.lindenb.jvarkit.io.IOUtils.openFileForBufferedReading(file);
        final BedLineCodec bedCodec = new BedLineCodec();
        r.lines().filter(line -> !(line.startsWith("#") || com.github.lindenb.jvarkit.util.bio.bed.BedLine.isBedHeader(line) || line.isEmpty())).map(line -> bedCodec.decode(line)).filter(B -> B != null).map(B -> B.toInterval()).filter(L -> L.getStart() < L.getEnd()).forEach(L -> {
            intervals.put(L, true);
        });
        return intervals;
    } finally {
        htsjdk.samtools.util.CloserUtil.close(r);
    }
}
Also used : Manifest(java.util.jar.Manifest) Transformer(javax.xml.transform.Transformer) Arrays(java.util.Arrays) ParameterException(com.beust.jcommander.ParameterException) Enumeration(java.util.Enumeration) URL(java.net.URL) VCFHeader(htsjdk.variant.vcf.VCFHeader) StreamResult(javax.xml.transform.stream.StreamResult) Random(java.util.Random) VariantContextWriterBuilder(htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder) SAMFileHeader(htsjdk.samtools.SAMFileHeader) IntegerConverter(com.beust.jcommander.converters.IntegerConverter) StringUtil(htsjdk.samtools.util.StringUtil) Locale(java.util.Locale) Document(org.w3c.dom.Document) Map(java.util.Map) IStringConverter(com.beust.jcommander.IStringConverter) Path(java.nio.file.Path) CloserUtil(htsjdk.samtools.util.CloserUtil) ParameterDescription(com.beust.jcommander.ParameterDescription) IncludeSourceInJar(com.github.lindenb.jvarkit.annotproc.IncludeSourceInJar) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Collection(java.util.Collection) Logger(com.github.lindenb.jvarkit.util.log.Logger) SamFilterParser(com.github.lindenb.jvarkit.util.bio.samfilter.SamFilterParser) SAMFileWriter(htsjdk.samtools.SAMFileWriter) Deflater(java.util.zip.Deflater) Collectors(java.util.stream.Collectors) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) FileNotFoundException(java.io.FileNotFoundException) Dimension(java.awt.Dimension) List(java.util.List) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Pattern(java.util.regex.Pattern) DocumentBuilderFactory(javax.xml.parsers.DocumentBuilderFactory) IValueValidator(com.beust.jcommander.IValueValidator) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DOMSource(javax.xml.transform.dom.DOMSource) VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Parameter(com.beust.jcommander.Parameter) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) HashMap(java.util.HashMap) Term(com.github.lindenb.semontology.Term) Function(java.util.function.Function) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) Interval(htsjdk.samtools.util.Interval) IOUtils(com.github.lindenb.jvarkit.io.IOUtils) PrintStream(java.io.PrintStream) SAMFileWriterFactory(htsjdk.samtools.SAMFileWriterFactory) StringWriter(java.io.StringWriter) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) JCommander(com.beust.jcommander.JCommander) IOException(java.io.IOException) OutputKeys(javax.xml.transform.OutputKeys) InputStreamReader(java.io.InputStreamReader) File(java.io.File) SamRecordFilter(htsjdk.samtools.filter.SamRecordFilter) URLEncoder(java.net.URLEncoder) Element(org.w3c.dom.Element) BufferedReader(java.io.BufferedReader) TransformerFactory(javax.xml.transform.TransformerFactory) IStringConverterFactory(com.beust.jcommander.IStringConverterFactory) Collections(java.util.Collections) InputStream(java.io.InputStream) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec)

Example 3 with IntervalTreeMap

use of htsjdk.samtools.util.IntervalTreeMap in project jvarkit by lindenb.

the class KnownGene method loadUriAsIntervalTreeMap.

/**
 * load knownGene file/uri as an IntervalTreeMap. Intervals in the IntervalTreeMap are *1-based* (interval.start= kg.txStart+1)
 */
public static IntervalTreeMap<List<KnownGene>> loadUriAsIntervalTreeMap(final String uri, final Predicate<KnownGene> filterOrNull) throws IOException {
    final IntervalTreeMap<List<KnownGene>> treeMap = new IntervalTreeMap<>();
    BufferedReader in = null;
    try {
        in = IOUtils.openURIForBufferedReading(uri);
        String line;
        final Pattern tab = Pattern.compile("[\t]");
        while ((line = in.readLine()) != null) {
            if (line.isEmpty())
                continue;
            final String[] tokens = tab.split(line);
            final KnownGene g = new KnownGene(tokens);
            if (filterOrNull != null && !filterOrNull.test(g))
                continue;
            final Interval interval = new Interval(g.getContig(), g.getTxStart() + 1, g.getTxEnd(), g.isNegativeStrand(), g.getName());
            List<KnownGene> L = treeMap.get(interval);
            if (L == null) {
                L = new ArrayList<>(2);
                treeMap.put(interval, L);
            }
            L.add(g);
        }
        in.close();
        in = null;
        return treeMap;
    } finally {
        CloserUtil.close(in);
    }
}
Also used : Pattern(java.util.regex.Pattern) BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) List(java.util.List) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) Interval(htsjdk.samtools.util.Interval)

Example 4 with IntervalTreeMap

use of htsjdk.samtools.util.IntervalTreeMap in project jvarkit by lindenb.

the class SamFindClippedRegions method doWork.

/*private static boolean closeTo(int pos1,int pos2, int max)
		{
		return Math.abs(pos2-pos1)<=max;
		}*/
/*
	private static boolean same(char c1,char c2)
		{
		if(c1=='N' || c2=='N') return false;
		return Character.toUpperCase(c1)==Character.toUpperCase(c2);
		}*/
@Override
public int doWork(List<String> args) {
    int readLength = 150;
    if (args.isEmpty()) {
        LOG.error("illegal.number.of.arguments");
        return -1;
    }
    List<Input> inputs = new ArrayList<Input>();
    VariantContextWriter w = null;
    // SAMFileWriter w=null;
    try {
        SAMSequenceDictionary dict = null;
        /* create input, collect sample names */
        Map<String, Input> sample2input = new HashMap<String, Input>();
        for (final String filename : args) {
            Input input = new Input(new File(filename));
            // input.index=inputs.size();
            inputs.add(input);
            if (sample2input.containsKey(input.sampleName)) {
                LOG.error("Duplicate sample " + input.sampleName + " in " + input.bamFile + " and " + sample2input.get(input.sampleName).bamFile);
                return -1;
            }
            sample2input.put(input.sampleName, input);
            if (dict == null) {
                dict = input.header.getSequenceDictionary();
            } else if (!SequenceUtil.areSequenceDictionariesEqual(dict, input.header.getSequenceDictionary())) {
                LOG.error("Found more than one dictint sequence dictionary");
                return -1;
            }
        }
        LOG.info("Sample N= " + sample2input.size());
        /* create merged iterator */
        List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(sample2input.size());
        for (Input input : inputs) headers.add(input.header);
        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.coordinate, headers, true);
        List<SamReader> readers = new ArrayList<SamReader>(sample2input.size());
        for (Input input : inputs) readers.add(input.samFileReaderScan);
        MergingSamRecordIterator merginIter = new MergingSamRecordIterator(headerMerger, readers, true);
        Allele reference_allele = Allele.create("N", true);
        Allele[] alternate_alleles = new Allele[] { Allele.create("<CLIP5>", false), Allele.create("<CLIP3>", false) };
        Set<VCFHeaderLine> vcfHeaderLines = new HashSet<VCFHeaderLine>();
        for (Allele alt : alternate_alleles) {
            vcfHeaderLines.add(new VCFSimpleHeaderLine("<ID=" + alt.getDisplayString() + ",Description=\"StructVar\">", VCFHeaderVersion.VCF4_1, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description")));
        }
        vcfHeaderLines.add(new VCFInfoHeaderLine("COUNT_SAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples with  depth>=" + this.min_depth));
        vcfHeaderLines.add(new VCFInfoHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth."));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        vcfHeaderLines.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth"));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "CmdLine", String.valueOf(getProgramCommandLine())));
        vcfHeaderLines.add(new VCFHeaderLine(getClass().getSimpleName() + "Version", String.valueOf(getVersion())));
        for (int side = 0; side < 2; ++side) {
            vcfHeaderLines.add(new VCFFormatHeaderLine("CN" + (side == 0 ? 5 : 3), 1, VCFHeaderLineType.Integer, "count clipped in " + (side == 0 ? 5 : 3) + "'"));
        }
        if (dict != null) {
            vcfHeaderLines.addAll(VCFUtils.samSequenceDictToVCFContigHeaderLine(dict));
        }
        VCFHeader vcfHeader = new VCFHeader(vcfHeaderLines, sample2input.keySet());
        w = VCFUtils.createVariantContextWriterToStdout();
        w.writeHeader(vcfHeader);
        final IntervalTreeMap<Boolean> intervals = new IntervalTreeMap<>();
        // w=swf.make(header, System.out);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        if (bedFile != null) {
            final BedLineCodec bedLineCodec = new BedLineCodec();
            LOG.info("Reading " + bedFile);
            BufferedReader r = IOUtils.openFileForBufferedReading(bedFile);
            String line;
            while ((line = r.readLine()) != null) {
                BedLine bedLine = bedLineCodec.decode(line);
                if (bedLine == null)
                    continue;
                if (dict != null && dict.getSequence(bedLine.getContig()) == null) {
                    LOG.warning("undefined chromosome  in " + bedFile + " " + line);
                    continue;
                }
                intervals.put(bedLine.toInterval(), true);
            }
            CloserUtil.close(r);
        }
        LinkedList<SAMRecord> buffer = new LinkedList<SAMRecord>();
        final Predicate<SAMRecord> filterSamRecords = new Predicate<SAMRecord>() {

            @Override
            public boolean test(SAMRecord rec) {
                if (rec.getReadUnmappedFlag())
                    return false;
                if (rec.isSecondaryOrSupplementary())
                    return false;
                if (rec.getDuplicateReadFlag())
                    return false;
                if (rec.getReadFailsVendorQualityCheckFlag())
                    return false;
                Cigar cigar = rec.getCigar();
                if (cigar == null || cigar.numCigarElements() < 2)
                    return false;
                boolean found_S = false;
                for (int side = 0; side < 2; ++side) {
                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                    // read must be clipped on 5' or 3' with a good length
                    if (!ce.getOperator().equals(CigarOperator.S))
                        continue;
                    found_S = true;
                    break;
                }
                if (!found_S)
                    return false;
                SAMReadGroupRecord g = rec.getReadGroup();
                if (g == null || g.getSample() == null || g.getSample().isEmpty())
                    return false;
                return true;
            }
        };
        final FilteringIterator<SAMRecord> forwardIterator = new FilteringIterator<SAMRecord>(merginIter, filterSamRecords);
        for (; ; ) {
            SAMRecord rec = null;
            if (forwardIterator.hasNext()) {
                rec = forwardIterator.next();
                progress.watch(rec);
                if (intervals != null && !intervals.containsOverlapping(new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd())))
                    continue;
            }
            // need to flush buffer ?
            if (rec == null || (!buffer.isEmpty() && !buffer.getLast().getReferenceIndex().equals(rec.getReferenceIndex())) || (!buffer.isEmpty() && buffer.getLast().getUnclippedEnd() + readLength < rec.getUnclippedStart())) {
                if (!buffer.isEmpty()) {
                    int chromStart = buffer.getFirst().getUnclippedStart();
                    int chromEnd = buffer.getFirst().getUnclippedEnd();
                    for (SAMRecord sam : buffer) {
                        chromStart = Math.min(chromStart, sam.getUnclippedStart());
                        chromEnd = Math.max(chromEnd, sam.getUnclippedEnd());
                    }
                    final int winShift = 5;
                    for (int pos = chromStart; pos + winShift <= chromEnd; pos += winShift) {
                        int[] count_big_clip = new int[] { 0, 0 };
                        // int max_depth[]=new int[]{0,0};
                        List<Genotype> genotypes = new ArrayList<Genotype>();
                        Set<Allele> all_alleles = new HashSet<Allele>();
                        all_alleles.add(reference_allele);
                        boolean found_one_depth_ok = false;
                        int sum_depth = 0;
                        int samples_with_high_depth = 0;
                        for (String sample : sample2input.keySet()) {
                            GenotypeBuilder gb = new GenotypeBuilder(sample);
                            int[] count_clipped = new int[] { 0, 0 };
                            Set<Allele> sample_alleles = new HashSet<Allele>(3);
                            for (int side = 0; side < 2; ++side) {
                                for (SAMRecord sam : buffer) {
                                    if (!sam.getReadGroup().getSample().equals(sample))
                                        continue;
                                    Cigar cigar = sam.getCigar();
                                    CigarElement ce = cigar.getCigarElement(side == 0 ? 0 : cigar.numCigarElements() - 1);
                                    if (!ce.getOperator().equals(CigarOperator.S))
                                        continue;
                                    int clipStart = (side == 0 ? sam.getUnclippedStart() : sam.getAlignmentEnd() + 1);
                                    int clipEnd = (side == 0 ? sam.getAlignmentStart() - 1 : sam.getUnclippedEnd());
                                    if ((pos + winShift < clipStart || pos > clipEnd))
                                        continue;
                                    count_clipped[side]++;
                                    if (ce.getLength() >= this.min_clip_length) {
                                        count_big_clip[side]++;
                                    }
                                    sample_alleles.add(alternate_alleles[side]);
                                    gb.attribute("CN" + (side == 0 ? 5 : 3), count_clipped[side]);
                                }
                            }
                            // if(!(found_one_big_clip[0] || found_one_big_clip[1])) continue;
                            if (count_clipped[0] + count_clipped[1] == 0)
                                continue;
                            if ((count_clipped[0] + count_clipped[1]) > min_depth) {
                                found_one_depth_ok = true;
                                ++samples_with_high_depth;
                            }
                            sum_depth += (count_clipped[0] + count_clipped[1]);
                            gb.alleles(new ArrayList<Allele>(sample_alleles));
                            all_alleles.addAll(sample_alleles);
                            gb.DP(count_clipped[0] + count_clipped[1]);
                            genotypes.add(gb.make());
                        }
                        if (all_alleles.size() == 1) {
                            // all homozygotes
                            continue;
                        }
                        if (!found_one_depth_ok) {
                            continue;
                        }
                        if (!(count_big_clip[0] >= 1 || count_big_clip[1] >= 1)) {
                            continue;
                        }
                        Map<String, Object> atts = new HashMap<String, Object>();
                        atts.put("COUNT_SAMPLES", samples_with_high_depth);
                        atts.put(VCFConstants.DEPTH_KEY, sum_depth);
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(buffer.getFirst().getReferenceName());
                        vcb.start(pos);
                        vcb.stop(pos + winShift);
                        vcb.alleles(all_alleles);
                        vcb.attributes(atts);
                        vcb.genotypes(genotypes);
                        w.add(vcb.make());
                    }
                    buffer.clear();
                }
                if (rec == null) {
                    break;
                }
            }
            buffer.add(rec);
        }
        merginIter.close();
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        for (Input input : inputs) {
            CloserUtil.close(input);
        }
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) VCFSimpleHeaderLine(htsjdk.variant.vcf.VCFSimpleHeaderLine) Predicate(java.util.function.Predicate) HashSet(java.util.HashSet) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) CigarElement(htsjdk.samtools.CigarElement) LinkedList(java.util.LinkedList) BedLineCodec(com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec) BedLine(com.github.lindenb.jvarkit.util.bio.bed.BedLine) SAMRecord(htsjdk.samtools.SAMRecord) SAMFileHeader(htsjdk.samtools.SAMFileHeader) File(java.io.File) Interval(htsjdk.samtools.util.Interval) MergingSamRecordIterator(htsjdk.samtools.MergingSamRecordIterator) SAMReadGroupRecord(htsjdk.samtools.SAMReadGroupRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) SamReader(htsjdk.samtools.SamReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) SamFileHeaderMerger(htsjdk.samtools.SamFileHeaderMerger) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) Cigar(htsjdk.samtools.Cigar) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap)

Example 5 with IntervalTreeMap

use of htsjdk.samtools.util.IntervalTreeMap in project jvarkit by lindenb.

the class ReferenceToVCF method doWork.

@Override
public int doWork(List<String> args) {
    if (this.bedFile != null) {
        if (limitBed == null)
            limitBed = new IntervalTreeMap<Boolean>();
        try {
            Pattern tab = Pattern.compile("[\t]");
            BufferedReader r = IOUtils.openFileForBufferedReading(this.bedFile);
            String line;
            while ((line = r.readLine()) != null) {
                if (BedLine.isBedHeader(line))
                    continue;
                if (line.startsWith("#") || line.isEmpty())
                    continue;
                String[] tokens = tab.split(line, 4);
                limitBed.put(new Interval(tokens[0], 1 + Integer.parseInt(tokens[1]), 1 + Integer.parseInt(tokens[2])), true);
            }
            CloserUtil.close(r);
        } catch (Exception err) {
            LOG.error(err);
            return -1;
        }
    }
    Random random = new Random(0L);
    VariantContextWriter out = null;
    try {
        final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(oneAndOnlyOneFile(args)));
        SAMSequenceDictionary dict = fasta.getSequenceDictionary();
        out = super.openVariantContextWriter(this.outputFile);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        out.writeHeader(header);
        final List<List<Allele>> combination = new ArrayList<List<Allele>>(4);
        // always keep REF as first allele please
        combination.add(Arrays.asList(Allele.create("A", true), Allele.create("C", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("C", true), Allele.create("A", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("G", true), Allele.create("A", false), Allele.create("C", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("T", true), Allele.create("A", false), Allele.create("C", false), Allele.create("G", false)));
        for (SAMSequenceRecord ssr : dict.getSequences()) {
            GenomicSequence genome = new GenomicSequence(fasta, ssr.getSequenceName());
            if (this.limitBed != null) {
                Interval interval = new Interval(ssr.getSequenceName(), 1, genome.length());
                if (!this.limitBed.containsOverlapping(interval))
                    continue;
            }
            for (int n = 0; n < genome.length(); ++n) {
                progress.watch(ssr.getSequenceIndex(), n);
                List<Allele> alleles = null;
                byte ref = (byte) genome.charAt(n);
                switch(ref) {
                    case 'a':
                    case 'A':
                        alleles = combination.get(0);
                        break;
                    case 'c':
                    case 'C':
                        alleles = combination.get(1);
                        break;
                    case 'g':
                    case 'G':
                        alleles = combination.get(2);
                        break;
                    case 't':
                    case 'T':
                        alleles = combination.get(3);
                        break;
                    default:
                        break;
                }
                if (alleles == null)
                    continue;
                if (this.limitBed != null) {
                    Interval interval = new Interval(ssr.getSequenceName(), n + 1, n + 1);
                    if (!this.limitBed.containsOverlapping(interval))
                        continue;
                }
                if (!disjoint_alts) {
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.stop(n + 1);
                    vcb.alleles(alleles);
                    out.add(vcb.make());
                } else {
                    for (// index 0 is always REF
                    int a = 1; // index 0 is always REF
                    a < 4; // index 0 is always REF
                    ++a) {
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(ssr.getSequenceName());
                        vcb.start(n + 1);
                        vcb.stop(n + 1);
                        // index 0 is always REF
                        vcb.alleles(Arrays.asList(alleles.get(0), alleles.get(a)));
                        out.add(vcb.make());
                    }
                }
                if (insertion_size > 0 && n + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REFERENCE
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(n + 1), true));
                    StringBuilder sb = new StringBuilder(insertion_size + 2);
                    sb.append(genome.charAt(n));
                    for (int n2 = 0; n2 < insertion_size; ++n2) {
                        switch(random.nextInt(4)) {
                            case 0:
                                sb.append('A');
                                break;
                            case 1:
                                sb.append('C');
                                break;
                            case 2:
                                sb.append('G');
                                break;
                            case 3:
                                sb.append('T');
                                break;
                        }
                    }
                    sb.append(genome.charAt(n + 1));
                    alleles.add(Allele.create(sb.toString(), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (deletion_size > 0 && n + deletion_size + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REF
                    StringBuilder sb = new StringBuilder(deletion_size + 2);
                    sb.append(genome.charAt(n));
                    int lastpos = n + 1;
                    for (int n2 = 0; n2 < deletion_size; ++n2, lastpos++) {
                        sb.append(genome.charAt(lastpos));
                    }
                    sb.append(genome.charAt(lastpos));
                    alleles.add(Allele.create(sb.toString(), true));
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(lastpos), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (out.checkError())
                    break;
            }
            if (out.checkError())
                break;
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Interval(htsjdk.samtools.util.Interval)

Aggregations

IntervalTreeMap (htsjdk.samtools.util.IntervalTreeMap)9 Interval (htsjdk.samtools.util.Interval)8 BufferedReader (java.io.BufferedReader)8 File (java.io.File)7 ArrayList (java.util.ArrayList)7 VCFHeader (htsjdk.variant.vcf.VCFHeader)6 List (java.util.List)6 BedLineCodec (com.github.lindenb.jvarkit.util.bio.bed.BedLineCodec)5 SAMFileHeader (htsjdk.samtools.SAMFileHeader)5 VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)5 HashMap (java.util.HashMap)5 Parameter (com.beust.jcommander.Parameter)4 Logger (com.github.lindenb.jvarkit.util.log.Logger)4 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)4 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)4 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)4 IOException (java.io.IOException)4 HashSet (java.util.HashSet)4 IOUtils (com.github.lindenb.jvarkit.io.IOUtils)3 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)3