Search in sources :

Example 96 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class ReferenceToVCF method doWork.

@Override
public int doWork(List<String> args) {
    if (this.bedFile != null) {
        if (limitBed == null)
            limitBed = new IntervalTreeMap<Boolean>();
        try {
            Pattern tab = Pattern.compile("[\t]");
            BufferedReader r = IOUtils.openFileForBufferedReading(this.bedFile);
            String line;
            while ((line = r.readLine()) != null) {
                if (BedLine.isBedHeader(line))
                    continue;
                if (line.startsWith("#") || line.isEmpty())
                    continue;
                String[] tokens = tab.split(line, 4);
                limitBed.put(new Interval(tokens[0], 1 + Integer.parseInt(tokens[1]), 1 + Integer.parseInt(tokens[2])), true);
            }
            CloserUtil.close(r);
        } catch (Exception err) {
            LOG.error(err);
            return -1;
        }
    }
    Random random = new Random(0L);
    VariantContextWriter out = null;
    try {
        final IndexedFastaSequenceFile fasta = new IndexedFastaSequenceFile(new File(oneAndOnlyOneFile(args)));
        SAMSequenceDictionary dict = fasta.getSequenceDictionary();
        out = super.openVariantContextWriter(this.outputFile);
        SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(dict);
        VCFHeader header = new VCFHeader();
        header.setSequenceDictionary(dict);
        out.writeHeader(header);
        final List<List<Allele>> combination = new ArrayList<List<Allele>>(4);
        // always keep REF as first allele please
        combination.add(Arrays.asList(Allele.create("A", true), Allele.create("C", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("C", true), Allele.create("A", false), Allele.create("G", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("G", true), Allele.create("A", false), Allele.create("C", false), Allele.create("T", false)));
        combination.add(Arrays.asList(Allele.create("T", true), Allele.create("A", false), Allele.create("C", false), Allele.create("G", false)));
        for (SAMSequenceRecord ssr : dict.getSequences()) {
            GenomicSequence genome = new GenomicSequence(fasta, ssr.getSequenceName());
            if (this.limitBed != null) {
                Interval interval = new Interval(ssr.getSequenceName(), 1, genome.length());
                if (!this.limitBed.containsOverlapping(interval))
                    continue;
            }
            for (int n = 0; n < genome.length(); ++n) {
                progress.watch(ssr.getSequenceIndex(), n);
                List<Allele> alleles = null;
                byte ref = (byte) genome.charAt(n);
                switch(ref) {
                    case 'a':
                    case 'A':
                        alleles = combination.get(0);
                        break;
                    case 'c':
                    case 'C':
                        alleles = combination.get(1);
                        break;
                    case 'g':
                    case 'G':
                        alleles = combination.get(2);
                        break;
                    case 't':
                    case 'T':
                        alleles = combination.get(3);
                        break;
                    default:
                        break;
                }
                if (alleles == null)
                    continue;
                if (this.limitBed != null) {
                    Interval interval = new Interval(ssr.getSequenceName(), n + 1, n + 1);
                    if (!this.limitBed.containsOverlapping(interval))
                        continue;
                }
                if (!disjoint_alts) {
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.stop(n + 1);
                    vcb.alleles(alleles);
                    out.add(vcb.make());
                } else {
                    for (// index 0 is always REF
                    int a = 1; // index 0 is always REF
                    a < 4; // index 0 is always REF
                    ++a) {
                        VariantContextBuilder vcb = new VariantContextBuilder();
                        vcb.chr(ssr.getSequenceName());
                        vcb.start(n + 1);
                        vcb.stop(n + 1);
                        // index 0 is always REF
                        vcb.alleles(Arrays.asList(alleles.get(0), alleles.get(a)));
                        out.add(vcb.make());
                    }
                }
                if (insertion_size > 0 && n + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REFERENCE
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(n + 1), true));
                    StringBuilder sb = new StringBuilder(insertion_size + 2);
                    sb.append(genome.charAt(n));
                    for (int n2 = 0; n2 < insertion_size; ++n2) {
                        switch(random.nextInt(4)) {
                            case 0:
                                sb.append('A');
                                break;
                            case 1:
                                sb.append('C');
                                break;
                            case 2:
                                sb.append('G');
                                break;
                            case 3:
                                sb.append('T');
                                break;
                        }
                    }
                    sb.append(genome.charAt(n + 1));
                    alleles.add(Allele.create(sb.toString(), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (deletion_size > 0 && n + deletion_size + 1 < genome.length()) {
                    alleles = new ArrayList<Allele>(2);
                    // REF
                    StringBuilder sb = new StringBuilder(deletion_size + 2);
                    sb.append(genome.charAt(n));
                    int lastpos = n + 1;
                    for (int n2 = 0; n2 < deletion_size; ++n2, lastpos++) {
                        sb.append(genome.charAt(lastpos));
                    }
                    sb.append(genome.charAt(lastpos));
                    alleles.add(Allele.create(sb.toString(), true));
                    alleles.add(Allele.create("" + genome.charAt(n) + genome.charAt(lastpos), false));
                    VariantContextBuilder vcb = new VariantContextBuilder();
                    vcb.chr(ssr.getSequenceName());
                    vcb.start(n + 1);
                    vcb.alleles(alleles);
                    vcb.computeEndFromAlleles(alleles, n + 1);
                    out.add(vcb.make());
                }
                if (out.checkError())
                    break;
            }
            if (out.checkError())
                break;
        }
        progress.finish();
        return 0;
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(out);
    }
}
Also used : ArrayList(java.util.ArrayList) SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Random(java.util.Random) ArrayList(java.util.ArrayList) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) Pattern(java.util.regex.Pattern) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) GenomicSequence(com.github.lindenb.jvarkit.util.picard.GenomicSequence) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) BufferedReader(java.io.BufferedReader) IntervalTreeMap(htsjdk.samtools.util.IntervalTreeMap) File(java.io.File) IndexedFastaSequenceFile(htsjdk.samtools.reference.IndexedFastaSequenceFile) Interval(htsjdk.samtools.util.Interval)

Example 97 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VCFCompareGT method doWork.

@Override
public int doWork(final List<String> arguments) {
    final List<File> inputVcfFiles = new ArrayList<>(IOUtil.unrollFiles(arguments.stream().map(F -> new File(F)).collect(Collectors.toCollection(HashSet::new)), ".vcf", "vcf.gz"));
    if (inputVcfFiles.isEmpty()) {
        LOG.error("VCF missing.");
        return -1;
    }
    VariantComparator varcmp = new VariantComparator();
    SortingCollection<Variant> variants = null;
    final Set<String> sampleNames = new LinkedHashSet<>();
    try {
        variants = SortingCollection.newInstance(Variant.class, new VariantCodec(), varcmp, writingSortingCollection.getMaxRecordsInRam(), writingSortingCollection.getTmpPaths());
        variants.setDestructiveIteration(true);
        final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
        metaData.add(new VCFHeaderLine(getClass().getSimpleName(), "version:" + getVersion() + " command:" + getProgramCommandLine()));
        for (int i = 0; i < inputVcfFiles.size(); ++i) {
            final File vcfFile = inputVcfFiles.get(i);
            LOG.info("Opening " + vcfFile);
            final VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false);
            final CloseableIterator<VariantContext> iter = vcfFileReader.iterator();
            final VCFHeader header = vcfFileReader.getFileHeader();
            sampleNames.addAll(header.getSampleNamesInOrder());
            metaData.add(new VCFHeaderLine(getClass().getSimpleName() + "_" + ((i) + 1), "File: " + vcfFile.getPath()));
            long nLines = 0;
            while (iter.hasNext()) {
                final VariantContext var = iter.next();
                if (nLines++ % 10000 == 0) {
                    LOG.info(vcfFile + " " + nLines);
                }
                if (!var.isVariant())
                    continue;
                if (!var.hasGenotypes())
                    continue;
                for (final Genotype genotype : var.getGenotypes()) {
                    final Variant rec = new Variant();
                    if (!genotype.isAvailable())
                        continue;
                    if (!genotype.isCalled())
                        continue;
                    if (genotype.isNoCall())
                        continue;
                    rec.file_index = i + 1;
                    rec.sampleName = genotype.getSampleName();
                    rec.chrom = var.getContig();
                    rec.start = var.getStart();
                    rec.end = var.getEnd();
                    rec.ref = var.getReference().getDisplayString();
                    if (var.hasID()) {
                        rec.id = var.getID();
                    }
                    if (genotype.hasDP()) {
                        rec.dp = genotype.getDP();
                    }
                    if (genotype.hasGQ()) {
                        rec.gq = genotype.getGQ();
                    }
                    final List<Allele> alleles = genotype.getAlleles();
                    if (alleles == null)
                        continue;
                    if (alleles.size() == 1) {
                        rec.a1 = alleles.get(0).getDisplayString().toUpperCase();
                        rec.a2 = rec.a1;
                    } else if (alleles.size() == 2) {
                        rec.a1 = alleles.get(0).getDisplayString().toUpperCase();
                        rec.a2 = alleles.get(1).getDisplayString().toUpperCase();
                        if (rec.a1.compareTo(rec.a2) > 0) {
                            String tmp = rec.a2;
                            rec.a2 = rec.a1;
                            rec.a1 = tmp;
                        }
                    } else {
                        continue;
                    }
                    variants.add(rec);
                }
            }
            iter.close();
            vcfFileReader.close();
        }
        variants.doneAdding();
        LOG.info("Done Adding");
        final Set<String> newSampleNames = new HashSet<>();
        for (int i = 0; i < inputVcfFiles.size(); ++i) {
            for (final String sample : sampleNames) {
                newSampleNames.add(sample + "_" + ((i) + 1));
            }
        }
        final String GenpotypeChangedKey = "GCH";
        final String GenpotypeCreated = "GNW";
        final String GenpotypeDiff = "GDF";
        metaData.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
        metaData.add(new VCFFormatHeaderLine("DP", 1, VCFHeaderLineType.Integer, "Depth"));
        metaData.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Qual"));
        metaData.add(new VCFFormatHeaderLine(GenpotypeChangedKey, 1, VCFHeaderLineType.Integer, "Changed Genotype"));
        metaData.add(new VCFFormatHeaderLine(GenpotypeCreated, 1, VCFHeaderLineType.Integer, "Genotype Created/Deleted"));
        metaData.add(new VCFInfoHeaderLine(GenpotypeDiff, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Samples with Genotype Difference"));
        final VCFHeader header = new VCFHeader(metaData, new ArrayList<String>(newSampleNames));
        final VariantContextWriter w = super.openVariantContextWriter(outputFile);
        w.writeHeader(header);
        final PosComparator posCompare = new PosComparator();
        final EqualRangeIterator<Variant> iter = new EqualRangeIterator<>(variants.iterator(), posCompare);
        while (iter.hasNext()) {
            final List<Variant> row = iter.next();
            /**
             * this sample is not always the same
             */
            final Set<String> samplesModified = new TreeSet<>();
            /**
             * the number of sample is different from vcflist.size()
             */
            final Set<String> samplesCreates = new TreeSet<>();
            final Counter<String> samplesSeen = new Counter<>();
            for (int x = 0; x < row.size(); ++x) {
                final Variant var1 = row.get(x);
                samplesSeen.incr(var1.sampleName);
                for (int y = x + 1; y < row.size(); ++y) {
                    final Variant var2 = row.get(y);
                    if (!var2.sampleName.equals(var1.sampleName))
                        continue;
                    if (var1.a1.equals(var2.a1) && var1.a2.equals(var2.a2))
                        continue;
                    samplesModified.add(var1.sampleName);
                }
            }
            for (final String sampleName : samplesSeen.keySet()) {
                if (samplesSeen.count(sampleName) != inputVcfFiles.size()) {
                    samplesCreates.add(sampleName);
                }
            }
            final Variant first = row.get(0);
            final Set<Allele> alleles = new HashSet<>();
            alleles.add(Allele.create(first.ref, true));
            for (final Variant var : row) {
                alleles.add(Allele.create(var.a1, var.a1.equalsIgnoreCase(var.ref)));
                alleles.add(Allele.create(var.a2, var.a2.equalsIgnoreCase(var.ref)));
            }
            final VariantContextBuilder b = new VariantContextBuilder(getClass().getName(), first.chrom, first.start, first.end, alleles);
            // build genotypes
            final List<Genotype> genotypes = new ArrayList<Genotype>();
            for (final Variant var : row) {
                // alleles for this genotype
                final List<Allele> galleles = new ArrayList<Allele>();
                galleles.add(Allele.create(var.a1, var.a1.equalsIgnoreCase(var.ref)));
                galleles.add(Allele.create(var.a2, var.a2.equalsIgnoreCase(var.ref)));
                final GenotypeBuilder gb = new GenotypeBuilder();
                gb.DP(var.dp);
                gb.alleles(galleles);
                gb.name(var.sampleName + "_" + var.file_index);
                gb.GQ(var.gq);
                gb.attribute(GenpotypeChangedKey, samplesModified.contains(var.sampleName) ? 1 : 0);
                gb.attribute(GenpotypeCreated, samplesCreates.contains(var.sampleName) ? 1 : 0);
                genotypes.add(gb.make());
            }
            b.genotypes(genotypes);
            b.id(first.id);
            if (!(samplesModified.isEmpty() && samplesCreates.isEmpty())) {
                Set<String> set2 = new TreeSet<String>(samplesModified);
                set2.addAll(samplesCreates);
                b.attribute(GenpotypeDiff, set2.toArray());
            }
            if (!only_print_modified || !(samplesModified.isEmpty() && samplesCreates.isEmpty())) {
                w.add(b.make());
            }
        }
        iter.close();
        w.close();
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        if (variants != null)
            try {
                variants.cleanup();
            } catch (Exception err) {
            }
    }
    return 0;
}
Also used : Genotype(htsjdk.variant.variantcontext.Genotype) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) DataInputStream(java.io.DataInputStream) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Allele(htsjdk.variant.variantcontext.Allele) Program(com.github.lindenb.jvarkit.util.jcommander.Program) IOUtil(htsjdk.samtools.util.IOUtil) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) TreeSet(java.util.TreeSet) ParametersDelegate(com.beust.jcommander.ParametersDelegate) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) DataOutputStream(java.io.DataOutputStream) AbstractDataCodec(com.github.lindenb.jvarkit.util.picard.AbstractDataCodec) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) LinkedHashSet(java.util.LinkedHashSet) VCFConstants(htsjdk.variant.vcf.VCFConstants) SortingCollection(htsjdk.samtools.util.SortingCollection) VCFHeaderLineType(htsjdk.variant.vcf.VCFHeaderLineType) Counter(com.github.lindenb.jvarkit.util.Counter) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) Logger(com.github.lindenb.jvarkit.util.log.Logger) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) File(java.io.File) List(java.util.List) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) VariantContext(htsjdk.variant.variantcontext.VariantContext) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Comparator(java.util.Comparator) VCFHeaderLineCount(htsjdk.variant.vcf.VCFHeaderLineCount) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) LinkedHashSet(java.util.LinkedHashSet) VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) ArrayList(java.util.ArrayList) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VariantContext(htsjdk.variant.variantcontext.VariantContext) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) Counter(com.github.lindenb.jvarkit.util.Counter) TreeSet(java.util.TreeSet) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader) HashSet(java.util.HashSet) LinkedHashSet(java.util.LinkedHashSet) VCFFormatHeaderLine(htsjdk.variant.vcf.VCFFormatHeaderLine) Genotype(htsjdk.variant.variantcontext.Genotype) GenotypeBuilder(htsjdk.variant.variantcontext.GenotypeBuilder) VCFInfoHeaderLine(htsjdk.variant.vcf.VCFInfoHeaderLine) IOException(java.io.IOException) Allele(htsjdk.variant.variantcontext.Allele) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) File(java.io.File)

Example 98 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfIn method scanFileSorted.

private int scanFileSorted(final VariantContextWriter vcw, final String databaseVcfUri, final VcfIterator userVcfIn) {
    EqualRangeVcfIterator equalRangeDbIter = null;
    EqualRangeIterator<VariantContext> equalRangeUserVcf = null;
    try {
        final VCFHeader header = new VCFHeader(userVcfIn.getHeader());
        final SAMSequenceDictionary userVcfDict = header.getSequenceDictionary();
        // / NO need if(dict1==null)
        if (userVcfDict == null) {
            LOG.error(JvarkitException.VcfDictionaryMissing.getMessage("user file"));
            return -1;
        }
        final Comparator<VariantContext> userVcfComparator = VCFUtils.createTidPosComparator(userVcfDict);
        equalRangeDbIter = new EqualRangeVcfIterator(VCFUtils.createVcfIterator(databaseVcfUri), userVcfComparator);
        this.addMetaData(header);
        vcw.writeHeader(header);
        final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(userVcfDict).logger(LOG);
        equalRangeUserVcf = new EqualRangeIterator<>(userVcfIn, userVcfComparator);
        while (equalRangeUserVcf.hasNext()) {
            final List<VariantContext> ctxList = equalRangeUserVcf.next();
            progress.watch(ctxList.get(0));
            // fill both contextes
            final List<VariantContext> dbContexes = new ArrayList<VariantContext>(equalRangeDbIter.next(ctxList.get(0)));
            for (final VariantContext userCtx : ctxList) {
                boolean keep = dbContexes.stream().filter(V -> sameContext(userCtx, V)).anyMatch(V -> allUserAltFoundInDatabase(userCtx, V));
                addVariant(vcw, userCtx, keep);
            }
            if (vcw.checkError())
                break;
        }
        equalRangeUserVcf.close();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(equalRangeDbIter);
        CloserUtil.close(userVcfIn);
        CloserUtil.close(vcw);
    }
}
Also used : VCFUtils(com.github.lindenb.jvarkit.util.vcf.VCFUtils) Allele(htsjdk.variant.variantcontext.Allele) CloseableIterator(htsjdk.samtools.util.CloseableIterator) Program(com.github.lindenb.jvarkit.util.jcommander.Program) Parameter(com.beust.jcommander.Parameter) VCFFileReader(htsjdk.variant.vcf.VCFFileReader) VCFHeader(htsjdk.variant.vcf.VCFHeader) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Launcher(com.github.lindenb.jvarkit.util.jcommander.Launcher) TabixVcfFileReader(com.github.lindenb.jvarkit.util.vcf.TabixVcfFileReader) CloserUtil(htsjdk.samtools.util.CloserUtil) VCFFilterHeaderLine(htsjdk.variant.vcf.VCFFilterHeaderLine) Iterator(java.util.Iterator) Logger(com.github.lindenb.jvarkit.util.log.Logger) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) Set(java.util.Set) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) File(java.io.File) List(java.util.List) EqualRangeIterator(com.github.lindenb.jvarkit.util.iterator.EqualRangeIterator) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VariantContext(htsjdk.variant.variantcontext.VariantContext) Comparator(java.util.Comparator) VariantContextBuilder(htsjdk.variant.variantcontext.VariantContextBuilder) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) JvarkitException(com.github.lindenb.jvarkit.lang.JvarkitException) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Example 99 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfConcat method doWork.

@Override
public int doWork(List<String> args) {
    VariantContextWriter w = null;
    BufferedReader r = null;
    try {
        if (args.isEmpty()) {
            LOG.info("Reading filenames from stdin");
            r = IOUtils.openStdinForBufferedReader();
            this.inputFiles.addAll(r.lines().filter(line -> !(line.trim().isEmpty() || line.startsWith("#"))).collect(Collectors.toSet()));
            r.close();
            r = null;
        } else {
            for (final String filename : args) {
                this.inputFiles.addAll(IOUtils.unrollFiles(Arrays.asList(filename)));
            }
        }
        if (this.inputFiles.isEmpty()) {
            LOG.error("No input");
            return -1;
        }
        w = super.openVariantContextWriter(this.outputfile);
        return fromFiles(w);
    } catch (Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(w);
        CloserUtil.close(r);
    }
}
Also used : BufferedReader(java.io.BufferedReader) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) IOException(java.io.IOException)

Example 100 with VariantContextWriter

use of htsjdk.variant.variantcontext.writer.VariantContextWriter in project jvarkit by lindenb.

the class VcfToZip method doWork.

@Override
public int doWork(List<String> args) {
    if (this.outputFile != null && this.outputFile.getName().endsWith(".zip")) {
        LOG.error("Filename must end with '.zip' " + outputFile);
        return -1;
    }
    LineIterator lr = null;
    VcfIterator in = null;
    ZipOutputStream zout = null;
    FileOutputStream fout = null;
    int num_vcfs = 0;
    VariantContextWriter vcw = null;
    args = new ArrayList<>(IOUtils.unrollFiles(args));
    try {
        int optind = 0;
        if (this.outputFile != null) {
            fout = new FileOutputStream(this.outputFile);
            zout = new ZipOutputStream(fout);
        } else {
            zout = new ZipOutputStream(stdout());
        }
        do {
            if (args.isEmpty()) {
                LOG.info("reading concatenated vcf from stdin");
                lr = IOUtils.openStreamForLineIterator(stdin());
            } else {
                LOG.info("reading concatenated vcf from " + args.get(optind));
                lr = IOUtils.openURIForLineIterator(args.get(optind));
            }
            while (lr.hasNext()) {
                ++num_vcfs;
                in = VCFUtils.createVcfIteratorFromLineIterator(lr, true);
                final VCFHeader header = in.getHeader();
                String filename = null;
                if (this.titleHeaderStr != null && !this.titleHeaderStr.isEmpty()) {
                    final VCFHeaderLine h = header.getOtherHeaderLine(this.titleHeaderStr);
                    if (h != null && !h.getValue().trim().isEmpty())
                        filename = h.getValue().trim();
                }
                if (filename == null || filename.trim().isEmpty()) {
                    // create title
                    filename = String.format("vcf2zip.%05d.vcf", num_vcfs);
                    // set name in header
                    if (this.titleHeaderStr != null && !this.titleHeaderStr.isEmpty()) {
                        header.addMetaDataLine(new VCFHeaderLine(this.titleHeaderStr.trim(), filename));
                    }
                }
                if (!filename.endsWith(".vcf")) {
                    filename += ".vcf";
                }
                if (!this.zipPrefix.isEmpty()) {
                    filename = this.zipPrefix + (this.zipPrefix.endsWith("/") ? "" : "/") + filename;
                }
                LOG.info(filename);
                final ZipEntry entry = new ZipEntry(filename);
                entry.setComment("Created with " + getProgramName());
                zout.putNextEntry(entry);
                vcw = VCFUtils.createVariantContextWriterToOutputStream(IOUtils.uncloseableOutputStream(zout));
                vcw.writeHeader(header);
                final SAMSequenceDictionaryProgress progress = new SAMSequenceDictionaryProgress(header);
                while (in.hasNext()) {
                    vcw.add(progress.watch(in.next()));
                }
                vcw.close();
                progress.finish();
                zout.closeEntry();
                in.close();
            }
            ++optind;
        } while (optind < args.size());
        zout.finish();
        zout.flush();
        zout.close();
        zout = null;
        CloserUtil.close(fout);
        LOG.info("done. Number of VCFs:" + num_vcfs);
        return RETURN_OK;
    } catch (final Exception e) {
        LOG.error(e);
        return -1;
    } finally {
        CloserUtil.close(in);
        CloserUtil.close(lr);
        CloserUtil.close(zout);
        CloserUtil.close(fout);
    }
}
Also used : VCFHeaderLine(htsjdk.variant.vcf.VCFHeaderLine) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) ZipEntry(java.util.zip.ZipEntry) LineIterator(htsjdk.tribble.readers.LineIterator) VcfIterator(com.github.lindenb.jvarkit.util.vcf.VcfIterator) ZipOutputStream(java.util.zip.ZipOutputStream) FileOutputStream(java.io.FileOutputStream) VariantContextWriter(htsjdk.variant.variantcontext.writer.VariantContextWriter) VCFHeader(htsjdk.variant.vcf.VCFHeader)

Aggregations

VariantContextWriter (htsjdk.variant.variantcontext.writer.VariantContextWriter)132 File (java.io.File)67 VCFHeader (htsjdk.variant.vcf.VCFHeader)65 VariantContext (htsjdk.variant.variantcontext.VariantContext)60 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)53 ArrayList (java.util.ArrayList)41 IOException (java.io.IOException)38 VariantContextBuilder (htsjdk.variant.variantcontext.VariantContextBuilder)35 VCFHeaderLine (htsjdk.variant.vcf.VCFHeaderLine)34 HashSet (java.util.HashSet)32 List (java.util.List)29 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)28 Allele (htsjdk.variant.variantcontext.Allele)28 Collectors (java.util.stream.Collectors)27 VcfIterator (com.github.lindenb.jvarkit.util.vcf.VcfIterator)26 VCFFileReader (htsjdk.variant.vcf.VCFFileReader)26 Parameter (com.beust.jcommander.Parameter)24 Logger (com.github.lindenb.jvarkit.util.log.Logger)24 Launcher (com.github.lindenb.jvarkit.util.jcommander.Launcher)23 Program (com.github.lindenb.jvarkit.util.jcommander.Program)23