Search in sources :

Example 1 with AsciiLineReader

use of htsjdk.tribble.readers.AsciiLineReader in project jvarkit by lindenb.

the class VcfOffsetsIndexFactory method indexVcfFile.

/**
 * Indexes a VCF file by recording the reader offset of each accepted variant line.
 *
 * <p>The index begins with the {@code MAGIC} bytes, followed by one {@code long}
 * per retained variant: the value returned by {@code getPosition()} on the
 * underlying reader immediately before that variant's line was read. Header
 * lines (starting with {@code #}) are collected and, once the {@code #CHROM}
 * line is seen, handed to the codec so that the following variant lines can be
 * decoded. When {@code acceptVariant} is set, only variants passing it are
 * written to the index.
 *
 * @param vcfFile   readable VCF whose name ends with {@code .vcf} (read through
 *                  {@code AsciiLineReader}) or {@code .vcf.gz} (read through
 *                  {@code BlockCompressedInputStream})
 * @param indexFile destination of the index; created or overwritten
 * @return {@code indexFile}
 * @throws IOException              if reading the VCF or writing the index fails
 * @throws IllegalArgumentException if {@code vcfFile} has neither suffix; this is
 *                                  checked before {@code indexFile} is opened
 * @throws JvarkitException.FileFormatError if a variant line, or the end of the
 *                                  input, is reached before a {@code #CHROM} line
 */
public File indexVcfFile(final File vcfFile, final File indexFile) throws IOException {
    LOG.info("indexing " + vcfFile);
    IOUtil.assertFileIsReadable(vcfFile);
    // Validate the input name before touching indexFile, so that an unsupported
    // input does not leave behind an index holding nothing but the magic bytes.
    final String vcfName = vcfFile.getName();
    final boolean compressed = vcfName.endsWith(".vcf.gz");
    if (!compressed && !vcfName.endsWith(".vcf")) {
        throw new IllegalArgumentException("not a vcf.gz or vcf file: " + vcfFile);
    }
    DataOutputStream daos = null;
    BlockCompressedInputStream bgzin = null;
    AsciiLineReader ascii = null;
    final VCFCodec codec = new VCFCodec();
    SAMSequenceDictionaryProgress progress = null;
    try {
        // Buffered: one long is written per variant, which would otherwise be
        // one tiny write on the raw file stream each time.
        daos = new DataOutputStream(new java.io.BufferedOutputStream(new FileOutputStream(indexFile)));
        daos.write(MAGIC);
        if (compressed) {
            bgzin = new BlockCompressedInputStream(vcfFile);
        } else {
            ascii = new AsciiLineReader(new FileInputStream(vcfFile));
        }
        final List<String> headerLines = new ArrayList<>();
        for (; ; ) {
            // The offset has to be captured before the line is consumed.
            final long offset = (compressed ? bgzin.getPosition() : ascii.getPosition());
            final String line = (compressed ? bgzin.readLine() : ascii.readLine());
            if (line == null)
                break;
            if (line.startsWith("#")) {
                headerLines.add(line);
                if (line.startsWith("#CHROM")) {
                    // Last header line: initialise the codec by replaying the
                    // header lines collected so far.
                    codec.readHeader(new LineIterator() {

                        int i = 0;

                        @Override
                        public String next() {
                            final String s = headerLines.get(i);
                            i++;
                            return s;
                        }

                        @Override
                        public boolean hasNext() {
                            return i < headerLines.size();
                        }

                        @Override
                        public String peek() {
                            return i < headerLines.size() ? headerLines.get(i) : null;
                        }
                    });
                    final VCFHeader header = VCFUtils.parseHeader(headerLines).header;
                    progress = new SAMSequenceDictionaryProgress(header);
                    progress.logger(this.logger == null ? LOG : this.logger);
                    progress.setLogPrefix("indexing");
                }
                continue;
            }
            // progress is only created once the #CHROM line has been seen.
            if (progress == null) {
                throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
            }
            final VariantContext ctx = codec.decode(line);
            progress.watch(ctx);
            if (this.acceptVariant != null) {
                if (!acceptVariant.test(ctx))
                    continue;
            }
            daos.writeLong(offset);
        }
        // Input ended without any #CHROM line (e.g. an empty file).
        if (progress == null) {
            throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
        }
        progress.finish();
        // Flush and close explicitly on the success path so that write errors
        // surface as an IOException from these calls.
        daos.flush();
        daos.close();
        return indexFile;
    } finally {
        CloserUtil.close(ascii);
        CloserUtil.close(bgzin);
        CloserUtil.close(daos);
    }
}
Also used : VCFCodec(htsjdk.variant.vcf.VCFCodec) SAMSequenceDictionaryProgress(com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress) AsciiLineReader(htsjdk.tribble.readers.AsciiLineReader) DataOutputStream(java.io.DataOutputStream) ArrayList(java.util.ArrayList) VariantContext(htsjdk.variant.variantcontext.VariantContext) IOException(java.io.IOException) LineIterator(htsjdk.tribble.readers.LineIterator) FileInputStream(java.io.FileInputStream) FileOutputStream(java.io.FileOutputStream) VCFHeader(htsjdk.variant.vcf.VCFHeader) BlockCompressedInputStream(htsjdk.samtools.util.BlockCompressedInputStream)

Example 2 with AsciiLineReader

use of htsjdk.tribble.readers.AsciiLineReader in project jvarkit by lindenb.

the class ConvertBedChromosomes method doWork.

/**
 * Copies tab-delimited BED lines from {@code in} to {@code out}, replacing the
 * chromosome column (1-based index {@code chromColumn1}) with the result of
 * {@code convertName}.
 *
 * <p>Lines recognised by {@code BedLine.isBedHeader} are echoed unchanged.
 * Lines whose chromosome converts to {@code null} are skipped. The reader
 * wrapped around {@code in} is not closed here (hence the suppressed
 * {@code resource} warning); {@code out} is flushed but not closed.
 *
 * @param in  source of BED lines
 * @param out destination of the converted lines
 * @return always {@code 0}
 * @throws IOException if a non-header line has too few columns to contain the
 *                     chromosome column
 */
@SuppressWarnings("resource")
protected int doWork(InputStream in, PrintStream out) throws IOException {
    final int chromColumn0 = chromColumn1 - 1;
    final Pattern tab = Pattern.compile("[\t]");
    final LineIterator lr = new LineIteratorImpl(new AsciiLineReader(in));
    while (lr.hasNext()) {
        final String line = lr.next();
        if (BedLine.isBedHeader(line)) {
            out.println(line);
            continue;
        }
        // Split no further than one token past the chromosome column: the
        // last token keeps the untouched remainder of the line, tabs included.
        final String[] tokens = tab.split(line, (chromColumn0 + 2));
        if (chromColumn0 >= tokens.length)
            throw new IOException("Bad BED line : " + line + " expected at least " + (chromColumn0 + 1) + " columns");
        final String chrom = convertName(tokens[chromColumn0]);
        if (chrom == null)
            continue;
        for (int i = 0; i < tokens.length; ++i) {
            if (i > 0)
                out.print("\t");
            out.print(i == chromColumn0 ? chrom : tokens[i]);
        }
        out.println();
    }
    out.flush();
    return 0;
}
Also used : Pattern(java.util.regex.Pattern) AsciiLineReader(htsjdk.tribble.readers.AsciiLineReader) IOException(java.io.IOException) LineIteratorImpl(htsjdk.tribble.readers.LineIteratorImpl) LineIterator(htsjdk.tribble.readers.LineIterator)

Aggregations

AsciiLineReader (htsjdk.tribble.readers.AsciiLineReader)2 LineIterator (htsjdk.tribble.readers.LineIterator)2 IOException (java.io.IOException)2 SAMSequenceDictionaryProgress (com.github.lindenb.jvarkit.util.picard.SAMSequenceDictionaryProgress)1 BlockCompressedInputStream (htsjdk.samtools.util.BlockCompressedInputStream)1 LineIteratorImpl (htsjdk.tribble.readers.LineIteratorImpl)1 VariantContext (htsjdk.variant.variantcontext.VariantContext)1 VCFCodec (htsjdk.variant.vcf.VCFCodec)1 VCFHeader (htsjdk.variant.vcf.VCFHeader)1 DataOutputStream (java.io.DataOutputStream)1 FileInputStream (java.io.FileInputStream)1 FileOutputStream (java.io.FileOutputStream)1 ArrayList (java.util.ArrayList)1 Pattern (java.util.regex.Pattern)1