Use of htsjdk.tribble.readers.AsciiLineReader in project jvarkit by lindenb.
Class VcfOffsetsIndexFactory, method indexVcfFile.
/**
 * Indexes a VCF file by writing, for every accepted variant, the reader
 * position at which its line starts.
 *
 * <p>The index file starts with {@code MAGIC} followed by one {@code long}
 * per indexed variant. For a {@code .vcf.gz} input the position is the one
 * reported by {@code BlockCompressedInputStream.getPosition()}; for a plain
 * {@code .vcf} input it is the one reported by
 * {@code AsciiLineReader.getPosition()}. Variants rejected by
 * {@code acceptVariant} (when that predicate is set) are decoded and reported
 * to the progress logger but are not written to the index.
 *
 * @param vcfFile   the VCF to index; its name must end with {@code .vcf} or {@code .vcf.gz}
 * @param indexFile the index file to create (overwritten if it exists)
 * @return {@code indexFile}
 * @throws IOException on any read or write failure
 * @throws IllegalArgumentException if {@code vcfFile} has neither of the supported extensions
 * @throws JvarkitException.FileFormatError if a variant line is met before the
 *         {@code #CHROM} line, or if the file contains no {@code #CHROM} line at all
 */
public File indexVcfFile(final File vcfFile, final File indexFile) throws IOException {
    LOG.info("indexing " + vcfFile);
    IOUtil.assertFileIsReadable(vcfFile);

    // Validate the input name BEFORE touching indexFile, so that a rejected
    // input never creates or truncates an index.
    final String vcfName = vcfFile.getName();
    final boolean compressed = vcfName.endsWith(".vcf.gz");
    if (!compressed && !vcfName.endsWith(".vcf")) {
        throw new IllegalArgumentException("not a vcf.gz or vcf file: " + vcfFile);
    }

    DataOutputStream daos = null;
    BlockCompressedInputStream bgzin = null;
    AsciiLineReader ascii = null;
    final VCFCodec codec = new VCFCodec();
    SAMSequenceDictionaryProgress progress = null;
    try {
        // Open the input first: if it cannot be opened, no index file is left behind.
        if (compressed) {
            bgzin = new BlockCompressedInputStream(vcfFile);
        } else {
            ascii = new AsciiLineReader(new FileInputStream(vcfFile));
        }

        // Buffered: one long is written per variant, which would otherwise be
        // one tiny write on the underlying file per variant.
        daos = new DataOutputStream(
                new java.io.BufferedOutputStream(new FileOutputStream(indexFile)));
        daos.write(MAGIC);

        final List<String> headerLines = new ArrayList<>();
        for (;;) {
            // The position must be captured before the line is consumed.
            final long offset = (ascii == null ? bgzin.getPosition() : ascii.getPosition());
            final String line = (ascii == null ? bgzin.readLine() : ascii.readLine());
            if (line == null) {
                break;
            }

            if (line.startsWith("#")) {
                headerLines.add(line);
                if (line.startsWith("#CHROM")) {
                    // Last header line: feed the accumulated header to the codec
                    // so that it can decode the variant lines that follow.
                    codec.readHeader(new LineIterator() {
                        int i = 0;

                        @Override
                        public String next() {
                            final String s = headerLines.get(i);
                            i++;
                            return s;
                        }

                        @Override
                        public boolean hasNext() {
                            return i < headerLines.size();
                        }

                        @Override
                        public String peek() {
                            return i < headerLines.size() ? headerLines.get(i) : null;
                        }
                    });
                    final VCFHeader header = VCFUtils.parseHeader(headerLines).header;
                    progress = new SAMSequenceDictionaryProgress(header);
                    progress.logger(this.logger == null ? LOG : this.logger);
                    progress.setLogPrefix("indexing");
                }
                continue;
            }

            // A variant line before #CHROM: the codec has no header to decode with.
            if (progress == null) {
                throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
            }

            final VariantContext ctx = codec.decode(line);
            progress.watch(ctx);
            if (this.acceptVariant != null && !this.acceptVariant.test(ctx)) {
                continue;
            }
            daos.writeLong(offset);
        }

        // End of file reached without ever seeing a #CHROM line.
        if (progress == null) {
            throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
        }
        progress.finish();

        // Flush and close explicitly on the success path so that a failure to
        // write the buffered tail is reported to the caller.
        daos.flush();
        daos.close();
        return indexFile;
    } finally {
        CloserUtil.close(ascii);
        CloserUtil.close(bgzin);
        CloserUtil.close(daos);
    }
}
Use of htsjdk.tribble.readers.AsciiLineReader in project jvarkit by lindenb.
Class ConvertBedChromosomes, method doWork.
/**
 * Copies a BED stream from {@code in} to {@code out}, replacing the
 * chromosome column (1-based index {@code chromColumn1}) with the value
 * returned by {@code convertName}.
 *
 * <p>Lines recognised by {@code BedLine.isBedHeader} are echoed unchanged.
 * Lines for which {@code convertName} returns {@code null} are dropped.
 *
 * @param in  the BED input; it is not closed here
 *            (NOTE(review): presumably owned by the caller - confirm)
 * @param out the destination; flushed before returning
 * @return always {@code 0}
 * @throws IOException if a line has too few tab-separated columns to contain
 *         the chromosome column
 */
@SuppressWarnings("resource")
protected int doWork(InputStream in, PrintStream out) throws IOException {
    final int chromColumn0 = chromColumn1 - 1;
    final Pattern tab = Pattern.compile("[\t]");
    final LineIterator lr = new LineIteratorImpl(new AsciiLineReader(in));
    while (lr.hasNext()) {
        final String line = lr.next();
        if (BedLine.isBedHeader(line)) {
            out.println(line);
            continue;
        }
        // Split no further than the chromosome column: everything after it is
        // kept as a single trailing token and written back verbatim.
        final String[] tokens = tab.split(line, (chromColumn0 + 2));
        if (chromColumn0 >= tokens.length) {
            // The chromosome column needs chromColumn0 + 1 columns to exist.
            throw new IOException("Bad BED line : " + line + " expected at least "
                    + (chromColumn0 + 1) + " columns");
        }
        final String chrom = convertName(tokens[chromColumn0]);
        if (chrom == null) {
            continue;
        }
        for (int i = 0; i < tokens.length; ++i) {
            if (i > 0) {
                out.print("\t");
            }
            out.print(i == chromColumn0 ? chrom : tokens[i]);
        }
        out.println();
    }
    out.flush();
    return 0;
}
Aggregations