Use of htsjdk.variant.vcf.VCFCodec in the project jvarkit (by lindenb) — class JfxNgs, method doMenuIndexVcf.
/**
 * Prompts the user for one or more VCF files and builds an on-disk index for
 * each one: a tabix index for ".vcf.gz" files, a tribble index for plain
 * ".vcf" files. Processing stops at the first failure (or at the first file
 * with an unsupported suffix), after reporting the error in a dialog.
 */
private void doMenuIndexVcf(final Window owner) {
    final FileChooser chooser = newFileChooser();
    chooser.getExtensionFilters().addAll(VcfStage.EXTENSION_FILTERS);
    final List<File> selected = chooser.showOpenMultipleDialog(owner);
    if (selected == null) {
        // user cancelled the dialog
        return;
    }
    for (final File vcf : selected) {
        updateLastDir(vcf);
        final String fname = vcf.getName();
        if (fname.endsWith(".vcf.gz")) {
            LOG.info("writing tabix index for " + vcf);
            final File idxFile = new File(vcf.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
            try {
                // refuse to clobber an existing index
                if (idxFile.exists()) {
                    throw new IOException("Tabix index " + idxFile + " already exists.");
                }
                final TabixIndex tabix = IndexFactory.createTabixIndex(vcf, new VCFCodec(), (SAMSequenceDictionary) null);
                tabix.write(idxFile);
                new Alert(AlertType.CONFIRMATION, "Done. ?", ButtonType.OK).showAndWait();
            } catch (final Exception err) {
                showExceptionDialog(owner, err);
                break;
            }
        } else if (fname.endsWith(".vcf")) {
            LOG.info("writing tribble index for " + vcf);
            final File idxFile = new File(vcf.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
            try {
                // refuse to clobber an existing index
                if (idxFile.exists()) {
                    throw new IOException("Tribble index " + idxFile + " already exists.");
                }
                final Index tribble = IndexFactory.createIndex(vcf, new VCFCodec(), IndexType.LINEAR);
                tribble.writeBasedOnFeatureFile(vcf);
                new Alert(AlertType.CONFIRMATION, "Done. ?", ButtonType.OK).showAndWait();
            } catch (final Exception err) {
                showExceptionDialog(owner, err);
                break;
            }
        } else {
            // neither ".vcf" nor ".vcf.gz": report and stop
            showExceptionDialog(owner, "Cannot index file " + vcf);
            break;
        }
    }
}
Use of htsjdk.variant.vcf.VCFCodec in the project jvarkit (by lindenb) — class VcfOffsetsIndexFactory, method indexVcfFile.
/**
 * Scans a VCF file and writes, into {@code indexFile}, the byte offset of every
 * variant line — optionally filtered through {@code this.acceptVariant}.
 *
 * Output layout: the MAGIC bytes, followed by one {@code long} (the offset of the
 * line start) per accepted variant. ".vcf.gz" input is read with a
 * BlockCompressedInputStream, plain ".vcf" with an AsciiLineReader; for the
 * compressed case the recorded offsets come from {@code getPosition()} —
 * NOTE(review): presumably these are BGZF virtual file pointers; confirm that
 * whatever reads this index interprets them the same way.
 *
 * @param vcfFile   the ".vcf" or ".vcf.gz" file to index
 * @param indexFile destination file for the offsets
 * @return indexFile
 * @throws IOException on read or write failure
 * @throws IllegalArgumentException if the file name ends with neither ".vcf" nor ".vcf.gz"
 */
public File indexVcfFile(final File vcfFile, final File indexFile) throws IOException {
LOG.info("indexing " + vcfFile);
IOUtil.assertFileIsReadable(vcfFile);
DataOutputStream daos = null;
BlockCompressedInputStream bgzin = null;
AsciiLineReader ascii = null;
VCFHeader header = null;
final VCFCodec codec = new VCFCodec();
SAMSequenceDictionaryProgress progress = null;
try {
daos = new DataOutputStream(new FileOutputStream(indexFile));
// the index starts with the magic bytes so readers can validate the format
daos.write(MAGIC);
// choose the reader from the file suffix; exactly one of bgzin/ascii is non-null afterwards
if (vcfFile.getName().endsWith(".vcf.gz")) {
bgzin = new BlockCompressedInputStream(vcfFile);
ascii = null;
} else if (vcfFile.getName().endsWith(".vcf")) {
bgzin = null;
ascii = new AsciiLineReader(new FileInputStream(vcfFile));
} else {
throw new IllegalArgumentException("not a vcf.gz or vcf file: " + vcfFile);
}
final List<String> headerLines = new ArrayList<>();
for (; ; ) {
// offset must be captured BEFORE reading the line: it is the position of the line start
final long offset = (ascii == null ? bgzin.getPosition() : ascii.getPosition());
final String line = (ascii == null ? bgzin.readLine() : ascii.readLine());
if (line == null)
break;
if (line.startsWith("#")) {
// accumulate header lines; "#CHROM" is the last header line, so the
// full header is available once we see it
headerLines.add(line);
if (line.startsWith("#CHROM")) {
// prime the codec with the header so codec.decode(line) works below;
// the anonymous LineIterator simply replays the collected header lines
codec.readHeader(new LineIterator() {
int i = 0;
@Override
public String next() {
final String s = headerLines.get(i);
i++;
return s;
}
@Override
public boolean hasNext() {
return i < headerLines.size();
}
@Override
public String peek() {
return i < headerLines.size() ? headerLines.get(i) : null;
}
});
// parse the header again to obtain a VCFHeader for progress reporting
header = VCFUtils.parseHeader(headerLines).header;
progress = new SAMSequenceDictionaryProgress(header);
progress.logger(this.logger == null ? LOG : this.logger);
progress.setLogPrefix("indexing");
}
continue;
}
// a data line before "#CHROM" means the header never appeared: malformed file
if (progress == null) {
throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
}
final VariantContext ctx = codec.decode(line);
progress.watch(ctx);
// optional user-supplied filter: skip offsets of rejected variants
if (this.acceptVariant != null) {
if (!acceptVariant.test(ctx))
continue;
}
daos.writeLong(offset);
}
// a file with no data lines at all still needs a header to be valid
if (progress == null) {
throw new JvarkitException.FileFormatError("no vcf header in " + vcfFile);
}
progress.finish();
daos.flush();
daos.close();
return indexFile;
} catch (final IOException err) {
// re-thrown unchanged; kept so IO failures are not wrapped by any broader handler
throw err;
} finally {
// CloserUtil.close tolerates null and already-closed streams
CloserUtil.close(ascii);
CloserUtil.close(bgzin);
CloserUtil.close(daos);
}
}
Use of htsjdk.variant.vcf.VCFCodec in the project jvarkit (by lindenb) — class VCFUtils, method findCodecFromLines.
/**
 * Scans the given header lines for a VCF "fileformat"-style declaration and
 * returns the codec matching the declared version (VCF3Codec for 3.2/3.3,
 * VCFCodec for 4.0/4.1/4.2). If no recognizable declaration is found, the
 * default codec is returned.
 */
public static AbstractVCFCodec findCodecFromLines(final List<String> list) {
    for (final String line : list) {
        // drop a leading "##" so the text before '=' can be matched as a format key
        final String formatString = line.startsWith("##") ? line.substring(2) : line;
        final int eq = formatString.indexOf('=');
        if (eq == -1) {
            continue;
        }
        if (!VCFHeaderVersion.isFormatString(formatString.substring(0, eq))) {
            continue;
        }
        final VCFHeaderVersion version = VCFHeaderVersion.getHeaderVersion(line);
        if (version == null) {
            continue;
        }
        if (version == VCFHeaderVersion.VCF3_2 || version == VCFHeaderVersion.VCF3_3) {
            return new VCF3Codec();
        }
        if (version == VCFHeaderVersion.VCF4_0
                || version == VCFHeaderVersion.VCF4_1
                || version == VCFHeaderVersion.VCF4_2) {
            return new VCFCodec();
        }
        // any other version value: keep scanning, fall back to the default codec
    }
    return createDefaultVCFCodec();
}
Use of htsjdk.variant.vcf.VCFCodec in the project gatk (by broadinstitute) — class GenomicsDBImportIntegrationTest, method testPreserveContigOrderingInHeader.
@Test
public void testPreserveContigOrderingInHeader() throws IOException {
    // Import two GVCFs into a fresh GenomicsDB workspace, then verify that the
    // sequence dictionary read back from GenomicsDB matches the one in the input GVCF.
    final String workspace = createTempDir("testPreserveContigOrderingInHeader-").getAbsolutePath() + "/workspace";
    final String gvcf1 = GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting1.g.vcf";
    final String gvcf2 = GENOMICSDB_TEST_DIR + "testHeaderContigLineSorting2.g.vcf";
    writeToGenomicsDB(Arrays.asList(gvcf1, gvcf2), new SimpleInterval("chr20", 17959479, 17959479), workspace, 0, false, 0);
    final String vidmapPath = new File(workspace, GenomicsDBConstants.DEFAULT_VIDMAP_FILE_NAME).getAbsolutePath();
    final String callsetPath = new File(workspace, GenomicsDBConstants.DEFAULT_CALLSETMAP_FILE_NAME).getAbsolutePath();
    try (final GenomicsDBFeatureReader<VariantContext, PositionalBufferedStream> genomicsDBFeatureReader =
                 new GenomicsDBFeatureReader<>(vidmapPath, callsetPath, workspace,
                         GenomicsDBConstants.DEFAULT_ARRAY_NAME, b38_reference_20_21, null, new BCF2Codec());
         final AbstractFeatureReader<VariantContext, LineIterator> inputGVCFReader =
                 AbstractFeatureReader.getFeatureReader(gvcf1, new VCFCodec(), true)) {
        final SAMSequenceDictionary dictionaryFromGenomicsDB =
                ((VCFHeader) genomicsDBFeatureReader.getHeader()).getSequenceDictionary();
        final SAMSequenceDictionary dictionaryFromInputGVCF =
                ((VCFHeader) inputGVCFReader.getHeader()).getSequenceDictionary();
        Assert.assertEquals(dictionaryFromGenomicsDB, dictionaryFromInputGVCF, "Sequence dictionary from GenomicsDB does not match original sequence dictionary from input GVCF");
    }
}
Use of htsjdk.variant.vcf.VCFCodec in the project gatk (by broadinstitute) — class GenotypeGVCFsIntegrationTest, method getVariantContexts.
/**
 * Returns a list of VariantContext records from a VCF file.
 *
 * @param vcfFile VCF file
 * @return list of VariantContext records
 * @throws IOException if the file does not exist or can not be opened
 */
private static List<VariantContext> getVariantContexts(final File vcfFile) throws IOException {
    final VCFCodec codec = new VCFCodec();
    // try-with-resources: the original leaked the FileInputStream on every call
    try (final FileInputStream s = new FileInputStream(vcfFile)) {
        final LineIterator lineIteratorVCF = codec.makeSourceFromStream(new PositionalBufferedStream(s));
        // the codec must consume the header before decode() can parse data lines
        codec.readHeader(lineIteratorVCF);
        final List<VariantContext> VCs = new ArrayList<>();
        while (lineIteratorVCF.hasNext()) {
            final String line = lineIteratorVCF.next();
            Assert.assertNotNull(line);
            VCs.add(codec.decode(line));
        }
        return VCs;
    }
}
Aggregations