Example use of com.intel.genomicsdb.GenomicsDBImporter in the gatk project by broadinstitute.
Shown below: the traverse method of the class GenomicsDBImport.
/**
 * A complete traversal from start to finish. This method imports all samples
 * listed in {@code sampleNameToVcfUri}, one batch at a time.
 *
 * <p>For each batch it opens the feature readers for that batch's samples, builds an
 * import configuration, runs a {@link GenomicsDBImporter} over the first interval in
 * {@code intervals}, and then closes the readers. The readers are closed even when
 * the import fails, so a failing batch does not leak open readers.
 *
 * <p>If there are no samples at all, the method logs a warning and returns without
 * importing anything.
 *
 * @throws UserException if the importer cannot be initialized for a batch, or if
 *                       importing a batch fails with an {@link IOException}
 */
@Override
public void traverse() {
    // Force the progress meter to update after every batch
    progressMeter.setRecordsBetweenTimeChecks(1L);

    final int sampleCount = sampleNameToVcfUri.size();
    if (sampleCount == 0) {
        // Nothing to import. Returning here also keeps the batch-count arithmetic below
        // from dividing by zero when the batch size falls back to the (empty) sample count.
        logger.warn("No samples to import; skipping GenomicsDB import");
        return;
    }

    // The sentinel batch size means "import everything in a single batch".
    final int updatedBatchSize = (batchSize == DEFAULT_ZERO_BATCH_SIZE) ? sampleCount : batchSize;
    // Ceiling division: a trailing partial batch still counts as a batch.
    final int totalBatchCount = (sampleCount / updatedBatchSize) + (sampleCount % updatedBatchSize == 0 ? 0 : 1);

    for (int i = 0, batchCount = 1; i < sampleCount; i += updatedBatchSize, ++batchCount) {
        final Map<String, FeatureReader<VariantContext>> sampleToReaderMap =
                getFeatureReaders(new ArrayList<>(sampleNameToVcfUri.keySet()), sampleNameToVcfUri, updatedBatchSize, sampleCount, i);

        try {
            logger.info("Importing batch " + batchCount + " with " + sampleToReaderMap.size() + " samples");

            final long variantContextBufferSize = vcfBufferSizePerSample * sampleToReaderMap.size();

            // NOTE(review): the last two arguments look like inclusive row bounds for this batch;
            // for a short final batch the upper bound can exceed sampleCount - 1 -- confirm that
            // createImportConfiguration tolerates this.
            final GenomicsDBImportConfiguration.ImportConfiguration importConfiguration =
                    createImportConfiguration(workspace, GenomicsDBConstants.DEFAULT_ARRAY_NAME,
                            variantContextBufferSize, segmentSize, i, (i + updatedBatchSize - 1));

            final GenomicsDBImporter importer;
            try {
                importer = new GenomicsDBImporter(sampleToReaderMap, mergedHeaderLines, intervals.get(0), importConfiguration);
            } catch (final IOException e) {
                throw new UserException("Error initializing GenomicsDBImporter in batch " + batchCount, e);
            }

            try {
                importer.importBatch();
            } catch (final IOException e) {
                throw new UserException("GenomicsDB import failed in batch " + batchCount, e);
            }
        } finally {
            // Always release this batch's readers, whether the import succeeded or threw.
            closeReaders(sampleToReaderMap);
        }

        progressMeter.update(intervals.get(0));
        logger.info("Done importing batch " + batchCount + "/" + totalBatchCount);
    }
}
Aggregations