use of htsjdk.tribble.index.Index in project jvarkit by lindenb.
the class EvsDumpXml method doWork.
/**
 * Runs one {@code Fetcher} per chromosome (1-22 and X) and writes the result as a single
 * {@code <evsData>} XML document, either to {@code outfilename} or to standard output.
 * <p>
 * When {@code doSort} is set, records are collected in a sorting collection and written here
 * in sorted order with exact duplicates removed. When an output file is given, an index is
 * built from those sorted records and written next to the file.
 * <p>
 * All text is encoded as UTF-8, matching the encoding declared in the XML header; the byte
 * offsets handed to the indexer are therefore offsets into UTF-8 output.
 *
 * @return 0 on success, -1 if any exception was raised (the stack trace is printed)
 */
private int doWork() {
    // True while this.outputstream wraps a file that has not been closed yet.
    boolean fileStreamOpen = false;
    try {
        this.xmlInputFactory = XMLInputFactory.newFactory();
        final TransformerFactory factory = TransformerFactory.newInstance();
        this.transformer = factory.newTransformer();
        this.transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        if (this.doSort) {
            this.sortingCollection = SortingCollection.newInstance(String.class, new SnpStringCodec(), new SnpDataComparator(), this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            this.sortingCollection.setDestructiveIteration(true);
        }
        // NOTE(review): the numeric arguments look like chromosome lengths of a human
        // reference build; they are summed below as the total amount of work.
        final List<Fetcher> fetchers = new ArrayList<Fetcher>(24);
        fetchers.add(fetch("1", 249250621));
        fetchers.add(fetch("2", 243199373));
        fetchers.add(fetch("3", 198022430));
        fetchers.add(fetch("4", 191154276));
        fetchers.add(fetch("5", 180915260));
        fetchers.add(fetch("6", 171115067));
        fetchers.add(fetch("7", 159138663));
        fetchers.add(fetch("8", 146364022));
        fetchers.add(fetch("9", 141213431));
        fetchers.add(fetch("10", 135534747));
        fetchers.add(fetch("11", 135006516));
        fetchers.add(fetch("12", 133851895));
        fetchers.add(fetch("13", 115169878));
        fetchers.add(fetch("14", 107349540));
        fetchers.add(fetch("15", 102531392));
        fetchers.add(fetch("16", 90354753));
        fetchers.add(fetch("17", 81195210));
        fetchers.add(fetch("18", 78077248));
        fetchers.add(fetch("19", 59128983));
        fetchers.add(fetch("20", 63025520));
        fetchers.add(fetch("21", 48129895));
        fetchers.add(fetch("22", 51304566));
        fetchers.add(fetch("X", 155270560));
        // fetch("Y",59373566); not in evs
        // fetch("M",16571);
        this.genome_total_size = 0L;
        this.genome_curr_size = 0L;
        for (final Fetcher fetcher : fetchers) {
            this.genome_total_size += fetcher.length;
        }
        DynamicIndexCreator indexer = null;
        if (this.outfilename != null) {
            LOG.info("Opening " + this.outfilename);
            this.outputstream = new LocationAwareOutputStream(new FileOutputStream(this.outfilename));
            fileStreamOpen = true;
            indexer = new DynamicIndexCreator(this.outfilename, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
        } else {
            this.outputstream = new LocationAwareOutputStream(System.out);
        }
        // print header
        final String xml_header = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<evsData xmlns=\"http://webservice.evs.gs.washington.edu/\">\n";
        this.outputstream.write(xml_header.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        for (final Fetcher fetcher : fetchers) {
            fetcher.run();
            this.genome_curr_size += fetcher.length;
        }
        if (this.sortingCollection != null) {
            final SnpDataBinding snpDataBinding = new SnpDataBinding();
            this.sortingCollection.doneAdding();
            String prev = null;
            final CloseableIterator<String> iter = sortingCollection.iterator();
            try {
                while (iter.hasNext()) {
                    final String s = iter.next();
                    // Input is sorted, so identical records are adjacent: keep the first only.
                    if (prev != null && prev.equals(s)) {
                        continue;
                    }
                    // Offset of this record's first byte, recorded before it is written.
                    final long position = outputstream.getPosition();
                    outputstream.write(s.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                    // important SnpDataCodec needs separate lines
                    outputstream.write('\n');
                    if (indexer != null) {
                        final SnpData sd = snpDataBinding.convert(s);
                        indexer.addFeature(new SnpDataFeature(sd), position);
                    }
                    prev = s;
                }
            } finally {
                // Close the iterator even when a write or a conversion fails mid-loop.
                iter.close();
            }
        }
        // Offset where the indexed records end, just before the closing tag.
        final long last_index = this.outputstream.getPosition();
        final String xml_footer = "</evsData>\n";
        this.outputstream.write(xml_footer.getBytes(java.nio.charset.StandardCharsets.UTF_8));
        this.outputstream.flush();
        this.outputstream.close();
        fileStreamOpen = false;
        if (indexer != null) {
            LOG.info("Writing index");
            final Index index = indexer.finalizeIndex(last_index);
            index.writeBasedOnFeatureFile(this.outfilename);
        }
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (fileStreamOpen) {
            // Failure path: release the file handle. A secondary close error is dropped
            // on purpose because the original failure has already been reported.
            try {
                this.outputstream.close();
            } catch (Exception ignored) {
                // nothing more to do
            }
        }
        if (this.sortingCollection != null) {
            this.sortingCollection.cleanup();
        }
    }
    return 0;
}
use of htsjdk.tribble.index.Index in project jvarkit by lindenb.
the class JfxNgs method doMenuIndexVcf.
/**
 * Menu action: lets the user pick one or more VCF files and writes an index for each.
 * <p>
 * A file whose name ends with ".vcf.gz" gets a tabix index; a file whose name ends with
 * ".vcf" gets a linear tribble index. An index file that already exists is reported as an
 * error instead of being overwritten. The first error, or the first file with any other
 * suffix, is shown in a dialog and stops the operation; later files are not processed.
 *
 * @param owner window passed to the file chooser and to the error dialogs
 */
private void doMenuIndexVcf(final Window owner) {
    final FileChooser chooser = newFileChooser();
    chooser.getExtensionFilters().addAll(VcfStage.EXTENSION_FILTERS);
    final List<File> selected = chooser.showOpenMultipleDialog(owner);
    if (selected == null) {
        // dialog was dismissed without a selection
        return;
    }
    for (final File vcf : selected) {
        updateLastDir(vcf);
        final String name = vcf.getName();
        final boolean bgzipped = name.endsWith(".vcf.gz");
        if (!bgzipped && !name.endsWith(".vcf")) {
            showExceptionDialog(owner, "Cannot index file " + vcf);
            return;
        }
        if (bgzipped) {
            LOG.info("writing tabix index for " + vcf);
            final File tbiFile = new File(vcf.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
            try {
                if (tbiFile.exists()) {
                    throw new IOException("Tabix index " + tbiFile + " already exists.");
                }
                final TabixIndex tabix = IndexFactory.createTabixIndex(vcf, new VCFCodec(), (SAMSequenceDictionary) null);
                tabix.write(tbiFile);
                new Alert(AlertType.CONFIRMATION, "Done. ?", ButtonType.OK).showAndWait();
            } catch (final Exception err) {
                showExceptionDialog(owner, err);
                return;
            }
        } else {
            LOG.info("writing tribble index for " + vcf);
            final File idxFile = new File(vcf.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
            try {
                if (idxFile.exists()) {
                    throw new IOException("Tribble index " + idxFile + " already exists.");
                }
                final Index linear = IndexFactory.createIndex(vcf, new VCFCodec(), IndexType.LINEAR);
                // the index location is derived from the VCF file by the index itself
                linear.writeBasedOnFeatureFile(vcf);
                new Alert(AlertType.CONFIRMATION, "Done. ?", ButtonType.OK).showAndWait();
            } catch (final Exception err) {
                showExceptionDialog(owner, err);
                return;
            }
        }
    }
}
use of htsjdk.tribble.index.Index in project jvarkit by lindenb.
the class BedIndexTabix method run.
/**
 * Copies the BED lines from {@code in} into {@code outputFile} through a
 * {@code BlockCompressedOutputStream} and builds a tabix index for them.
 * <p>
 * Leading header lines (as recognised by {@code BedLine.isBedHeader}) are copied first.
 * Lines the BED codec cannot decode are skipped. When {@code sort} is set, the remaining
 * lines are first ordered by contig, start, end and finally by the raw line text.
 * Text is written as UTF-8.
 *
 * @param in source of the BED lines
 * @throws IOException wrapping any failure; in that case the output file and its index
 *         are deleted if they exist
 */
protected void run(LineIterator in) throws IOException {
    int bedLineCount = 0;
    final File tbi = new File(outputFile.getPath() + TabixUtils.STANDARD_INDEX_EXTENSION);
    BlockCompressedOutputStream writer = null;
    SortingCollection<String> sorter = null;
    final Comparator<String> comparator = new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            final BedLine bed1 = bedCodec.decode(o1);
            final BedLine bed2 = bedCodec.decode(o2);
            int i = bed1.getContig().compareTo(bed2.getContig());
            if (i != 0) {
                return i;
            }
            // Integer.compare instead of subtraction: a difference can overflow.
            i = Integer.compare(bed1.getStart(), bed2.getStart());
            if (i != 0) {
                return i;
            }
            i = Integer.compare(bed1.getEnd(), bed2.getEnd());
            if (i != 0) {
                return i;
            }
            // Same interval: fall back to the raw text so the order is total.
            return o1.compareTo(o2);
        }
    };
    CloseableIterator<String> iter = null;
    try {
        final TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.BED);
        LOG.info("Opening" + outputFile);
        writer = new BlockCompressedOutputStream(this.outputFile);
        final StringBuilder header = new StringBuilder();
        while (in.hasNext()) {
            // peek first so the first non-header line stays in the iterator
            final String h = in.peek();
            if (!BedLine.isBedHeader(h)) {
                break;
            }
            header.append(in.next()).append('\n');
        }
        // write header
        if (header.length() > 0) {
            LOG.info("Writing header");
            writer.write(header.toString().getBytes(java.nio.charset.StandardCharsets.UTF_8));
        }
        if (this.sort) {
            LOG.info("Sorting");
            sorter = SortingCollection.newInstance(String.class, new BedDataCodec(), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            while (in.hasNext()) {
                final String line = in.next();
                final BedLine bed = bedCodec.decode(line);
                if (bed == null) {
                    continue;
                }
                sorter.add(line);
            }
            sorter.doneAdding();
            sorter.setDestructiveIteration(true);
            iter = sorter.iterator();
            // File pointer before each record is the offset stored in the index.
            long filePosition = writer.getFilePointer();
            while (iter.hasNext()) {
                final String line = iter.next();
                final BedLine bed = this.bedCodec.decode(line);
                writer.write(line.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                bedLineCount++;
            }
        } else {
            long filePosition = writer.getFilePointer();
            while (in.hasNext()) {
                final String line = in.next();
                final BedLine bed = this.bedCodec.decode(line);
                if (bed == null) {
                    continue;
                }
                writer.write(line.getBytes(java.nio.charset.StandardCharsets.UTF_8));
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                bedLineCount++;
            }
        }
        writer.flush();
        LOG.info("Creating index");
        final Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        LOG.info("Writing index to " + tbi + " using " + index.getClass());
        index.writeBasedOnFeatureFile(this.outputFile);
        writer.close();
        // already closed: keep the finally block from closing it a second time
        writer = null;
        LOG.info("Done N=" + bedLineCount);
    } catch (Exception e) {
        // Do not leave a partial output or index behind.
        if (this.outputFile.exists() && this.outputFile.isFile()) {
            LOG.warning("Deleting " + this.outputFile);
            this.outputFile.delete();
            if (tbi.exists() && tbi.isFile()) {
                tbi.delete();
            }
        }
        throw new IOException(e);
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sorter);
        CloserUtil.close(writer);
        // Done here rather than after the write loop so the sorter is also cleaned up
        // when writing or indexing fails.
        if (sorter != null) {
            sorter.cleanup();
        }
    }
}
use of htsjdk.tribble.index.Index in project gatk by broadinstitute.
the class IndexUtilsUnitTest method testLoadTabixIndex.
/**
 * Checks that {@code IndexUtils.loadTabixIndex} returns a non-null index for every
 * feature file supplied by the "okFeatureFilesTabix" data provider.
 */
@Test(dataProvider = "okFeatureFilesTabix")
public void testLoadTabixIndex(final File featureFile) throws Exception {
    final Index loaded = IndexUtils.loadTabixIndex(featureFile);
    Assert.assertNotNull(loaded);
}
use of htsjdk.tribble.index.Index in project gatk by broadinstitute.
the class IndexUtilsUnitTest method testCheckIndexModificationTime.
/**
 * Loads the index stored beside the test VCF and passes both files to
 * {@code IndexUtils.checkIndexVersionAndModificationTime}; the test passes when that
 * call does not throw.
 */
@Test
public void testCheckIndexModificationTime() throws Exception {
    final File variants = new File(getToolTestDataDir(), "test_variants_for_index.vcf");
    final File variantsIndex = new File(getToolTestDataDir(), "test_variants_for_index.vcf.idx");
    final String indexPath = variantsIndex.getAbsolutePath();
    final Index loaded = IndexFactory.loadIndex(indexPath);
    // no assertion: completing without an exception is the expected outcome
    IndexUtils.checkIndexVersionAndModificationTime(variants, variantsIndex, loaded);
}
Aggregations