Use of htsjdk.tribble.index.tabix.TabixIndexCreator in project ASCIIGenome by dariober.
The class UcscFetch, method blockCompressAndIndex.
/**
 * Block compress the input file and create the associated tabix index. The newly created file
 * and its index are deleted on exit if deleteOnExit is true.
 * @throws IOException
 * @throws InvalidRecordException
 */
private void blockCompressAndIndex(String in, String bgzfOut, boolean deleteOnExit) throws IOException, InvalidRecordException {

    File inFile = new File(in);
    File outFile = new File(bgzfOut);

    LineIterator lin = utils.IOUtils.openURIForLineIterator(inFile.getAbsolutePath());

    BlockCompressedOutputStream writer = new BlockCompressedOutputStream(outFile);
    long filePosition = writer.getFilePointer();

    TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.GFF);

    while (lin.hasNext()) {
        String line = lin.next();
        GtfLine gtf = new GtfLine(line.split("\t"));
        writer.write(line.getBytes());
        writer.write('\n');
        indexCreator.addFeature(gtf, filePosition);
        filePosition = writer.getFilePointer();
    }
    writer.flush();

    File tbi = new File(bgzfOut + TabixUtils.STANDARD_INDEX_EXTENSION);
    if (tbi.exists() && tbi.isFile()) {
        writer.close();
        throw new RuntimeException("Index file exists: " + tbi);
    }
    Index index = indexCreator.finalizeIndex(writer.getFilePointer());
    index.writeBasedOnFeatureFile(outFile);
    writer.close();

    if (deleteOnExit) {
        outFile.deleteOnExit();
        File idx = new File(outFile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
        idx.deleteOnExit();
    }
}
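The loop above is tied to ASCIIGenome's GtfLine class, but the underlying pattern is generic: write each line to a BlockCompressedOutputStream, register the virtual file offset at which the line starts via TabixIndexCreator.addFeature, then finalize the index and write the .tbi. Below is a minimal, self-contained sketch of the same pattern, assuming a coordinate-sorted, header-free BED file and using htsjdk's built-in BEDCodec; the class and method names (BgzfIndexSketch, compressAndIndexBed) are placeholders, not part of ASCIIGenome.

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.bed.BEDFeature;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.index.tabix.TabixIndexCreator;

public class BgzfIndexSketch {

    /** Block compress a coordinate-sorted, header-free BED file and write a .tbi index next to it. */
    public static void compressAndIndexBed(String bedIn, File bgzfOut) throws IOException {
        BEDCodec codec = new BEDCodec();
        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.BED);
        try (BufferedReader br = Files.newBufferedReader(Paths.get(bedIn));
             BlockCompressedOutputStream writer = new BlockCompressedOutputStream(bgzfOut)) {
            long filePosition = writer.getFilePointer(); // virtual offset where the next line will start
            String line;
            while ((line = br.readLine()) != null) {
                BEDFeature feature = codec.decode(line);
                if (feature == null) {
                    continue; // skip lines the codec cannot decode (headers, empty lines)
                }
                writer.write(line.getBytes());
                writer.write('\n');
                // Register the feature at the offset captured before the line was written.
                indexCreator.addFeature(feature, filePosition);
                filePosition = writer.getFilePointer();
            }
            writer.flush();
            Index index = indexCreator.finalizeIndex(writer.getFilePointer());
            index.writeBasedOnFeatureFile(bgzfOut); // writes <bgzfOut>.tbi
        }
    }
}

Note that, as in the ASCIIGenome method, addFeature is always called with the offset captured before the line is written, i.e. the position at which the feature starts in the compressed output.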
Use of htsjdk.tribble.index.tabix.TabixIndexCreator in project ASCIIGenome by dariober.
The class MakeTabixIndex, method blockCompressAndIndex.
/**
 * Block compress input file and create associated tabix index.
 * @throws IOException
 * @throws InvalidRecordException
 */
private void blockCompressAndIndex(String intab, File bgzfOut, TabixFormat fmt) throws IOException, InvalidRecordException {

    LineIterator lin = utils.IOUtils.openURIForLineIterator(intab);

    BlockCompressedOutputStream writer = new BlockCompressedOutputStream(bgzfOut);
    long filePosition = writer.getFilePointer();

    TabixIndexCreator indexCreator = new TabixIndexCreator(fmt);
    boolean first = true;

    // This is relevant to vcf files only: Prepare header and codec
    // ------------------------------------------------------------
    VCFHeader vcfHeader = null;
    VCFCodec vcfCodec = null;
    if (fmt.equals(TabixFormat.VCF)) {
        try {
            VCFFileReader vcfr = new VCFFileReader(new File(intab), false);
            vcfHeader = vcfr.getFileHeader();
            vcfr.close();
        } catch (MalformedFeatureFile e) {
            vcfHeader = new VCFHeader();
        }
        vcfCodec = new VCFCodec();
        vcfCodec.setVCFHeader(vcfHeader, Utils.getVCFHeaderVersion(vcfHeader));
    }
    // ------------------------------------------------------------

    int nWarnings = 10;

    while (lin.hasNext()) {
        String line = lin.next().trim();
        try {
            if (line.isEmpty() || line.startsWith("track ")) {
                continue;
            }
            if (line.startsWith("#")) {
                writer.write((line + "\n").getBytes());
                filePosition = writer.getFilePointer();
                continue;
            }
            if (line.startsWith("##FASTA")) {
                break;
            }
            if (first && !fmt.equals(TabixFormat.VCF)) {
                String dummy = this.makeDummyLine(line, fmt);
                addLineToIndex(dummy, indexCreator, filePosition, fmt, null, null);
                writer.write(dummy.getBytes());
                writer.write('\n');
                filePosition = writer.getFilePointer();
                first = false;
            }
            addLineToIndex(line, indexCreator, filePosition, fmt, vcfHeader, vcfCodec);
            writer.write(line.getBytes());
            writer.write('\n');
            filePosition = writer.getFilePointer();
        } catch (Exception e) {
            if (e.getMessage().contains("added out of order") || e.getMessage().contains("Features added out of order")) {
                // String markers for out-of-order records come from htsjdk/tribble/index/tabix/TabixIndexCreator.java
                throw new InvalidRecordException();
            }
            if (nWarnings >= 0) {
                System.err.println("Warning: " + e.getMessage() + ". Skipping:\n" + line);
            }
            if (nWarnings == 0) {
                System.err.println("Additional warnings will not be shown.");
            }
            nWarnings--;
        }
    }
    writer.flush();

    Index index = indexCreator.finalizeIndex(writer.getFilePointer());
    index.writeBasedOnFeatureFile(bgzfOut);
    writer.close();

    CloserUtil.close(lin);
}
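For VCF input, the method above first extracts the header with VCFFileReader so that a VCFCodec can turn each data line into a VariantContext before it is registered with the index creator. Below is a stripped-down sketch of just that VCF path, assuming a coordinate-sorted plain-text VCF and hard-coding version 4.2 in place of ASCIIGenome's Utils.getVCFHeaderVersion helper; the class and method names are illustrative only.

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.tribble.index.Index;
import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.tribble.index.tabix.TabixIndexCreator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderVersion;

public class VcfBgzfIndexSketch {

    /** Block compress a coordinate-sorted plain-text VCF and write a .tbi index next to the output. */
    public static void compressAndIndexVcf(File vcfIn, File bgzfOut) throws IOException {
        // Read the header once so the codec can decode data lines into VariantContext objects.
        VCFHeader header;
        try (VCFFileReader vcfr = new VCFFileReader(vcfIn, false)) {
            header = vcfr.getFileHeader();
        }
        VCFCodec codec = new VCFCodec();
        codec.setVCFHeader(header, VCFHeaderVersion.VCF4_2); // assumed VCF version

        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.VCF);
        try (BufferedReader br = Files.newBufferedReader(vcfIn.toPath());
             BlockCompressedOutputStream writer = new BlockCompressedOutputStream(bgzfOut)) {
            long filePosition = writer.getFilePointer();
            String line;
            while ((line = br.readLine()) != null) {
                writer.write(line.getBytes());
                writer.write('\n');
                if (!line.startsWith("#")) {
                    // Only data lines are indexed; header lines are just copied through.
                    VariantContext ctx = codec.decode(line);
                    indexCreator.addFeature(ctx, filePosition);
                }
                filePosition = writer.getFilePointer();
            }
            writer.flush();
            Index index = indexCreator.finalizeIndex(writer.getFilePointer());
            index.writeBasedOnFeatureFile(bgzfOut);
        }
    }
}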
Use of htsjdk.tribble.index.tabix.TabixIndexCreator in project jvarkit by lindenb.
The class BedIndexTabix, method run.
protected void run(LineIterator in) throws IOException {
    int bedLineCount = 0;
    File tbi = new File(outputFile.getPath() + TabixUtils.STANDARD_INDEX_EXTENSION);

    BlockCompressedOutputStream writer = null;
    SortingCollection<String> sorter = null;
    final Comparator<String> comparator = new Comparator<String>() {
        @Override
        public int compare(String o1, String o2) {
            BedLine bed1 = bedCodec.decode(o1);
            BedLine bed2 = bedCodec.decode(o2);
            int i = bed1.getContig().compareTo(bed2.getContig());
            if (i != 0)
                return i;
            i = bed1.getStart() - bed2.getStart();
            if (i != 0)
                return i;
            i = bed1.getEnd() - bed2.getEnd();
            if (i != 0)
                return i;
            return o1.compareTo(o2);
        }
    };
    CloseableIterator<String> iter = null;
    try {
        TabixIndexCreator indexCreator = new TabixIndexCreator(TabixFormat.BED);
        LOG.info("Opening " + outputFile);
        writer = new BlockCompressedOutputStream(this.outputFile);

        StringBuilder header = new StringBuilder();
        while (in.hasNext()) {
            String h = in.peek();
            if (!BedLine.isBedHeader(h))
                break;
            header.append(in.next()).append('\n');
        }
        // write header
        if (header.length() > 0) {
            LOG.info("Writing header");
            writer.write(header.toString().getBytes());
        }
        if (this.sort) {
            LOG.info("Sorting");
            sorter = SortingCollection.newInstance(String.class, new BedDataCodec(), comparator, this.writingSortingCollection.getMaxRecordsInRam(), this.writingSortingCollection.getTmpPaths());
            while (in.hasNext()) {
                String line = in.next();
                BedLine bed = bedCodec.decode(line);
                if (bed == null)
                    continue;
                sorter.add(line);
            }
            sorter.doneAdding();
            sorter.setDestructiveIteration(true);
            iter = sorter.iterator();
            long filePosition = writer.getFilePointer();
            while (iter.hasNext()) {
                String line = iter.next();
                BedLine bed = this.bedCodec.decode(line);
                writer.write(line.getBytes());
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                ++bedLineCount; // count records actually written
            }
            sorter.cleanup();
        } else {
            long filePosition = writer.getFilePointer();
            while (in.hasNext()) {
                String line = in.next();
                BedLine bed = this.bedCodec.decode(line);
                if (bed == null)
                    continue;
                writer.write(line.getBytes());
                writer.write('\n');
                indexCreator.addFeature(bed, filePosition);
                filePosition = writer.getFilePointer();
                ++bedLineCount; // count records actually written
            }
        }
        writer.flush();
        LOG.info("Creating index");
        Index index = indexCreator.finalizeIndex(writer.getFilePointer());
        LOG.info("Writing index to " + tbi + " using " + index.getClass());
        index.writeBasedOnFeatureFile(this.outputFile);
        writer.close();
        writer = null;
        LOG.info("Done N=" + bedLineCount);
    } catch (Exception e) {
        if (this.outputFile.exists() && this.outputFile.isFile()) {
            LOG.warning("Deleting " + this.outputFile);
            this.outputFile.delete();
            if (tbi.exists() && tbi.isFile())
                tbi.delete();
        }
        throw new IOException(e);
    } finally {
        CloserUtil.close(iter);
        CloserUtil.close(sorter);
        CloserUtil.close(writer);
    }
}
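Once the block-compressed BED and its .tbi exist, they can be read back region by region through htsjdk's AbstractFeatureReader, which picks up the tabix index automatically when requireIndex is true. A short sketch, assuming the index file sits next to the compressed BED (class and method names are illustrative); this also serves as a quick sanity check that the index written above is usable.

import java.io.IOException;

import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.CloseableTribbleIterator;
import htsjdk.tribble.bed.BEDCodec;
import htsjdk.tribble.bed.BEDFeature;
import htsjdk.tribble.readers.LineIterator;

public class TabixQuerySketch {

    /** Print all BED records overlapping [start, end] on the given contig (1-based, inclusive). */
    public static void printOverlaps(String bedGz, String contig, int start, int end) throws IOException {
        try (AbstractFeatureReader<BEDFeature, LineIterator> reader =
                 AbstractFeatureReader.getFeatureReader(bedGz, new BEDCodec(), true); // requireIndex = true
             CloseableTribbleIterator<BEDFeature> it = reader.query(contig, start, end)) {
            while (it.hasNext()) {
                BEDFeature bed = it.next();
                System.out.println(bed.getContig() + "\t" + bed.getStart() + "\t" + bed.getEnd());
            }
        }
    }
}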