Use of htsjdk.samtools.util.CloseableIterator in project gatk by broadinstitute.
From the class ReadsDataSource, method prepareIteratorsForTraversal:
/**
 * Prepare iterators over all readers in response to a request for a complete iteration or query.
 *
 * @param queryIntervals Intervals to bound the iteration (reads must overlap one of these intervals). If null, iteration is unbounded.
 * @return Iterator over all reads in this data source, limited to overlap with the supplied intervals
 */
private Iterator<GATKRead> prepareIteratorsForTraversal(final List<SimpleInterval> queryIntervals, final boolean queryUnmapped) {
    // htsjdk requires that only one iterator be open at a time per reader, so close out
    // any previous iterations
    closePreviousIterationsIfNecessary();

    final boolean traversalIsBounded = (queryIntervals != null && !queryIntervals.isEmpty()) || queryUnmapped;

    // Set up an iterator for each reader, bounded to overlap with the supplied intervals if there are any
    for (final Map.Entry<SamReader, CloseableIterator<SAMRecord>> readerEntry : readers.entrySet()) {
        if (traversalIsBounded) {
            readerEntry.setValue(new SamReaderQueryingIterator(
                    readerEntry.getKey(),
                    readers.size() > 1 ? getIntervalsOverlappingReader(readerEntry.getKey(), queryIntervals) : queryIntervals,
                    queryUnmapped));
        } else {
            readerEntry.setValue(readerEntry.getKey().iterator());
        }
    }

    // Create a merging iterator over all readers if necessary. In the case where there's only a single reader,
    // return its iterator directly to avoid the overhead of the merging iterator.
    final Iterator<SAMRecord> startingIterator;
    if (readers.size() == 1) {
        startingIterator = readers.entrySet().iterator().next().getValue();
    } else {
        startingIterator = new MergingSamRecordIterator(headerMerger, readers, true);
    }

    return new SAMRecordToReadIterator(startingIterator);
}
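The closePreviousIterationsIfNecessary() call reflects the constraint named in the comment: htsjdk permits only one open iterator per SamReader at a time. A minimal, self-contained sketch of working within that constraint, assuming a hypothetical coordinate-sorted, indexed example.bam (the path and printed fields are placeholders, not part of the GATK source):

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloseableIterator;

import java.io.File;
import java.io.IOException;

public class OneIteratorPerReaderDemo {
    public static void main(final String[] args) {
        try (SamReader reader = SamReaderFactory.makeDefault().open(new File("example.bam"))) {  // placeholder path
            // CloseableIterator extends both Iterator and Closeable, so try-with-resources
            // guarantees this iteration is closed before another one is opened on the reader.
            try (CloseableIterator<SAMRecord> it = reader.iterator()) {
                while (it.hasNext()) {
                    System.out.println(it.next().getReadName());
                }
            }
            // Only now is it legal to open a second iterator on the same reader,
            // e.g. an index-backed query (requires a .bai index):
            try (CloseableIterator<SAMRecord> q = reader.query("chr1", 1, 1000, false)) {
                q.forEachRemaining(rec -> System.out.println(rec.getReadName()));
            }
        } catch (final IOException e) {
            throw new RuntimeException(e);
        }
    }
}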
Use of htsjdk.samtools.util.CloseableIterator in project gatk by broadinstitute.
From the class CreateSomaticPanelOfNormals, method doWork:
public Object doWork() {
    final List<File> inputVcfs = new ArrayList<>(vcfs);
    final Collection<CloseableIterator<VariantContext>> iterators = new ArrayList<>(inputVcfs.size());
    final Collection<VCFHeader> headers = new HashSet<>(inputVcfs.size());
    final VCFHeader headerOfFirstVcf = new VCFFileReader(inputVcfs.get(0), false).getFileHeader();
    final SAMSequenceDictionary sequenceDictionary = headerOfFirstVcf.getSequenceDictionary();
    final VariantContextComparator comparator = headerOfFirstVcf.getVCFRecordComparator();

    for (final File vcf : inputVcfs) {
        final VCFFileReader reader = new VCFFileReader(vcf, false);
        iterators.add(reader.iterator());
        final VCFHeader header = reader.getFileHeader();
        Utils.validateArg(comparator.isCompatible(header.getContigLines()), () -> vcf.getAbsolutePath() + " has incompatible contigs.");
        headers.add(header);
    }

    final VariantContextWriter writer = GATKVariantContextUtils.createVCFWriter(outputVcf, sequenceDictionary, false, Options.INDEX_ON_THE_FLY);
    writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false)));

    final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(comparator, iterators);
    SimpleInterval currentPosition = new SimpleInterval("FAKE", 1, 1);
    final List<VariantContext> variantsAtThisPosition = new ArrayList<>(20);
    while (mergingIterator.hasNext()) {
        final VariantContext vc = mergingIterator.next();
        if (!currentPosition.overlaps(vc)) {
            processVariantsAtSamePosition(variantsAtThisPosition, writer);
            variantsAtThisPosition.clear();
            currentPosition = new SimpleInterval(vc.getContig(), vc.getStart(), vc.getStart());
        }
        variantsAtThisPosition.add(vc);
    }
    // emit the variants buffered at the final position, which the loop above never flushes
    processVariantsAtSamePosition(variantsAtThisPosition, writer);
    mergingIterator.close();
    writer.close();
    return "SUCCESS";
}
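The merge itself is htsjdk's MergingIterator: a k-way merge that draws from a collection of CloseableIterators in the order imposed by the comparator, and whose close() closes every underlying iterator. A minimal sketch with sorted integers standing in for VariantContexts; the wrap() adapter below is illustrative, not an htsjdk API:

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.MergingIterator;

import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

public class MergingIteratorDemo {
    // Illustrative adapter: exposes a plain Iterator as a CloseableIterator with a no-op close().
    private static <T> CloseableIterator<T> wrap(final Iterator<T> inner) {
        return new CloseableIterator<T>() {
            @Override public boolean hasNext() { return inner.hasNext(); }
            @Override public T next() { return inner.next(); }
            @Override public void close() { /* nothing to release */ }
        };
    }

    public static void main(final String[] args) {
        // Two pre-sorted inputs, standing in for the sorted VCF readers above.
        final MergingIterator<Integer> merged = new MergingIterator<>(
                Comparator.<Integer>naturalOrder(),
                Arrays.asList(
                        wrap(Arrays.asList(1, 4, 7).iterator()),
                        wrap(Arrays.asList(2, 4, 9).iterator())));
        merged.forEachRemaining(System.out::println);  // prints 1 2 4 4 7 9
        merged.close();  // closes every wrapped iterator
    }
}

In the real tool no adapter is needed, since VCFFileReader.iterator() already returns a CloseableIterator<VariantContext>.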
Use of htsjdk.samtools.util.CloseableIterator in project gatk by broadinstitute.
From the class MergeVcfs, method doWork:
@Override
protected Object doWork() {
    final ProgressLogger progress = new ProgressLogger(logger, 10000);
    final List<String> sampleList = new ArrayList<>();
    final Collection<CloseableIterator<VariantContext>> iteratorCollection = new ArrayList<>(INPUT.size());
    final Collection<VCFHeader> headers = new HashSet<>(INPUT.size());
    VariantContextComparator variantContextComparator = null;
    SAMSequenceDictionary sequenceDictionary = null;

    if (SEQUENCE_DICTIONARY != null) {
        sequenceDictionary = SamReaderFactory.makeDefault()
                .referenceSequence(REFERENCE_SEQUENCE)
                .open(SEQUENCE_DICTIONARY)
                .getFileHeader()
                .getSequenceDictionary();
    }

    for (final File file : INPUT) {
        IOUtil.assertFileIsReadable(file);
        final VCFFileReader fileReader = new VCFFileReader(file, false);
        final VCFHeader fileHeader = fileReader.getFileHeader();

        if (variantContextComparator == null) {
            variantContextComparator = fileHeader.getVCFRecordComparator();
        } else if (!variantContextComparator.isCompatible(fileHeader.getContigLines())) {
            throw new IllegalArgumentException("The contig entries in input file " + file.getAbsolutePath() + " are not compatible with the others.");
        }

        if (sequenceDictionary == null) {
            sequenceDictionary = fileHeader.getSequenceDictionary();
        }

        if (sampleList.isEmpty()) {
            sampleList.addAll(fileHeader.getSampleNamesInOrder());
        } else if (!sampleList.equals(fileHeader.getSampleNamesInOrder())) {
            throw new IllegalArgumentException("Input file " + file.getAbsolutePath() + " has sample entries that don't match the other files.");
        }

        headers.add(fileHeader);
        iteratorCollection.add(fileReader.iterator());
    }

    if (CREATE_INDEX && sequenceDictionary == null) {
        throw new UserException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
    }

    final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
            .setOutputFile(OUTPUT)
            .setReferenceDictionary(sequenceDictionary)
            .clearOptions();
    if (CREATE_INDEX) {
        builder.setOption(Options.INDEX_ON_THE_FLY);
    }

    try (final VariantContextWriter writer = builder.build()) {
        writer.writeHeader(new VCFHeader(VCFUtils.smartMergeHeaders(headers, false), sampleList));
        final MergingIterator<VariantContext> mergingIterator = new MergingIterator<>(variantContextComparator, iteratorCollection);
        while (mergingIterator.hasNext()) {
            final VariantContext context = mergingIterator.next();
            writer.add(context);
            progress.record(context.getContig(), context.getStart());
        }
        CloserUtil.close(mergingIterator);
    }
    return null;
}
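The CREATE_INDEX guard above exists because INDEX_ON_THE_FLY cannot build an index without a sequence dictionary. A minimal sketch of the same VariantContextWriterBuilder pattern, assuming placeholder paths input.vcf.gz and merged.vcf.gz (the input is used only to borrow a header and dictionary):

import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.writer.Options;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFHeader;

import java.io.File;

public class WriterBuilderDemo {
    public static void main(final String[] args) {
        final VCFFileReader reader = new VCFFileReader(new File("input.vcf.gz"), false);  // placeholder path
        final VCFHeader header = reader.getFileHeader();
        final SAMSequenceDictionary dict = header.getSequenceDictionary();

        final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
                .setOutputFile(new File("merged.vcf.gz"))  // placeholder path
                .setReferenceDictionary(dict)
                .clearOptions();  // start from a clean slate, as MergeVcfs does
        if (dict != null) {
            builder.setOption(Options.INDEX_ON_THE_FLY);  // indexing is only possible with a dictionary
        }

        try (VariantContextWriter writer = builder.build();
             CloseableIterator<VariantContext> it = reader.iterator()) {
            writer.writeHeader(header);
            while (it.hasNext()) {
                writer.add(it.next());
            }
        }
        reader.close();
    }
}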
Use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
From the class Bam2Wig, method doWork:
@Override
public int doWork(final List<String> args) {
    if (this.win_shift <= 0) {
        LOG.error("window shift<=0");
        return -1;
    }
    if (this.window_span <= 0) {
        LOG.error("window size<=0");
        return -1;
    }
    final Interval interval;
    PrintWriter pw = null;
    CloseableIterator<SAMRecord> samRecordIterator = null;
    final List<SamReader> samReaders = new ArrayList<>();
    final List<CloseableIterator<SAMRecord>> merginIterators = new ArrayList<>();
    try {
        final SamReaderFactory srf = SamReaderFactory.makeDefault().validationStringency(htsjdk.samtools.ValidationStringency.LENIENT);
        if (args.isEmpty()) {
            if (!StringUtil.isBlank(region_str)) {
                LOG.error("cannot specify region for stdin");
                return -1;
            }
            interval = null;
            samReaders.add(srf.open(SamInputResource.of(stdin())));
            samRecordIterator = samReaders.get(0).iterator();
        } else if (args.size() == 1 && !args.get(0).endsWith(".list")) {
            samReaders.add(srf.open(SamInputResource.of(new File(args.get(0)))));
            if (StringUtil.isBlank(this.region_str)) {
                samRecordIterator = samReaders.get(0).iterator();
                interval = null;
            } else {
                interval = new IntervalParser(samReaders.get(0).getFileHeader().getSequenceDictionary())
                        .setContigNameIsWholeContig(true)
                        .parse(region_str);
                if (interval == null) {
                    LOG.error("Cannot parse interval " + this.region_str);
                    return -1;
                }
                LOG.debug("interval " + interval);
                samRecordIterator = samReaders.get(0).query(interval.getContig(), interval.getStart(), interval.getEnd(), false);
            }
        } else {
            final List<File> samFiles;
            if (args.size() == 1 && args.get(0).endsWith(".list")) {
                samFiles = IOUtils.unrollFile(new File(args.get(0)));
            } else {
                samFiles = args.stream().map(S -> new File(S)).collect(Collectors.toList());
            }
            if (samFiles.isEmpty()) {
                LOG.error("No Input SAM file");
                return -1;
            }
            final SAMSequenceDictionary dict0 = SAMSequenceDictionaryExtractor.extractDictionary(samFiles.get(0));
            if (dict0 == null)
                throw new JvarkitException.DictionaryMissing(samFiles.get(0).getPath());
            samFiles.stream().forEach(F -> {
                final SAMSequenceDictionary dicti = SAMSequenceDictionaryExtractor.extractDictionary(F);
                if (dicti == null)
                    throw new JvarkitException.DictionaryMissing(F.getPath());
                if (!SequenceUtil.areSequenceDictionariesEqual(dicti, dict0)) {
                    throw new JvarkitException.DictionariesAreNotTheSame(dict0, dicti);
                }
            });
            for (final File bamFile : samFiles) {
                LOG.info("opening " + bamFile);
                samReaders.add(srf.open(bamFile));
            }
            final SamFileHeaderMerger mergedheader = new SamFileHeaderMerger(
                    SAMFileHeader.SortOrder.coordinate,
                    samReaders.stream().map(SR -> SR.getFileHeader()).collect(Collectors.toList()),
                    false);
            final Map<SamReader, CloseableIterator<SAMRecord>> reader2iter = new HashMap<>();
            if (StringUtil.isBlank(this.region_str)) {
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.iterator());
                }
                interval = null;
            } else {
                interval = new IntervalParser(dict0).setContigNameIsWholeContig(true).parse(region_str);
                if (interval == null) {
                    LOG.error("Cannot parse interval " + this.region_str);
                    return -1;
                }
                LOG.info("interval :" + interval);
                for (final SamReader sr : samReaders) {
                    reader2iter.put(sr, sr.query(interval.getContig(), interval.getStart(), interval.getEnd(), false));
                }
            }
            merginIterators.addAll(reader2iter.values());
            samRecordIterator = new MergingSamRecordIterator(mergedheader, reader2iter, true);
        }
        for (final SamReader sr : samReaders) {
            if (sr.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
                LOG.error("one of your bam input is not sorted on coordinate");
                return -1;
            }
        }
        pw = openFileOrStdoutAsPrintWriter(this.outputFile);
        run(pw, samRecordIterator, samReaders.get(0).getFileHeader().getSequenceDictionary(), interval);
        samRecordIterator.close();
        samRecordIterator = null;
        CloserUtil.close(samReaders);
        samReaders.clear();
        pw.flush();
        return RETURN_OK;
    } catch (final Exception err) {
        LOG.error(err);
        return -1;
    } finally {
        CloserUtil.close(merginIterators);
        CloserUtil.close(samRecordIterator);
        CloserUtil.close(samReaders);
        CloserUtil.close(pw);
        pw = null;
    }
}
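The multi-BAM branch is the same reader-to-iterator pattern seen in the GATK ReadsDataSource example: one open CloseableIterator per SamReader, interleaved by MergingSamRecordIterator under a merged header. A stripped-down sketch, assuming hypothetical coordinate-sorted inputs a.bam and b.bam:

import htsjdk.samtools.MergingSamRecordIterator;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;

import java.io.File;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class MergeBamsDemo {
    public static void main(final String[] args) {
        final SamReaderFactory srf = SamReaderFactory.makeDefault();
        final List<SamReader> readers = Arrays.asList("a.bam", "b.bam").stream()  // placeholder paths
                .map(name -> srf.open(new File(name)))
                .collect(Collectors.toList());

        // Merge the headers; all inputs are assumed coordinate-sorted.
        final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(
                SAMFileHeader.SortOrder.coordinate,
                readers.stream().map(SamReader::getFileHeader).collect(Collectors.toList()),
                false);

        // One open iterator per reader, handed to the merging iterator as a map.
        final Map<SamReader, CloseableIterator<SAMRecord>> reader2iter = new LinkedHashMap<>();
        for (final SamReader r : readers) {
            reader2iter.put(r, r.iterator());
        }

        final MergingSamRecordIterator merged = new MergingSamRecordIterator(headerMerger, reader2iter, true);
        while (merged.hasNext()) {
            System.out.println(merged.next().getReadName());
        }
        merged.close();            // closes the per-reader iterators
        CloserUtil.close(readers); // then the readers themselves
    }
}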
Use of htsjdk.samtools.util.CloseableIterator in project jvarkit by lindenb.
From the class VcfEpistatis01, method doWork:
@Override
public int doWork(final List<String> args) {
    if (this.number_of_jobs < 1) {
        LOG.error("bad number of jobs");
        return -1;
    }
    try {
        final int variantsCount;
        final List<VariantContext> inMemoryVariants;
        final File vcfFile = new File(oneAndOnlyOneFile(args));
        final File tmpIndexFile;
        if (vcfFile.equals(this.outputFile)) {
            LOG.error("input == output");
            return -1;
        }
        final VCFFileReader vcfFileReader = new VCFFileReader(vcfFile, false);
        final VCFHeader header = vcfFileReader.getFileHeader();
        final Pedigree pedigree;
        if (this.pedigreeFile != null) {
            pedigree = new Pedigree.Parser().parse(this.pedigreeFile);
        } else {
            pedigree = new Pedigree.Parser().parse(header);
        }
        pedigree.verifyPersonsHaveUniqueNames();

        final Map<String, Integer> sample2index = header.getSampleNameToOffset();
        final int[] caseIndexes = pedigree.getAffected().stream()
                .filter(P -> sample2index.containsKey(P.getId()))
                .mapToInt(P -> sample2index.get(P.getId()))
                .sorted()
                .toArray();
        final int[] ctrlIndexes = new ArrayList<>(pedigree.getUnaffected()).stream()
                .filter(P -> sample2index.containsKey(P.getId()))
                .mapToInt(P -> sample2index.get(P.getId()))
                .sorted()
                .toArray();
        if (caseIndexes.length == 0 || ctrlIndexes.length == 0) {
            LOG.error("empty ped or no case/ctrl");
            vcfFileReader.close();
            return -1;
        }

        if (this.load_variants_in_memory) {
            LOG.info("loading variants in memory");
            tmpIndexFile = null;
            final CloseableIterator<VariantContext> iter2 = vcfFileReader.iterator();
            inMemoryVariants = Collections.unmodifiableList(iter2.stream()
                    .filter(this.variantFilter)
                    // should fix https://github.com/samtools/htsjdk/issues/1026 ?
                    .filter(V -> V.getGenotypes().stream().filter(G -> G.isCalled()).count() > 0)
                    .collect(Collectors.toList()));
            variantsCount = inMemoryVariants.size();
            iter2.close();
        } else {
            tmpIndexFile = File.createTempFile("epistatsis", VcfOffsetsIndexFactory.INDEX_EXTENSION);
            tmpIndexFile.deleteOnExit();
            new VcfOffsetsIndexFactory().setLogger(LOG).setPredicate(variantFilter).indexVcfFile(vcfFile, tmpIndexFile);
            final VcfList tmpList = VcfList.fromFile(vcfFile, tmpIndexFile);
            variantsCount = tmpList.size();
            tmpList.close();
            inMemoryVariants = null;
        }
        vcfFileReader.close();
        LOG.info("Number of variants: " + variantsCount);

        Result bestResult = null;
        int x = this.start_index_at;
        while (x + 1 < variantsCount) {
            final List<Runner> runners = new Vector<>(this.number_of_jobs);
            while (x + 1 < variantsCount && runners.size() < this.number_of_jobs) {
                LOG.info("starting " + x + "/" + variantsCount);
                runners.add(new Runner(
                        inMemoryVariants == null ? VcfList.fromFile(vcfFile, tmpIndexFile) : new Vector<>(inMemoryVariants),
                        x, caseIndexes, ctrlIndexes));
                ++x;
            }
            final ExecutorService execSvc;
            if (this.number_of_jobs == 1) {
                execSvc = null;
            } else {
                execSvc = Executors.newFixedThreadPool(this.number_of_jobs);
            }
            if (this.number_of_jobs == 1) {
                runners.get(0).call();
            } else {
                execSvc.invokeAll(runners);
            }
            if (execSvc != null) {
                execSvc.shutdown();
                execSvc.awaitTermination(10000L, TimeUnit.DAYS);
                execSvc.shutdownNow();
            }
            runners.stream().mapToLong(R -> R.duration).min().ifPresent(D -> {
                LOG.info("That took " + (D / 1000f) + " seconds.");
            });
            for (final Runner r : runners) {
                final Result rez = r.result;
                if (rez == null)
                    continue;
                if (bestResult == null || bestResult.score < rez.score) {
                    bestResult = rez;
                    if (this.output_score) {
                        final PrintWriter pw = super.openFileOrStdoutAsPrintWriter(this.outputFile);
                        pw.println(bestResult.score + "\t" + bestResult.toString());
                        pw.flush();
                        pw.close();
                    } else {
                        final VariantContextWriter w = openVariantContextWriter(this.outputFile);
                        final VCFHeader header2 = new VCFHeader(header);
                        header2.addMetaDataLine(new VCFHeaderLine(VcfEpistatis01.class.getName(), bestResult.toString()));
                        w.writeHeader(header2);
                        w.add(bestResult.ctx1);
                        w.add(bestResult.ctx2);
                        w.close();
                    }
                }
            }
            LOG.info("best: " + bestResult);
        }
        if (tmpIndexFile != null)
            tmpIndexFile.delete();
        return 0;
    } catch (final Exception err) {
        err.printStackTrace();
        LOG.error(err);
        return -1;
    }
}
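The in-memory branch is built on CloseableIterator's stream() default method (the same call as iter2.stream() above), which makes filter-and-collect a one-liner. A minimal sketch of that idiom, assuming a placeholder input.vcf.gz; the genotype predicate mirrors the snippet's called-genotype filter:

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFFileReader;

import java.io.File;
import java.util.List;
import java.util.stream.Collectors;

public class LoadVariantsDemo {
    public static void main(final String[] args) {
        final VCFFileReader reader = new VCFFileReader(new File("input.vcf.gz"), false);  // placeholder path
        final CloseableIterator<VariantContext> iter = reader.iterator();
        // CloseableIterator.stream() views the iterator as a java.util.stream.Stream;
        // the iterator still has to be closed once the stream is consumed.
        final List<VariantContext> variants = iter.stream()
                .filter(v -> v.getGenotypes().stream().anyMatch(g -> g.isCalled()))  // keep records with at least one called genotype
                .collect(Collectors.toList());
        iter.close();
        reader.close();
        System.out.println("kept " + variants.size() + " variants");
    }
}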