use of htsjdk.samtools.filter.InsertSizeFilter in project gatk by broadinstitute.
the class CollectOxoGMetrics method doWork.
@Override
protected Object doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
if (INTERVALS != null)
IOUtil.assertFileIsReadable(INTERVALS);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
final SamReader in = SamReaderFactory.makeDefault().open(INPUT);
final Set<String> samples = new HashSet<>();
final Set<String> libraries = new HashSet<>();
for (final SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
samples.add(getOrElse(rec.getSample(), UNKNOWN_SAMPLE));
libraries.add(getOrElse(rec.getLibrary(), UNKNOWN_LIBRARY));
}
// Setup the calculators
final Set<String> contexts = CONTEXTS.isEmpty() ? makeContextStrings(CONTEXT_SIZE) : CONTEXTS;
final ListMap<String, Calculator> calculators = new ListMap<>();
for (final String context : contexts) {
for (final String library : libraries) {
calculators.add(context, new Calculator(library, context));
}
}
// Load up dbSNP if available
logger.info("Loading dbSNP File: " + DB_SNP);
final DbSnpBitSetUtil dbSnp;
if (DB_SNP != null)
dbSnp = new DbSnpBitSetUtil(DB_SNP, in.getFileHeader().getSequenceDictionary());
else
dbSnp = null;
// Make an iterator that will filter out funny looking things
final SamLocusIterator iterator;
if (INTERVALS != null) {
final IntervalList intervals = IntervalList.fromFile(INTERVALS);
iterator = new SamLocusIterator(in, intervals.uniqued(), false);
} else {
iterator = new SamLocusIterator(in);
}
iterator.setEmitUncoveredLoci(false);
iterator.setMappingQualityScoreCutoff(MINIMUM_MAPPING_QUALITY);
final List<SamRecordFilter> filters = new ArrayList<>();
filters.add(new NotPrimaryAlignmentFilter());
filters.add(new DuplicateReadFilter());
if (MINIMUM_INSERT_SIZE > 0 || MAXIMUM_INSERT_SIZE > 0) {
filters.add(new InsertSizeFilter(MINIMUM_INSERT_SIZE, MAXIMUM_INSERT_SIZE));
}
iterator.setSamFilters(filters);
logger.info("Starting iteration.");
long nextLogTime = 0;
int sites = 0;
for (final SamLocusIterator.LocusInfo info : iterator) {
// Skip dbSNP sites
final String chrom = info.getSequenceName();
final int pos = info.getPosition();
final int index = pos - 1;
if (dbSnp != null && dbSnp.isDbSnpSite(chrom, pos))
continue;
// Skip sites at the end of chromosomes
final byte[] bases = refWalker.get(info.getSequenceIndex()).getBases();
if (pos < 3 || pos > bases.length - 3)
continue;
// Skip non C-G bases
final byte base = StringUtil.toUpperCase(bases[index]);
if (base != 'C' && base != 'G')
continue;
// Get the context string
final String context;
{
final String tmp = StringUtil.bytesToString(bases, index - CONTEXT_SIZE, 1 + (2 * CONTEXT_SIZE)).toUpperCase();
if (base == 'C')
context = tmp;
else
/* if G */
context = SequenceUtil.reverseComplement(tmp);
}
final List<Calculator> calculatorsForContext = calculators.get(context);
// happens if we get ambiguous bases in the reference
if (calculatorsForContext == null)
continue;
for (final Calculator calc : calculatorsForContext) calc.accept(info, base);
// See if we need to stop
if (++sites % 100 == 0) {
final long now = System.currentTimeMillis();
if (now > nextLogTime) {
logger.info("Visited " + sites + " sites of interest. Last site: " + chrom + ":" + pos);
nextLogTime = now + 60000;
}
}
if (sites >= STOP_AFTER)
break;
}
final MetricsFile<CpcgMetrics, Integer> file = getMetricsFile();
for (final List<Calculator> calcs : calculators.values()) {
for (final Calculator calc : calcs) {
final CpcgMetrics m = calc.finish();
m.SAMPLE_ALIAS = StringUtil.join(",", new ArrayList<>(samples));
file.addMetric(m);
}
}
file.write(OUTPUT);
CloserUtil.close(in);
return null;
}
Aggregations