Use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.
Class GATKReadFilterPluginDescriptor, method getAllInstances.
/**
 * Returns the {@link ReadFilter} instances for the filters that were named on the command line,
 * in the same order in which they were specified. A filter that was originally part of the tool
 * defaults is returned only if the user also named it explicitly on the command line.
 *
 * NOTE: this method is somewhat misnamed, since it does not return ALL instances: default filters
 * are left out (except as noted above). The refactored interface in Barclay renames this method
 * and changes its contract; the implementation will change when the updated interface is integrated.
 *
 * @return a new list with one entry per user-enabled filter name, each entry being the result of
 *         looking that name up in {@code allDiscoveredReadFilters}
 */
@Override
public List<ReadFilter> getAllInstances() {
    final ArrayList<ReadFilter> instances =
            new ArrayList<>(userArgs.getUserEnabledReadFilterNames().size());
    for (final String filterName : userArgs.getUserEnabledReadFilterNames()) {
        instances.add(allDiscoveredReadFilters.get(filterName));
    }
    return instances;
}
Use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.
Class LocusWalker, method getDefaultReadFilters.
/**
 * Returns the default list of read filters used by this tool. The filters returned by this
 * method are subject to selective enabling/disabling by the user via the command line. This
 * implementation supplies a {@link WellformedReadFilter} followed by a
 * {@link ReadFilterLibrary.MappedReadFilter}, both constructed with their default options.
 * Subclasses can override to provide alternative filters.
 *
 * Note: this method is called before command line parsing begins, and thus before a SAMFileHeader
 * is available through {@link #getHeaderForReads}.
 *
 * @return a new, modifiable list of the individual filters to be applied for this tool.
 */
public List<ReadFilter> getDefaultReadFilters() {
    final ReadFilter wellformed = new WellformedReadFilter();
    final ReadFilter mapped = new ReadFilterLibrary.MappedReadFilter();

    final List<ReadFilter> filters = new ArrayList<>(2);
    filters.add(wellformed);
    filters.add(mapped);
    return filters;
}
Use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.
Class AssemblyRegionWalkerSpark, method getDefaultReadFilters.
/**
 * Returns the default read filters for this tool: a {@link WellformedReadFilter} followed by a
 * {@link ReadFilterLibrary.MappedReadFilter}, each created with its no-argument constructor.
 *
 * @return a new, modifiable list containing the two default filter instances.
 */
@Override
public List<ReadFilter> getDefaultReadFilters() {
    final List<ReadFilter> filters = new ArrayList<>(2);

    final ReadFilter wellformed = new WellformedReadFilter();
    filters.add(wellformed);

    final ReadFilter mapped = new ReadFilterLibrary.MappedReadFilter();
    filters.add(mapped);

    return filters;
}
Use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.
Class AssemblyRegionWalker, method getDefaultReadFilters.
/**
 * Returns the default list of read filters used by this tool. The filters returned by this
 * method are subject to selective enabling/disabling and customization by the user via the
 * command line. This implementation supplies the {@link WellformedReadFilter} with all default
 * options, as well as the {@link ReadFilterLibrary.MappedReadFilter}. Subclasses can override
 * to provide alternative filters.
 *
 * Note: this method is called before command line parsing begins, and thus before a SAMFileHeader
 * is available through {@link #getHeaderForReads}.
 *
 * @return a new, modifiable list of default filter instances to be applied for this tool.
 */
public List<ReadFilter> getDefaultReadFilters() {
    final List<ReadFilter> toolDefaults = new ArrayList<>(2);
    final ReadFilter wellformedFilter = new WellformedReadFilter();
    toolDefaults.add(wellformedFilter);
    final ReadFilter mappedFilter = new ReadFilterLibrary.MappedReadFilter();
    toolDefaults.add(mappedFilter);
    return toolDefaults;
}
Use of org.broadinstitute.hellbender.engine.filters.ReadFilter in project gatk by broadinstitute.
Class SparkGenomeReadCounts, method collectReads.
/**
 * Counts reads per genomic bin with Spark and writes the results to disk.
 *
 * Steps, in order:
 * <ol>
 *   <li>Require exactly one reads input and at most one sample in its header.</li>
 *   <li>Filter the reads, key each read whose contig is present in the reference sequence
 *       dictionary via {@code createKey}, and count reads per key.</li>
 *   <li>Create the full set of genome bins and write them to
 *       {@code <outputFile>.targets.tsv}.</li>
 *   <li>Add a zero count for every bin that received no reads, then sort the bins with
 *       {@code IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR}.</li>
 *   <li>Write raw counts to {@code <outputFile> + RAW_COV_OUTPUT_EXTENSION} and proportional
 *       counts (count divided by the total number of counted reads) to {@code outputFile}.</li>
 * </ol>
 * Elapsed wall-clock time of each phase is logged in whole seconds.
 *
 * @throws UserException if the number of read inputs is not exactly one
 * @throws UserException.BadInput if the reads header declares more than one sample
 */
private void collectReads() {
    if (readArguments.getReadFilesNames().size() != 1) {
        throw new UserException("This tool only accepts a single bam/sam/cram as input");
    }
    final SampleCollection samples = new SampleCollection(getHeaderForReads());
    if (samples.sampleCount() > 1) {
        throw new UserException.BadInput("We do not support bams with more than one sample.");
    }
    final String sampleName = samples.sampleIds().get(0);

    // The first two header comments are shared by the raw and the proportional output files.
    final String fileFormatComment = "##fileFormat = tsv";
    final String commandLineComment = "##commandLine = " + getCommandLine();
    final String[] commentsForRawCoverage = {
            fileFormatComment,
            commandLineComment,
            String.format("##title = Coverage counts in %d base bins for WGS", binsize)
    };

    final ReadFilter filter = makeGenomeReadFilter();
    final SAMSequenceDictionary sequenceDictionary = getReferenceSequenceDictionary();

    // ---- Spark coverage collection ----
    logger.info("Starting Spark coverage collection...");
    final long coverageStartMillis = System.currentTimeMillis();
    final JavaRDD<GATKRead> unfilteredReads = getReads();
    final JavaRDD<GATKRead> reads = unfilteredReads.filter(read -> filter.test(read));

    // Note: referencing a field inside a closure pulls the whole enclosing object into
    // serialization (bad for performance, and it fails outright if any field is not
    // Serializable - closures always use Java serialization, not Kryo).
    // binsize is just an int, so copy it into a local and let the closure capture that instead.
    final int binsizeForClosure = binsize;
    final JavaRDD<GATKRead> readsOnKnownContigs =
            reads.filter(read -> sequenceDictionary.getSequence(read.getContig()) != null);
    final JavaRDD<SimpleInterval> readIntervals =
            readsOnKnownContigs.map(read -> SparkGenomeReadCounts.createKey(read, sequenceDictionary, binsizeForClosure));
    final Map<SimpleInterval, Long> countsByBin = readIntervals.countByValue();
    final int populatedBinCount = countsByBin.keySet().size();
    final long totalReads = countsByBin.values().stream().mapToLong(Long::longValue).sum();
    final long coverageElapsedSeconds = (System.currentTimeMillis() - coverageStartMillis) / 1000;
    logger.info(String.format("Finished the spark coverage collection with %d targets and %d reads. Elapse of %d seconds", populatedBinCount, totalReads, coverageElapsedSeconds));

    final String[] commentsForProportionalCoverage = {
            fileFormatComment,
            commandLineComment,
            String.format("##title = Proportional coverage counts in %d base bins for WGS (total reads: %d)", binsize, totalReads)
    };

    // ---- Full genome bins, also written out as a targets file ----
    logger.info("Creating full genome bins...");
    final long genomeBinsStartMillis = System.currentTimeMillis();
    final List<SimpleInterval> fullGenomeBins = createFullGenomeBins(binsize);
    final List<Target> fullGenomeTargets = createTargetListFromSimpleInterval(fullGenomeBins);
    final File targetsFile = new File(outputFile.getAbsolutePath() + ".targets.tsv");
    TargetWriter.writeTargetsToFile(targetsFile, fullGenomeTargets);
    final long genomeBinsElapsedSeconds = (System.currentTimeMillis() - genomeBinsStartMillis) / 1000;
    logger.info(String.format("Finished creating genome bins. Elapse of %d seconds", genomeBinsElapsedSeconds));

    // ---- Zero-fill bins that received no reads ----
    logger.info("Creating missing genome bins...");
    final long missingBinsStartMillis = System.currentTimeMillis();
    logger.info("Creating missing genome bins: Creating a mutable mapping...");
    final Map<SimpleInterval, Long> countsWithEmptyBins = new HashMap<>(countsByBin);
    logger.info("Creating missing genome bins: Populating mutable mapping with zero counts for empty regions...");
    for (final SimpleInterval bin : fullGenomeBins) {
        countsWithEmptyBins.putIfAbsent(bin, 0L);
    }
    final long missingBinsElapsedSeconds = (System.currentTimeMillis() - missingBinsStartMillis) / 1000;
    logger.info(String.format("Finished creating missing genome bins. Elapse of %d seconds", missingBinsElapsedSeconds));

    // ---- Sorted raw counts ----
    logger.info("Creating final map...");
    final long finalMapStartMillis = System.currentTimeMillis();
    final SortedMap<SimpleInterval, Long> sortedCounts = new TreeMap<>(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR);
    sortedCounts.putAll(countsWithEmptyBins);
    final long finalMapElapsedSeconds = (System.currentTimeMillis() - finalMapStartMillis) / 1000;
    logger.info(String.format("Finished creating final map. Elapse of %d seconds", finalMapElapsedSeconds));

    // ---- Sorted proportional counts (each bin's count divided by the total read count) ----
    logger.info("Creating proportional coverage... ");
    final long proportionalStartMillis = System.currentTimeMillis();
    final SortedMap<SimpleInterval, Double> sortedProportions = new TreeMap<>(IntervalUtils.LEXICOGRAPHICAL_ORDER_COMPARATOR);
    for (final Map.Entry<SimpleInterval, Long> binCount : sortedCounts.entrySet()) {
        sortedProportions.put(binCount.getKey(), (double) binCount.getValue() / totalReads);
    }
    final long proportionalElapsedSeconds = (System.currentTimeMillis() - proportionalStartMillis) / 1000;
    logger.info(String.format("Finished creating proportional coverage map. Elapse of %d seconds", proportionalElapsedSeconds));

    // ---- Output: raw coverage ----
    logger.info("Writing raw coverage file ...");
    final long rawWriteStartMillis = System.currentTimeMillis();
    final File rawCoverageFile = new File(outputFile.getAbsolutePath() + RAW_COV_OUTPUT_EXTENSION);
    ReadCountCollectionUtils.writeReadCountsFromSimpleInterval(rawCoverageFile, sampleName, sortedCounts, commentsForRawCoverage);
    final long rawWriteElapsedSeconds = (System.currentTimeMillis() - rawWriteStartMillis) / 1000;
    logger.info(String.format("Finished writing coverage file. Elapse of %d seconds", rawWriteElapsedSeconds));

    // ---- Output: proportional coverage ----
    logger.info("Writing proportional coverage file ...");
    final long proportionalWriteStartMillis = System.currentTimeMillis();
    ReadCountCollectionUtils.writeReadCountsFromSimpleInterval(outputFile, sampleName, sortedProportions, commentsForProportionalCoverage);
    final long proportionalWriteElapsedSeconds = (System.currentTimeMillis() - proportionalWriteStartMillis) / 1000;
    logger.info(String.format("Finished writing proportional coverage file. Elapse of %d seconds", proportionalWriteElapsedSeconds));
}
Aggregations