Search in sources :

Example 81 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class IntervalOverlappingIteratorUnitTest method getData.

/**
 * Supplies the rows for the "data" provider.
 *
 * Each row holds, in order: the intervals, the sequence dictionary, an array of
 * records, and the sub-array of those records that overlaps at least one of the
 * intervals (empty when none does).
 */
@DataProvider(name = "data")
public Object[][] getData() {
    // sequence dictionary with two contigs, "1" and "2"
    final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
    dictionary.addSequence(new SAMSequenceRecord("1", 1000000));
    dictionary.addSequence(new SAMSequenceRecord("2", 1000000));

    // interval sets, one per contig
    final List<SimpleInterval> contig1Intervals = Arrays.asList(
            new SimpleInterval("1:500-600"),
            new SimpleInterval("1:700-800"));
    final List<SimpleInterval> contig2Intervals = Arrays.asList(
            new SimpleInterval("2:100-200"),
            new SimpleInterval("2:400-1000"));

    // records shared between the cases below
    final SimpleInterval rec1From1To100 = new SimpleInterval("1:1-100");
    final SimpleInterval rec1From1To800 = new SimpleInterval("1:1-800");
    final SimpleInterval rec1From500To600 = new SimpleInterval("1:500-600");
    final SimpleInterval rec1From700To750 = new SimpleInterval("1:700-750");
    final SimpleInterval rec2From100To150 = new SimpleInterval("2:100-150");
    final SimpleInterval rec2From900To999 = new SimpleInterval("2:900-999");

    // first record starts before the first interval, second record overlaps the first interval
    final Object[] recordBeforeFirstInterval = {
            contig1Intervals, dictionary,
            new SimpleInterval[] { rec1From1To100, rec1From500To600, rec2From900To999 },
            new SimpleInterval[] { rec1From500To600 } };

    // first record starts after the first interval, second interval overlaps the first record
    final Object[] recordAfterFirstInterval = {
            contig1Intervals, dictionary,
            new SimpleInterval[] { rec1From700To750, rec2From900To999 },
            new SimpleInterval[] { rec1From700To750 } };

    // first interval is on a later contig than the first record, but overlaps later records
    final Object[] intervalOnLaterContig = {
            contig2Intervals, dictionary,
            new SimpleInterval[] { rec1From1To100, rec2From900To999 },
            new SimpleInterval[] { rec2From900To999 } };

    // first interval is on an earlier contig than the first record, but later records overlap later intervals
    final Object[] intervalOnEarlierContig = {
            ListUtils.union(contig1Intervals, contig2Intervals), dictionary,
            new SimpleInterval[] { rec2From100To150, rec2From900To999 },
            new SimpleInterval[] { rec2From100To150, rec2From900To999 } };

    // no records overlap any intervals
    final Object[] nothingOverlaps = {
            contig1Intervals, dictionary,
            new SimpleInterval[] { rec2From900To999 },
            new SimpleInterval[0] };

    // an interval overlaps multiple records
    final Object[] intervalHitsSeveralRecords = {
            contig1Intervals, dictionary,
            new SimpleInterval[] { rec1From1To800, rec1From500To600, rec2From900To999 },
            new SimpleInterval[] { rec1From1To800, rec1From500To600 } };

    // a record overlaps multiple intervals
    final Object[] recordHitsSeveralIntervals = {
            contig1Intervals, dictionary,
            new SimpleInterval[] { rec1From1To800, rec2From100To150 },
            new SimpleInterval[] { rec1From1To800 } };

    return new Object[][] {
            recordBeforeFirstInterval,
            recordAfterFirstInterval,
            intervalOnLaterContig,
            intervalOnEarlierContig,
            nothingOverlaps,
            intervalHitsSeveralRecords,
            recordHitsSeveralIntervals };
}
Also used : SAMSequenceRecord(htsjdk.samtools.SAMSequenceRecord) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) DataProvider(org.testng.annotations.DataProvider)

Example 82 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk by broadinstitute.

the class ShardedIntervalIteratorUnitTest method getData.

/**
 * Supplies the rows for the "simpleData" provider.
 *
 * Each row holds the same two-interval list followed by two integers.
 * NOTE(review): the integers look like a shard size and the expected number of
 * shards -- confirm against the consuming test.
 */
@DataProvider(name = "simpleData")
public Object[][] getData() {
    final List<SimpleInterval> twoIntervals = new ArrayList<>(2);
    twoIntervals.add(new SimpleInterval("1", 100, 200));
    twoIntervals.add(new SimpleInterval("1", 500, 550));

    // 152 = 101 + 51, the inclusive lengths of 100-200 and 500-550
    final Object[] unitCase = { twoIntervals, 1, 152 };
    // presumably ceil(101 / 10) + ceil(51 / 10) = 11 + 6; kept as a sum so the per-interval parts stay visible
    final Object[] tenCase = { twoIntervals, 10, 11 + 6 };

    return new Object[][] { unitCase, tenCase };
}
Also used : ArrayList(java.util.ArrayList) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) DataProvider(org.testng.annotations.DataProvider)

Example 83 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class CalculatePulldownPhasePosteriors method doWork.

/**
 * Reads the allelic counts, modeled segments and parameters, calculates the
 * phase posteriors and writes them to the output file.
 *
 * @return the string {@code "SUCCESS"} once the output has been written.
 * @throws UserException.HardwareFeatureException if the HDF5 library cannot be loaded.
 */
@Override
public Object doWork() {
    // Passing null means using the default temp dir.
    final boolean hdf5Available = new HDF5Library().load(null);
    if (!hdf5Available) {
        throw new UserException.HardwareFeatureException("Cannot load the required HDF5 library. " + "HDF5 is currently supported on x86-64 architecture and Linux or OSX systems.");
    }

    // Inputs are read in a fixed order: counts, then segments, then parameters.
    final AllelicCountCollection snpCounts = new AllelicCountCollection(snpCountsFile);
    final List<ACNVModeledSegment> modeledSegments = SegmentUtils.readACNVModeledSegmentFile(segmentsFile);
    final AlleleFractionState fittedState = reconstructState(modeledSegments, parametersFile);

    // The allelic-bias panel of normals is optional; fall back to the empty one.
    final AllelicPanelOfNormals panelOfNormals;
    if (allelicPoNFile != null) {
        panelOfNormals = AllelicPanelOfNormals.read(allelicPoNFile);
    } else {
        panelOfNormals = AllelicPanelOfNormals.EMPTY_PON;
    }

    // Only the interval of each modeled segment is passed on to the calculation.
    final List<SimpleInterval> segmentIntervals = modeledSegments.stream()
            .map(ACNVModeledSegment::getInterval)
            .collect(Collectors.toList());
    final AllelicCountWithPhasePosteriorsCollection phasedCounts =
            calculatePhasePosteriors(snpCounts, segmentIntervals, fittedState, panelOfNormals);

    // Write with the same verbosity as the input counts file.
    phasedCounts.write(outputFile, snpCounts.getVerbosity());
    return "SUCCESS";
}
Also used : AllelicPanelOfNormals(org.broadinstitute.hellbender.tools.pon.allelic.AllelicPanelOfNormals) AllelicCountWithPhasePosteriorsCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountWithPhasePosteriorsCollection) HDF5Library(org.broadinstitute.hdf5.HDF5Library) AllelicCountCollection(org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCountCollection) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 84 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class CalculateTargetCoverage method writeOutputRows.

/**
 * Writes the row in the main matrix output file for a target and, if a row
 * summary writer is present, the corresponding row in the row summary output.
 *
 * @param countBuffer  the counts for the target, one per count column.
 * @param columnTotals values passed to the transform alongside each count, indexed
 *                     like {@code countBuffer}.
 * @param index the index of target within the target collection.
 */
private void writeOutputRows(final int[] countBuffer, final long[] columnTotals, final int index) {
    // Transform every count and join the results with the column separator.
    final StringBuilder joinedCounts = new StringBuilder();
    for (int column = 0; column < countBuffer.length; column++) {
        if (column > 0) {
            joinedCounts.append(COLUMN_SEPARATOR);
        }
        joinedCounts.append(transform.apply(countBuffer[column], columnTotals[column]));
    }
    final String countString = joinedCounts.toString();

    final String targetInfoString = targetOutInfo.composeTargetOutInfoString(index, targetCollection);
    outputWriter.println(String.join(COLUMN_SEPARATOR, targetInfoString, countString));

    if (rowSummaryOutputWriter == null) {
        // No row summary requested.
        return;
    }

    final long sum = MathUtils.sum(countBuffer);
    final SimpleInterval location = targetCollection.location(index);
    final int targetSize = location.size();
    // Average per column and per base; deliberately kept in float arithmetic as before.
    final float cellCount = (float) countColumns.columnCount() * targetSize;
    final String averageString = String.format(AVERAGE_DOUBLE_FORMAT, sum / cellCount);
    rowSummaryOutputWriter.println(String.join(COLUMN_SEPARATOR, targetInfoString, Long.toString(sum), averageString));
}
Also used : DocumentedFeature(org.broadinstitute.barclay.help.DocumentedFeature) IntStream(java.util.stream.IntStream) CommandLineProgramProperties(org.broadinstitute.barclay.argparser.CommandLineProgramProperties) java.util(java.util) CopyNumberProgramGroup(org.broadinstitute.hellbender.cmdline.programgroups.CopyNumberProgramGroup) Argument(org.broadinstitute.barclay.argparser.Argument) Level(org.apache.logging.log4j.Level) StandardArgumentDefinitions(org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions) GATKRead(org.broadinstitute.hellbender.utils.read.GATKRead) GATKException(org.broadinstitute.hellbender.exceptions.GATKException) Function(java.util.function.Function) ParamUtils(org.broadinstitute.hellbender.utils.param.ParamUtils) ReadWalker(org.broadinstitute.hellbender.engine.ReadWalker) ReferenceContext(org.broadinstitute.hellbender.engine.ReferenceContext) FeatureContext(org.broadinstitute.hellbender.engine.FeatureContext) PrintWriter(java.io.PrintWriter) IOUtils(org.broadinstitute.hellbender.utils.io.IOUtils) SAMSequenceDictionary(htsjdk.samtools.SAMSequenceDictionary) IOException(java.io.IOException) ReadFilter(org.broadinstitute.hellbender.engine.filters.ReadFilter) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) Collectors(java.util.stream.Collectors) File(java.io.File) Logger(org.apache.logging.log4j.Logger) MathUtils(org.broadinstitute.hellbender.utils.MathUtils) UserException(org.broadinstitute.hellbender.exceptions.UserException) Utils(org.broadinstitute.hellbender.utils.Utils) LogManager(org.apache.logging.log4j.LogManager) ReadFilterLibrary(org.broadinstitute.hellbender.engine.filters.ReadFilterLibrary) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval)

Example 85 with SimpleInterval

use of org.broadinstitute.hellbender.utils.SimpleInterval in project gatk-protected by broadinstitute.

the class CombineReadCounts method readCountFileReader.

/**
 * Creates a read-count file reader given the input file and the expected target collection.
 *
 * The header must contain the target name column, the three coordinate columns
 * (contig, start, end), or both; otherwise a format exception is thrown while
 * processing the header.
 *
 * @param file the input file.
 * @param targets the expected targets in the input file.
 * @return never {@code null}.
 * @throws UserException.CouldNotReadInputFile if constructing the reader fails with an {@link IOException}.
 */
private TableReader<ReadCountRecord> readCountFileReader(final File file, final TargetCollection<Target> targets) {
    try {
        return new TableReader<ReadCountRecord>(file) {

            // Whether the header contains the target name column.
            private boolean hasName;

            // Whether the header contains all of the contig, start and end columns.
            private boolean hasCoordinates;

            // Number of count columns found in the header.
            private int countColumnCount;

            // Index within the data line of each count column.
            private int[] countColumnIndexes;

            @Override
            public void processColumns(final TableColumnCollection columns) {
                hasCoordinates = columns.containsAll(TargetTableColumn.CONTIG.toString(), TargetTableColumn.START.toString(), TargetTableColumn.END.toString());
                hasName = columns.contains(TargetTableColumn.NAME.toString());
                if (!hasCoordinates && !hasName) {
                    throw formatException("header contain neither coordinates nor target name columns");
                }
                final List<String> countColumnNames = readCountColumnNames(columns);
                countColumnCount = countColumnNames.size();
                countColumnIndexes = new int[countColumnCount];
                for (int i = 0; i < countColumnCount; i++) {
                    countColumnIndexes[i] = columns.indexOf(countColumnNames.get(i));
                }
            }

            @Override
            protected ReadCountRecord createRecord(final DataLine dataLine) {
                final double[] counts = new double[countColumnCount];
                // The target is resolved before the counts are read, so target errors surface first.
                final Target target = createTarget(dataLine);
                for (int i = 0; i < counts.length; i++) {
                    counts[i] = dataLine.getDouble(countColumnIndexes[i]);
                }
                return new ReadCountRecord(target, counts);
            }

            /**
             * Extracts the target object out of a data input line.
             *
             * When a name column is present the target is looked up by name; a name
             * unknown to the target collection yields a new target built from the line.
             * Without a name column the target is looked up by its exact coordinates.
             *
             * @param dataLine the input data line.
             * @return never {@code null}.
             */
            private Target createTarget(final DataLine dataLine) {
                if (hasName) {
                    final String name = dataLine.get(TargetTableColumn.NAME);
                    final Target target = targets.target(name);
                    // Parsed once and reused below; null when the file has no coordinate columns.
                    final SimpleInterval interval = createInterval(dataLine);
                    if (target == null) {
                        return new Target(name, interval);
                    } else if (interval != null && !interval.equals(target.getInterval())) {
                        throw new UserException.BadInput(String.format("invalid target '%s' coordinates: expected %s but found %s", name, target.getInterval(), interval));
                    } else {
                        return target;
                    }
                }
                // hasCoordinates must be true.
                final SimpleInterval interval = createInterval(dataLine);
                // Look for the exact match among all candidates rather than testing a single
                // arbitrary one, so a valid target is not rejected because of candidate order.
                return targets.targets(interval).stream()
                        .filter(candidate -> candidate.getInterval().equals(interval))
                        .findFirst()
                        .orElseThrow(() -> formatException("target not found with coordinates " + interval));
            }

            /**
             * Extract the interval out of a data line.
             *
             * @param dataLine the input data line.
             * @return {@code null} if the interval cannot be determined from the input file alone.
             */
            private SimpleInterval createInterval(final DataLine dataLine) {
                if (!hasCoordinates) {
                    return null;
                }
                return new SimpleInterval(dataLine.get(TargetTableColumn.CONTIG), dataLine.getInt(TargetTableColumn.START), dataLine.getInt(TargetTableColumn.END));
            }
        };
    } catch (final IOException ex) {
        throw new UserException.CouldNotReadInputFile(file, ex);
    }
}
Also used : DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader)

Aggregations

SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)545 Test (org.testng.annotations.Test)287 BaseTest (org.broadinstitute.hellbender.utils.test.BaseTest)202 File (java.io.File)102 ArrayList (java.util.ArrayList)66 DataProvider (org.testng.annotations.DataProvider)64 GATKRead (org.broadinstitute.hellbender.utils.read.GATKRead)60 Collectors (java.util.stream.Collectors)53 java.util (java.util)41 SAMSequenceDictionary (htsjdk.samtools.SAMSequenceDictionary)40 AllelicCount (org.broadinstitute.hellbender.tools.exome.alleliccount.AllelicCount)40 UserException (org.broadinstitute.hellbender.exceptions.UserException)39 VariantContext (htsjdk.variant.variantcontext.VariantContext)36 IntStream (java.util.stream.IntStream)34 Target (org.broadinstitute.hellbender.tools.exome.Target)34 IOException (java.io.IOException)32 JavaSparkContext (org.apache.spark.api.java.JavaSparkContext)28 Assert (org.testng.Assert)27 Locatable (htsjdk.samtools.util.Locatable)26 List (java.util.List)26