Search in sources :

Example 1 with TableReader

use of org.broadinstitute.hellbender.utils.tsv.TableReader in project gatk by broadinstitute.

In the class CombineReadCounts, method readCountFileReader.

/**
 * Creates a read-count file reader given the input file and the expected target collection.
 * <p>
 * The header must contain the target coordinate columns (contig, start and end),
 * the target name column, or both; otherwise a format exception is raised while
 * processing the header.
 * </p>
 *
 * @param file the input file.
 * @param targets the expected targets in the input file.
 * @return never {@code null}.
 * @throws UserException.CouldNotReadInputFile if an {@link IOException} is thrown while creating the reader.
 */
private TableReader<ReadCountRecord> readCountFileReader(final File file, final TargetCollection<Target> targets) {
    try {
        return new TableReader<ReadCountRecord>(file) {

            /** Whether the header contains the target name column. */
            private boolean hasName;

            /** Whether the header contains all of the contig, start and end columns. */
            private boolean hasCoordinates;

            /** Number of read-count columns found in the header. */
            private int countColumnCount;

            /** Column index of each read-count column, in the order returned by {@code readCountColumnNames}. */
            private int[] countColumnIndexes;

            @Override
            public void processColumns(final TableColumnCollection columns) {
                hasCoordinates = columns.containsAll(TargetTableColumn.CONTIG.toString(), TargetTableColumn.START.toString(), TargetTableColumn.END.toString());
                hasName = columns.contains(TargetTableColumn.NAME.toString());
                if (!hasCoordinates && !hasName) {
                    throw formatException("header contain neither coordinates nor target name columns");
                }
                // Resolve the count column indexes once so that records can be read by index.
                final List<String> countColumnNames = readCountColumnNames(columns);
                countColumnCount = countColumnNames.size();
                countColumnIndexes = new int[countColumnCount];
                for (int i = 0; i < countColumnCount; i++) {
                    countColumnIndexes[i] = columns.indexOf(countColumnNames.get(i));
                }
            }

            @Override
            protected ReadCountRecord createRecord(final DataLine dataLine) {
                final double[] counts = new double[countColumnCount];
                // The target is resolved before the counts are parsed, so target errors are reported first.
                final Target target = createTarget(dataLine);
                for (int i = 0; i < counts.length; i++) {
                    counts[i] = dataLine.getDouble(countColumnIndexes[i]);
                }
                return new ReadCountRecord(target, counts);
            }

            /**
             * Extracts the target object out of a data input line.
             * <p>
             * When the name column is present the target is looked up by name; an unknown name
             * yields a new target built from the line, and a known name whose coordinates in the
             * line disagree with the expected ones is rejected. Otherwise the target is looked up
             * by its exact coordinates.
             * </p>
             *
             * @param dataLine the input data line.
             * @return never {@code null}.
             */
            private Target createTarget(final DataLine dataLine) {
                if (hasName) {
                    final String name = dataLine.get(TargetTableColumn.NAME);
                    final Target target = targets.target(name);
                    // Parsed once and reused below; null when the file has no coordinate columns.
                    final SimpleInterval interval = createInterval(dataLine);
                    if (target == null) {
                        return new Target(name, interval);
                    } else if (interval != null && !interval.equals(target.getInterval())) {
                        throw new UserException.BadInput(String.format("invalid target '%s' coordinates: expected %s but found %s", name, target.getInterval(), interval));
                    } else {
                        return target;
                    }
                } else {
                    // hasCoordinates must be true.
                    final SimpleInterval interval = createInterval(dataLine);
                    // The lookup may return candidates whose interval differs from the requested
                    // one, so select the exact match instead of testing an arbitrary candidate.
                    final Optional<Target> target = targets.targets(interval).stream()
                            .filter(candidate -> interval.equals(candidate.getInterval()))
                            .findFirst();
                    if (!target.isPresent()) {
                        throw formatException("target not found with coordinates " + interval);
                    }
                    return target.get();
                }
            }

            /**
             * Extract the interval out of a data line.
             *
             * @param dataLine the input data line.
             * @return {@code null} if the interval cannot be determined from the input file alone.
             */
            private SimpleInterval createInterval(final DataLine dataLine) {
                if (hasCoordinates) {
                    return new SimpleInterval(dataLine.get(TargetTableColumn.CONTIG), dataLine.getInt(TargetTableColumn.START), dataLine.getInt(TargetTableColumn.END));
                } else {
                    return null;
                }
            }
        };
    } catch (final IOException ex) {
        throw new UserException.CouldNotReadInputFile(file, ex);
    }
}
Also used : DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader)

Example 2 with TableReader

use of org.broadinstitute.hellbender.utils.tsv.TableReader in project gatk-protected by broadinstitute.

In the class CombineReadCounts, method readCountFileReader.

/**
 * Creates a read-count file reader given the input file and the expected target collection.
 * <p>
 * The header must contain the target coordinate columns (contig, start and end),
 * the target name column, or both; otherwise a format exception is raised while
 * processing the header.
 * </p>
 *
 * @param file the input file.
 * @param targets the expected targets in the input file.
 * @return never {@code null}.
 * @throws UserException.CouldNotReadInputFile if an {@link IOException} is thrown while creating the reader.
 */
private TableReader<ReadCountRecord> readCountFileReader(final File file, final TargetCollection<Target> targets) {
    try {
        return new TableReader<ReadCountRecord>(file) {

            /** Whether the header contains the target name column. */
            private boolean hasName;

            /** Whether the header contains all of the contig, start and end columns. */
            private boolean hasCoordinates;

            /** Number of read-count columns found in the header. */
            private int countColumnCount;

            /** Column index of each read-count column, in the order returned by {@code readCountColumnNames}. */
            private int[] countColumnIndexes;

            @Override
            public void processColumns(final TableColumnCollection columns) {
                hasCoordinates = columns.containsAll(TargetTableColumn.CONTIG.toString(), TargetTableColumn.START.toString(), TargetTableColumn.END.toString());
                hasName = columns.contains(TargetTableColumn.NAME.toString());
                if (!hasCoordinates && !hasName) {
                    throw formatException("header contain neither coordinates nor target name columns");
                }
                // Resolve the count column indexes once so that records can be read by index.
                final List<String> countColumnNames = readCountColumnNames(columns);
                countColumnCount = countColumnNames.size();
                countColumnIndexes = new int[countColumnCount];
                for (int i = 0; i < countColumnCount; i++) {
                    countColumnIndexes[i] = columns.indexOf(countColumnNames.get(i));
                }
            }

            @Override
            protected ReadCountRecord createRecord(final DataLine dataLine) {
                final double[] counts = new double[countColumnCount];
                // The target is resolved before the counts are parsed, so target errors are reported first.
                final Target target = createTarget(dataLine);
                for (int i = 0; i < counts.length; i++) {
                    counts[i] = dataLine.getDouble(countColumnIndexes[i]);
                }
                return new ReadCountRecord(target, counts);
            }

            /**
             * Extracts the target object out of a data input line.
             * <p>
             * When the name column is present the target is looked up by name; an unknown name
             * yields a new target built from the line, and a known name whose coordinates in the
             * line disagree with the expected ones is rejected. Otherwise the target is looked up
             * by its exact coordinates.
             * </p>
             *
             * @param dataLine the input data line.
             * @return never {@code null}.
             */
            private Target createTarget(final DataLine dataLine) {
                if (hasName) {
                    final String name = dataLine.get(TargetTableColumn.NAME);
                    final Target target = targets.target(name);
                    // Parsed once and reused below; null when the file has no coordinate columns.
                    final SimpleInterval interval = createInterval(dataLine);
                    if (target == null) {
                        return new Target(name, interval);
                    } else if (interval != null && !interval.equals(target.getInterval())) {
                        throw new UserException.BadInput(String.format("invalid target '%s' coordinates: expected %s but found %s", name, target.getInterval(), interval));
                    } else {
                        return target;
                    }
                } else {
                    // hasCoordinates must be true.
                    final SimpleInterval interval = createInterval(dataLine);
                    // The lookup may return candidates whose interval differs from the requested
                    // one, so select the exact match instead of testing an arbitrary candidate.
                    final Optional<Target> target = targets.targets(interval).stream()
                            .filter(candidate -> interval.equals(candidate.getInterval()))
                            .findFirst();
                    if (!target.isPresent()) {
                        throw formatException("target not found with coordinates " + interval);
                    }
                    return target.get();
                }
            }

            /**
             * Extract the interval out of a data line.
             *
             * @param dataLine the input data line.
             * @return {@code null} if the interval cannot be determined from the input file alone.
             */
            private SimpleInterval createInterval(final DataLine dataLine) {
                if (hasCoordinates) {
                    return new SimpleInterval(dataLine.get(TargetTableColumn.CONTIG), dataLine.getInt(TargetTableColumn.START), dataLine.getInt(TargetTableColumn.END));
                } else {
                    return null;
                }
            }
        };
    } catch (final IOException ex) {
        throw new UserException.CouldNotReadInputFile(file, ex);
    }
}
Also used : DataLine(org.broadinstitute.hellbender.utils.tsv.DataLine) SimpleInterval(org.broadinstitute.hellbender.utils.SimpleInterval) UserException(org.broadinstitute.hellbender.exceptions.UserException) TableColumnCollection(org.broadinstitute.hellbender.utils.tsv.TableColumnCollection) TableReader(org.broadinstitute.hellbender.utils.tsv.TableReader)

Aggregations

UserException (org.broadinstitute.hellbender.exceptions.UserException)2 SimpleInterval (org.broadinstitute.hellbender.utils.SimpleInterval)2 DataLine (org.broadinstitute.hellbender.utils.tsv.DataLine)2 TableColumnCollection (org.broadinstitute.hellbender.utils.tsv.TableColumnCollection)2 TableReader (org.broadinstitute.hellbender.utils.tsv.TableReader)2