Search in sources :

Example 1 with RangeInputSplit

use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.

The method getSplits of the class HiveAccumuloTableInputFormat:

/**
 * Computes the input splits for a Hive-over-Accumulo table scan. Builds the column mapping
 * and predicate-pushdown state from the job configuration, configures the underlying
 * AccumuloInputFormat, and wraps each Accumulo {@code RangeInputSplit} in a
 * {@code HiveAccumuloSplit} carrying the table path.
 *
 * @param jobConf
 *          Job configuration holding Accumulo connection and column mapping properties
 * @param numSplits
 *          Split-count hint, passed through to the Accumulo input format
 * @return The splits to read; empty when predicate pushdown eliminates every range
 * @throws IOException
 *           If the column mapping is invalid or Accumulo cannot be configured or contacted
 */
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
    final Instance instance = accumuloParams.getInstance();
    final ColumnMapper columnMapper;
    try {
        columnMapper = getColumnMapper(jobConf);
    } catch (TooManyAccumuloColumnsException e) {
        throw new IOException(e);
    }
    JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf));
    Path[] tablePaths = FileInputFormat.getInputPaths(context);
    try {
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        final Connector connector;
        // Need to get a Connector so we look up the user's authorizations if not otherwise specified
        if (accumuloParams.useSasl() && !ugi.hasKerberosCredentials()) {
            // In a YARN/Tez job, don't have the Kerberos credentials anymore, use the delegation token
            AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(AccumuloInputFormat.class, jobConf);
            // Convert the stub from the configuration back into a normal Token
            // More reflection to support 1.6
            token = helper.unwrapAuthenticationToken(jobConf, token);
            connector = instance.getConnector(accumuloParams.getAccumuloUserName(), token);
        } else {
            // Still in the local JVM, use the username+password or Kerberos credentials
            connector = accumuloParams.getConnector(instance);
        }
        final List<ColumnMapping> columnMappings = columnMapper.getColumnMappings();
        final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
        final Collection<Range> ranges = predicateHandler.getRanges(jobConf, columnMapper);
        // A null collection means "scan everything", but an EMPTY collection means the
        // pushed-down predicates can match no rows at all — short-circuit with zero splits.
        if (null != ranges && ranges.isEmpty()) {
            return new InputSplit[0];
        }
        // Set the relevant information in the Configuration for the AccumuloInputFormat
        configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges);
        int numColumns = columnMappings.size();
        List<Integer> readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf);
        // Sanity check: every projected Hive column must have an Accumulo column mapping
        if (numColumns < readColIds.size())
            throw new IOException("Number of column mappings (" + numColumns + ")"
                + " is less than the number of Hive table columns to read (" + readColIds.size() + ")");
        // get splits from Accumulo
        InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits);
        HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length];
        for (int i = 0; i < splits.length; i++) {
            RangeInputSplit ris = (RangeInputSplit) splits[i];
            hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]);
        }
        return hiveSplits;
    } catch (AccumuloException | AccumuloSecurityException | SerDeException e) {
        log.error("Could not configure AccumuloInputFormat", e);
        // Keep the stringified trace in the message for existing consumers, but also
        // chain the cause so it is not lost from the exception hierarchy.
        throw new IOException(StringUtils.stringifyException(e), e);
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) AuthenticationToken(org.apache.accumulo.core.client.security.tokens.AuthenticationToken) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) RangeInputSplit(org.apache.accumulo.core.client.mapred.RangeInputSplit) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) JobContext(org.apache.hadoop.mapreduce.JobContext) RangeInputSplit(org.apache.accumulo.core.client.mapred.RangeInputSplit) InputSplit(org.apache.hadoop.mapred.InputSplit) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Path(org.apache.hadoop.fs.Path) AccumuloException(org.apache.accumulo.core.client.AccumuloException) IOException(java.io.IOException) TooManyAccumuloColumnsException(org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException) Range(org.apache.accumulo.core.data.Range) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper)

Example 2 with RangeInputSplit

use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.

The method setTableName of the class HiveAccumuloTableInputFormat:

/**
   * Sets the table name on a RangeInputSplit, accounting for change in method name. Any reflection
   * related exception is wrapped in an {@link IOException}
   *
   * @param split
   *          The RangeInputSplit to operate on
   * @param tableName
   *          The name of the table to set
   * @throws IOException
   *           If neither setter can be found or invoked via reflection
   */
protected void setTableName(RangeInputSplit split, String tableName) throws IOException {
    // ACCUMULO-3017 shenanigans with method names changing without deprecation:
    // probe for the newer setTableName(String) first, falling back to setTable(String).
    Method setTableName = null;
    try {
        setTableName = RangeInputSplit.class.getMethod("setTableName", String.class);
    } catch (SecurityException | NoSuchMethodException e) {
        // Not fatal yet — the older method name may still exist below.
        log.debug("Could not get setTableName method from RangeInputSplit", e);
    }
    if (null != setTableName) {
        try {
            setTableName.invoke(split, tableName);
            return;
        } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
            log.debug("Could not invoke setTableName method from RangeInputSplit", e);
        }
    }
    // Fall back to the pre-rename accessor; from here on, failure is fatal.
    final Method setTable;
    try {
        setTable = RangeInputSplit.class.getMethod("setTable", String.class);
    } catch (SecurityException | NoSuchMethodException e) {
        throw new IOException("Could not set table name from RangeInputSplit", e);
    }
    try {
        setTable.invoke(split, tableName);
    } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
        throw new IOException("Could not set table name from RangeInputSplit", e);
    }
}
Also used : AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Method(java.lang.reflect.Method) IOException(java.io.IOException) RangeInputSplit(org.apache.accumulo.core.client.mapred.RangeInputSplit) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 3 with RangeInputSplit

use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.

The method getTableName of the class HiveAccumuloTableInputFormat:

/**
   * Reflection to work around Accumulo 1.5 and 1.6 incompatibilities. Throws an {@link IOException}
   * for any reflection related exceptions
   *
   * @param split
   *          A RangeInputSplit
   * @return The name of the table from the split
   * @throws IOException
   *           If neither getter can be found or invoked via reflection
   */
protected String getTableName(RangeInputSplit split) throws IOException {
    // ACCUMULO-3017 shenanigans with method names changing without deprecation:
    // probe for the newer getTableName() first, falling back to getTable().
    Method getTableName = null;
    try {
        getTableName = RangeInputSplit.class.getMethod("getTableName");
    } catch (SecurityException | NoSuchMethodException e) {
        // Not fatal yet — the older method name may still exist below.
        log.debug("Could not get getTableName method from RangeInputSplit", e);
    }
    if (null != getTableName) {
        try {
            return (String) getTableName.invoke(split);
        } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
            log.debug("Could not invoke getTableName method from RangeInputSplit", e);
        }
    }
    // Fall back to the pre-rename accessor; from here on, failure is fatal.
    final Method getTable;
    try {
        getTable = RangeInputSplit.class.getMethod("getTable");
    } catch (SecurityException | NoSuchMethodException e) {
        throw new IOException("Could not get table name from RangeInputSplit", e);
    }
    try {
        return (String) getTable.invoke(split);
    } catch (IllegalArgumentException | IllegalAccessException | InvocationTargetException e) {
        throw new IOException("Could not get table name from RangeInputSplit", e);
    }
}
Also used : AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) Method(java.lang.reflect.Method) IOException(java.io.IOException) RangeInputSplit(org.apache.accumulo.core.client.mapred.RangeInputSplit) InvocationTargetException(java.lang.reflect.InvocationTargetException)

Example 4 with RangeInputSplit

use of org.apache.accumulo.core.client.mapred.RangeInputSplit in project hive by apache.

The method getRecordReader of the class HiveAccumuloTableInputFormat:

/**
   * Setup accumulo input format from conf properties. Delegates to final RecordReader from mapred
   * package.
   *
   * @param inputSplit
   *          The HiveAccumuloSplit to read; its wrapped RangeInputSplit is repaired if needed
   * @param jobConf
   *          Job configuration holding Accumulo connection and column mapping properties
   * @param reporter
   *          Progress reporter passed through to the delegate reader
   * @return RecordReader producing (row id, peekable cell iterator) pairs
   * @throws IOException
   *           If the column mapping or predicate iterators cannot be built
   */
@Override
public RecordReader<Text, AccumuloHiveRow> getRecordReader(InputSplit inputSplit, final JobConf jobConf, final Reporter reporter) throws IOException {
    final ColumnMapper columnMapper;
    try {
        columnMapper = getColumnMapper(jobConf);
    } catch (TooManyAccumuloColumnsException e) {
        throw new IOException(e);
    }
    try {
        final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
        HiveAccumuloSplit hiveSplit = (HiveAccumuloSplit) inputSplit;
        RangeInputSplit rangeSplit = hiveSplit.getSplit();
        log.info("Split: " + rangeSplit);
        // The split can arrive without the iterators that were configured at planning time
        // (Accumulo serialization bug; should be fixed in Accumulo 1.5.2 and 1.6.1).
        // Re-apply them when they are missing but we expect some.
        if (null == rangeSplit.getIterators() || (rangeSplit.getIterators().isEmpty() && !iterators.isEmpty())) {
            log.debug("Re-setting iterators on InputSplit due to Accumulo bug.");
            rangeSplit.setIterators(iterators);
        }
        // The split may likewise come back without its table name, but we want it set,
        // so just re-set it from the job configuration if it's null.
        if (null == getTableName(rangeSplit)) {
            final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
            log.debug("Re-setting table name on InputSplit due to Accumulo bug.");
            setTableName(rangeSplit, accumuloParams.getAccumuloTableName());
        }
        final RecordReader<Text, PeekingIterator<Map.Entry<Key, Value>>> recordReader = accumuloInputFormat.getRecordReader(rangeSplit, jobConf, reporter);
        return new HiveAccumuloRecordReader(recordReader, iterators.size());
    } catch (SerDeException e) {
        // Keep the stringified trace in the message, but also chain the cause.
        throw new IOException(StringUtils.stringifyException(e), e);
    }
}
Also used : Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) PeekingIterator(org.apache.accumulo.core.util.PeekingIterator) TooManyAccumuloColumnsException(org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException) RangeInputSplit(org.apache.accumulo.core.client.mapred.RangeInputSplit) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Value(org.apache.accumulo.core.data.Value) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper)

Aggregations

IOException (java.io.IOException)4 RangeInputSplit (org.apache.accumulo.core.client.mapred.RangeInputSplit)4 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)3 InvocationTargetException (java.lang.reflect.InvocationTargetException)2 Method (java.lang.reflect.Method)2 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)2 AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters)2 ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper)2 TooManyAccumuloColumnsException (org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException)2 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)2 Map (java.util.Map)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 Connector (org.apache.accumulo.core.client.Connector)1 Instance (org.apache.accumulo.core.client.Instance)1 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)1 AuthenticationToken (org.apache.accumulo.core.client.security.tokens.AuthenticationToken)1 Key (org.apache.accumulo.core.data.Key)1 Range (org.apache.accumulo.core.data.Range)1 Value (org.apache.accumulo.core.data.Value)1 PeekingIterator (org.apache.accumulo.core.util.PeekingIterator)1