
Example 1 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

From the class TestHiveAccumuloTableInputFormat, the method testConfigureAccumuloInputFormat:

@Test
public void testConfigureAccumuloInputFormat() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
    Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = Collections.emptyList();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";
    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Stub out the ZooKeeperInstance mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
    // Call out to the real configure method
    Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
    mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
Also used: PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters), Range (org.apache.accumulo.core.data.Range), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), Pair (org.apache.accumulo.core.util.Pair), ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance), Test (org.junit.Test)
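
To ground the first line of the test: AccumuloConnectionParameters simply reads connection settings back out of the Hadoop Configuration it is handed. A minimal sketch of that round trip follows; the string property keys are assumptions written out for illustration, so verify them against the constants defined on AccumuloConnectionParameters.

// Minimal sketch; uses org.apache.hadoop.conf.Configuration and
// org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters.
Configuration conf = new Configuration();
conf.set("accumulo.instance.name", "realInstance");                  // assumed key name
conf.set("accumulo.zookeepers", "host1:2181,host2:2181,host3:2181"); // assumed key name
conf.set("accumulo.user.name", "root");                              // assumed key name
conf.set("accumulo.user.pass", "secret");                            // assumed key name
AccumuloConnectionParameters params = new AccumuloConnectionParameters(conf);
System.out.println(params.getAccumuloInstanceName()); // realInstance
System.out.println(params.getZooKeepers());           // host1:2181,host2:2181,host3:2181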

Example 2 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

From the class TestHiveAccumuloTableInputFormat, the method testConfigureAccumuloInputFormatWithIterators:

@Test
public void testConfigureAccumuloInputFormatWithIterators() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
    Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";
    IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
    iterators.add(cfg);
    cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
    iterators.add(cfg);
    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Stub out the ZooKeeperInstance mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
    // Call out to the real configure method
    Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
    mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
Also used: StringCompare (org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare), ArrayList (java.util.ArrayList), Range (org.apache.accumulo.core.data.Range), ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance), PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), PrimitiveComparisonFilter (org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter), GreaterThanOrEqual (org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual), Equal (org.apache.hadoop.hive.accumulo.predicate.compare.Equal), IntCompare (org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare), AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), Pair (org.apache.accumulo.core.util.Pair), Test (org.junit.Test)
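
The two iterator settings above are the low-level encoding of a conjunctive pushdown predicate, roughly WHERE name = 'dave' AND age = 50. Because each setting repeats the same four options, a small helper makes the intent easier to scan. The helper itself is hypothetical (it is not part of Hive), but the option keys and comparison classes are exactly the ones exercised in the test:

// Hypothetical convenience helper (not Hive API); relies only on the
// PrimitiveComparisonFilter option keys used in the test above.
static IteratorSetting comparisonFilter(int priority, Class<?> compareClass,
        Class<?> opClass, String constant, String column) {
    IteratorSetting cfg = new IteratorSetting(priority, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, compareClass.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, opClass.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, constant);
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, column);
    return cfg;
}

With it, the test's setup reduces to iterators.add(comparisonFilter(50, StringCompare.class, Equal.class, "dave", "person:name")) followed by iterators.add(comparisonFilter(50, IntCompare.class, Equal.class, "50", "person:age")).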

Example 3 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

From the class HiveAccumuloTableInputFormat, the method getSplits:

@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
    final Instance instance = accumuloParams.getInstance();
    final ColumnMapper columnMapper;
    try {
        columnMapper = getColumnMapper(jobConf);
    } catch (TooManyAccumuloColumnsException e) {
        throw new IOException(e);
    }
    JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf));
    Path[] tablePaths = FileInputFormat.getInputPaths(context);
    try {
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        final Connector connector;
        // We need a Connector so we can look up the user's authorizations when none are explicitly configured
        if (accumuloParams.useSasl() && !ugi.hasKerberosCredentials()) {
            // Inside a YARN/Tez task we no longer hold Kerberos credentials, so use the delegation token instead
            AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(AccumuloInputFormat.class, jobConf);
            // Convert the stub from the configuration back into a normal Token
            // (reflection is used here to stay compatible with Accumulo 1.6)
            token = helper.unwrapAuthenticationToken(jobConf, token);
            connector = instance.getConnector(accumuloParams.getAccumuloUserName(), token);
        } else {
            // Still in the local JVM, use the username+password or Kerberos credentials
            connector = accumuloParams.getConnector(instance);
        }
        final List<ColumnMapping> columnMappings = columnMapper.getColumnMappings();
        final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
        final Collection<Range> ranges = predicateHandler.getRanges(jobConf, columnMapper);
        // A non-null but empty collection of ranges means the pushed-down predicate
        // matches no rows; Accumulo treats an empty range collection as a full-table
        // scan, and we don't want that.
        if (null != ranges && ranges.isEmpty()) {
            return new InputSplit[0];
        }
        // Set the relevant information in the Configuration for the AccumuloInputFormat
        configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges);
        int numColumns = columnMappings.size();
        List<Integer> readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf);
        // Sanity check: every Hive column being read must have a column mapping
        if (numColumns < readColIds.size()) {
            throw new IOException("Number of column mappings (" + numColumns + ") is less than the number of Hive table columns to read (" + readColIds.size() + ")");
        }
        // get splits from Accumulo
        InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits);
        HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length];
        for (int i = 0; i < splits.length; i++) {
            RangeInputSplit ris = (RangeInputSplit) splits[i];
            hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]);
        }
        return hiveSplits;
    } catch (AccumuloException e) {
        log.error("Could not configure AccumuloInputFormat", e);
        throw new IOException(StringUtils.stringifyException(e));
    } catch (AccumuloSecurityException e) {
        log.error("Could not configure AccumuloInputFormat", e);
        throw new IOException(StringUtils.stringifyException(e));
    } catch (SerDeException e) {
        log.error("Could not configure AccumuloInputFormat", e);
        throw new IOException(StringUtils.stringifyException(e));
    }
}
Also used: Connector (org.apache.accumulo.core.client.Connector), AuthenticationToken (org.apache.accumulo.core.client.security.tokens.AuthenticationToken), MockInstance (org.apache.accumulo.core.client.mock.MockInstance), Instance (org.apache.accumulo.core.client.Instance), RangeInputSplit (org.apache.accumulo.core.client.mapred.RangeInputSplit), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), JobContext (org.apache.hadoop.mapreduce.JobContext), InputSplit (org.apache.hadoop.mapred.InputSplit), HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping), ColumnMapping (org.apache.hadoop.hive.accumulo.columns.ColumnMapping), HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation), Path (org.apache.hadoop.fs.Path), AccumuloException (org.apache.accumulo.core.client.AccumuloException), IOException (java.io.IOException), TooManyAccumuloColumnsException (org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException), Range (org.apache.accumulo.core.data.Range), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper)
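
getSplits is normally invoked by Hive's execution engine, but a driver-side sketch clarifies what the method consumes from the JobConf. Everything below is illustrative: the string property keys are assumed names, the column mapping value is invented, and actually running it needs a reachable Accumulo instance plus the remaining SerDe properties Hive injects.

// Hypothetical driver-side wiring (Hive performs this for you); uses
// org.apache.hadoop.mapred.JobConf, FileInputFormat, and InputSplit.
JobConf jobConf = new JobConf();
jobConf.set("accumulo.instance.name", "realInstance"); // assumed key name
jobConf.set("accumulo.zookeepers", "host1:2181");      // assumed key name
jobConf.set("accumulo.user.name", "root");             // assumed key name
jobConf.set("accumulo.user.pass", "secret");           // assumed key name
jobConf.set(AccumuloSerDeParameters.TABLE_NAME, "my_table");
jobConf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, ":rowid,person:name"); // illustrative mapping
// Hive splits wrap a Path (see the HiveAccumuloSplit loop above), so an input path must exist.
FileInputFormat.setInputPaths(jobConf, new Path("/tmp/accumulo-input"));
InputSplit[] splits = new HiveAccumuloTableInputFormat().getSplits(jobConf, 1);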

Example 4 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

From the class HiveAccumuloTableOutputFormat, the method configureAccumuloOutputFormat:

protected void configureAccumuloOutputFormat(JobConf job) throws IOException {
    AccumuloConnectionParameters cnxnParams = getConnectionParams(job);
    final String tableName = job.get(AccumuloSerDeParameters.TABLE_NAME);
    // Make sure we actually got the table name
    Preconditions.checkNotNull(tableName, "Expected Accumulo table name to be provided in job configuration");
    // Set the necessary Accumulo information
    try {
        if (cnxnParams.useMockInstance()) {
            setMockInstanceWithErrorChecking(job, cnxnParams.getAccumuloInstanceName());
        } else {
            // Accumulo instance name with ZK quorum
            setZooKeeperInstanceWithErrorChecking(job, cnxnParams.getAccumuloInstanceName(), cnxnParams.getZooKeepers(), cnxnParams.useSasl());
        }
        // With SASL enabled, a delegation token must end up in the job configuration;
        // the AccumuloOutputFormat will look for it there.
        if (cnxnParams.useSasl()) {
            UserGroupInformation ugi = getCurrentUser();
            if (!hasKerberosCredentials(ugi)) {
                getHelper().addTokenFromUserToJobConf(ugi, job);
            } else {
                // Still in the local JVM, so we can use the Kerberos credentials
                try {
                    Connector connector = cnxnParams.getConnector();
                    AuthenticationToken token = getHelper().getDelegationToken(connector);
                    // Send the DelegationToken down to the Configuration for Accumulo to use
                    setConnectorInfoWithErrorChecking(job, cnxnParams.getAccumuloUserName(), token);
                    // Convert the Accumulo token into a Hadoop token
                    Token<? extends TokenIdentifier> accumuloToken = getHelper().getHadoopToken(token);
                    log.info("Adding Hadoop Token for Accumulo to Job's Credentials");
                    // Add the Hadoop token to the JobConf
                    getHelper().mergeTokenIntoJobConf(job, accumuloToken);
                    // Make sure the UGI contains the token too for good measure
                    if (!ugi.addToken(accumuloToken)) {
                        throw new IOException("Failed to add Accumulo Token to UGI");
                    }
                } catch (AccumuloException | AccumuloSecurityException e) {
                    throw new IOException("Failed to acquire Accumulo DelegationToken", e);
                }
            }
        } else {
            setConnectorInfoWithErrorChecking(job, cnxnParams.getAccumuloUserName(), new PasswordToken(cnxnParams.getAccumuloPassword()));
        }
        // Set the table where we're writing this data
        setDefaultAccumuloTableName(job, tableName);
    } catch (AccumuloSecurityException e) {
        log.error("Could not connect to Accumulo with provided credentials", e);
        throw new IOException(e);
    }
}
Also used: Connector (org.apache.accumulo.core.client.Connector), AccumuloException (org.apache.accumulo.core.client.AccumuloException), PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken), AuthenticationToken (org.apache.accumulo.core.client.security.tokens.AuthenticationToken), AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), IOException (java.io.IOException), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation)
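
The SASL handling above reduces to one rule: a client JVM that still holds live Kerberos credentials mints a fresh Accumulo delegation token and plants it in the JobConf, while a task JVM without Kerberos credentials reuses a token someone already placed there. A hypothetical predicate capturing that split, built only from methods that appear in the code above:

// Hypothetical helper (not Hive API): true when this JVM should mint a new
// Accumulo delegation token, false when it should reuse one from the JobConf.
static boolean shouldMintDelegationToken(AccumuloConnectionParameters cnxnParams,
        UserGroupInformation ugi) {
    return cnxnParams.useSasl() && ugi.hasKerberosCredentials();
}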

Example 5 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

From the class TestHiveAccumuloTableInputFormat, the method testConfigureAccumuloInputFormatWithAuthorizations:

@Test
public void testConfigureAccumuloInputFormatWithAuthorizations() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar");
    ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
    Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = Collections.emptyList();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";
    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Stub out the ZooKeeperInstance mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
    // Call out to the real configure method
    Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
    mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, new Authorizations("foo,bar"));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
Also used: Authorizations (org.apache.accumulo.core.security.Authorizations), Range (org.apache.accumulo.core.data.Range), ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance), PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), Pair (org.apache.accumulo.core.util.Pair), Test (org.junit.Test)
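
Set against Example 1, this test shows the two sources of scan authorizations: an explicit value under AccumuloSerDeParameters.AUTHORIZATIONS_KEY wins, and the connector user's own authorizations are the fallback. A hypothetical helper capturing that precedence (the fallback call is the one Example 1 verifies):

// Hypothetical helper mirroring Examples 1 and 5 (not Hive API).
static Authorizations scanAuthorizations(Configuration conf, Connector connector,
        String user) throws AccumuloException, AccumuloSecurityException {
    String configured = conf.get(AccumuloSerDeParameters.AUTHORIZATIONS_KEY);
    return configured != null
        ? new Authorizations(configured)                              // verified here
        : connector.securityOperations().getUserAuthorizations(user); // as in Example 1
}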

Aggregations

AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters): 9 uses
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 7 uses
PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken): 7 uses
ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper): 7 uses
Range (org.apache.accumulo.core.data.Range): 6 uses
Test (org.junit.Test): 6 uses
Pair (org.apache.accumulo.core.util.Pair): 5 uses
ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance): 4 uses
IOException (java.io.IOException): 3 uses
Connector (org.apache.accumulo.core.client.Connector): 3 uses
AuthenticationToken (org.apache.accumulo.core.client.security.tokens.AuthenticationToken): 3 uses
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation): 3 uses
ArrayList (java.util.ArrayList): 2 uses
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 2 uses
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException): 2 uses
RangeInputSplit (org.apache.accumulo.core.client.mapred.RangeInputSplit): 2 uses
PrimitiveComparisonFilter (org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter): 2 uses
Equal (org.apache.hadoop.hive.accumulo.predicate.compare.Equal): 2 uses
GreaterThanOrEqual (org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual): 2 uses
IntCompare (org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare): 2 uses