
Example 6 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

The class TestHiveAccumuloTableInputFormat, method testConfigureAccumuloInputFormatWithEmptyColumns.

@Test
public void testConfigureAccumuloInputFormatWithEmptyColumns() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
    HashSet<Pair<Text, Text>> cfCqPairs = Sets.newHashSet();
    List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";
    IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
    iterators.add(cfg);
    cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
    iterators.add(cfg);
    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Stub out the ZKI mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
    Mockito.when(mockInputFormat.getPairCollection(columnMapper.getColumnMappings())).thenReturn(cfCqPairs);
    // Call out to the real configure method
    Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
    mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    // fetchColumns is not called because we had no columns to fetch
}
Also used : StringCompare(org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare) ArrayList(java.util.ArrayList) Range(org.apache.accumulo.core.data.Range) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) PrimitiveComparisonFilter(org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter) GreaterThanOrEqual(org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual) Equal(org.apache.hadoop.hive.accumulo.predicate.compare.Equal) IntCompare(org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Pair(org.apache.accumulo.core.util.Pair) Test(org.junit.Test)
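
The final comment in this test can be made explicit. A hypothetical extra verification (not part of the Hive source) could be appended inside the same test method, using only objects already in scope:

// Hypothetical addition: assert that fetchColumns was never invoked, since the
// column mapping produced no column family/qualifier pairs to fetch.
Mockito.verify(mockInputFormat, Mockito.never()).fetchColumns(conf, cfCqPairs);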

Example 7 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

The class TestHiveAccumuloTableInputFormat, method testConfigureMockAccumuloInputFormat.

@Test
public void testConfigureMockAccumuloInputFormat() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
    Set<Pair<Text, Text>> cfCqPairs = inputformat.getPairCollection(columnMapper.getColumnMappings());
    List<IteratorSetting> iterators = Collections.emptyList();
    Set<Range> ranges = Collections.singleton(new Range());
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Call out to the real configure method
    Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Also compute the correct cf:cq pairs so we can assert the right argument was passed
    Mockito.doCallRealMethod().when(mockInputFormat).getPairCollection(columnMapper.getColumnMappings());
    mockInputFormat.configure(conf, mockInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setMockInstance(conf, mockInstance.getInstanceName());
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    Mockito.verify(mockInputFormat).fetchColumns(conf, cfCqPairs);
}
Also used : PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) Range(org.apache.accumulo.core.data.Range) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Pair(org.apache.accumulo.core.util.Pair) Test(org.junit.Test)
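
For readers unfamiliar with the cf:cq pairs verified above, here is a minimal standalone sketch (hypothetical class name and values; assumes Guava plus the Accumulo and Hadoop client libraries on the classpath) of the kind of collection getPairCollection conceptually produces for a column mapping covering person:name and person:age:

import java.util.Set;
import com.google.common.collect.Sets;
import org.apache.accumulo.core.util.Pair;
import org.apache.hadoop.io.Text;

public class CfCqPairSketch {
    public static void main(String[] args) {
        // Column family/qualifier pairs corresponding to "person:name" and "person:age".
        Set<Pair<Text, Text>> cfCqPairs = Sets.newHashSet(
            new Pair<Text, Text>(new Text("person"), new Text("name")),
            new Pair<Text, Text>(new Text("person"), new Text("age")));
        System.out.println(cfCqPairs);
    }
}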

Example 8 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

The class HiveAccumuloTableInputFormat, method getRecordReader.

/**
   * Sets up the Accumulo input format from the job configuration properties. Delegates to the
   * final RecordReader from the mapred package.
   *
   * @param inputSplit the Hive-wrapped Accumulo split to read
   * @param jobConf the job configuration carrying the Accumulo connection and SerDe properties
   * @param reporter facility for reporting progress
   * @return a RecordReader over Text keys and AccumuloHiveRow values
   * @throws IOException if the column mapping cannot be parsed or the underlying reader cannot
   *           be created
   */
@Override
public RecordReader<Text, AccumuloHiveRow> getRecordReader(InputSplit inputSplit, final JobConf jobConf, final Reporter reporter) throws IOException {
    final ColumnMapper columnMapper;
    try {
        columnMapper = getColumnMapper(jobConf);
    } catch (TooManyAccumuloColumnsException e) {
        throw new IOException(e);
    }
    try {
        final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
        HiveAccumuloSplit hiveSplit = (HiveAccumuloSplit) inputSplit;
        RangeInputSplit rangeSplit = hiveSplit.getSplit();
        log.info("Split: " + rangeSplit);
        // The split should carry its iterators from the configuration, but may not due to an
        // Accumulo bug; should be fixed in Accumulo 1.5.2 and 1.6.1.
        if (null == rangeSplit.getIterators() || (rangeSplit.getIterators().isEmpty() && !iterators.isEmpty())) {
            log.debug("Re-setting iterators on InputSplit due to Accumulo bug.");
            rangeSplit.setIterators(iterators);
        }
        // Similarly, the split should carry the table name but may not due to the same Accumulo
        // bug, so just re-set it if it's null.
        if (null == getTableName(rangeSplit)) {
            final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
            log.debug("Re-setting table name on InputSplit due to Accumulo bug.");
            setTableName(rangeSplit, accumuloParams.getAccumuloTableName());
        }
        final RecordReader<Text, PeekingIterator<Map.Entry<Key, Value>>> recordReader = accumuloInputFormat.getRecordReader(rangeSplit, jobConf, reporter);
        return new HiveAccumuloRecordReader(recordReader, iterators.size());
    } catch (SerDeException e) {
        throw new IOException(StringUtils.stringifyException(e));
    }
}
Also used : Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) PeekingIterator(org.apache.accumulo.core.util.PeekingIterator) TooManyAccumuloColumnsException(org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException) RangeInputSplit(org.apache.accumulo.core.client.mapred.RangeInputSplit) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) Value(org.apache.accumulo.core.data.Value) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) Map(java.util.Map) Key(org.apache.accumulo.core.data.Key) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper)
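
As a rough illustration of the Javadoc above ("delegates to the final RecordReader from the mapred package"), the following sketch drives the input format through the standard org.apache.hadoop.mapred contract. It is not taken from the Hive sources, the import locations are assumed to follow the accumulo-handler module layout, and it will only run against a job configuration that already carries the Accumulo connection and SerDe properties:

import org.apache.hadoop.hive.accumulo.AccumuloHiveRow;
import org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public class RecordReaderUsageSketch {
    public static void main(String[] args) throws Exception {
        // Assumed to be pre-populated with instance name, zookeepers, credentials,
        // table name and column mappings; the concrete property keys are omitted here.
        JobConf jobConf = new JobConf();
        HiveAccumuloTableInputFormat inputFormat = new HiveAccumuloTableInputFormat();
        for (InputSplit split : inputFormat.getSplits(jobConf, 1)) {
            RecordReader<Text, AccumuloHiveRow> reader =
                inputFormat.getRecordReader(split, jobConf, Reporter.NULL);
            Text rowId = reader.createKey();
            AccumuloHiveRow row = reader.createValue();
            while (reader.next(rowId, row)) {
                System.out.println(rowId + " -> " + row);
            }
            reader.close();
        }
    }
}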

Example 9 with AccumuloConnectionParameters

Use of org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters in project hive by apache.

The class TestHiveAccumuloTableOutputFormat, method testSaslConfiguration.

@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testSaslConfiguration() throws IOException, AccumuloException, AccumuloSecurityException {
    final HiveAccumuloTableOutputFormat outputFormat = Mockito.mock(HiveAccumuloTableOutputFormat.class);
    final AuthenticationToken authToken = Mockito.mock(AuthenticationToken.class);
    final Token hadoopToken = Mockito.mock(Token.class);
    final HiveAccumuloHelper helper = Mockito.mock(HiveAccumuloHelper.class);
    final AccumuloConnectionParameters cnxnParams = Mockito.mock(AccumuloConnectionParameters.class);
    final Connector connector = Mockito.mock(Connector.class);
    // Set UGI to use Kerberos
    // Have to use the string constant to support hadoop 1
    conf.set("hadoop.security.authentication", "kerberos");
    UserGroupInformation.setConfiguration(conf);
    // Set the current UGI to a fake user
    UserGroupInformation user1 = UserGroupInformation.createUserForTesting(user, new String[0]);
    // Use that as the "current user"
    Mockito.when(outputFormat.getCurrentUser()).thenReturn(user1);
    // Turn off password authentication; SASL is enabled via the mocked connection parameters below
    conf.unset(AccumuloConnectionParameters.USER_PASS);
    // Call the real method instead of the mock
    Mockito.doCallRealMethod().when(outputFormat).configureAccumuloOutputFormat(conf);
    // Return our mocked objects
    Mockito.when(outputFormat.getHelper()).thenReturn(helper);
    Mockito.when(outputFormat.getConnectionParams(conf)).thenReturn(cnxnParams);
    Mockito.when(cnxnParams.getConnector()).thenReturn(connector);
    Mockito.when(helper.getDelegationToken(connector)).thenReturn(authToken);
    Mockito.when(helper.getHadoopToken(authToken)).thenReturn(hadoopToken);
    // Stub AccumuloConnectionParameters actions
    Mockito.when(cnxnParams.useSasl()).thenReturn(true);
    Mockito.when(cnxnParams.getAccumuloUserName()).thenReturn(user);
    Mockito.when(cnxnParams.getAccumuloInstanceName()).thenReturn(instanceName);
    Mockito.when(cnxnParams.getZooKeepers()).thenReturn(zookeepers);
    // Stub OutputFormat actions
    Mockito.when(outputFormat.hasKerberosCredentials(user1)).thenReturn(true);
    // Invoke the method
    outputFormat.configureAccumuloOutputFormat(conf);
    // The AccumuloOutputFormat methods
    Mockito.verify(outputFormat).setZooKeeperInstanceWithErrorChecking(conf, instanceName, zookeepers, true);
    Mockito.verify(outputFormat).setConnectorInfoWithErrorChecking(conf, user, authToken);
    Mockito.verify(outputFormat).setDefaultAccumuloTableName(conf, outputTable);
    // Other methods we expect
    Mockito.verify(helper).mergeTokenIntoJobConf(conf, hadoopToken);
    // Make sure the token made it into the UGI
    Collection<Token<? extends TokenIdentifier>> tokens = user1.getTokens();
    Assert.assertEquals(1, tokens.size());
    Assert.assertEquals(hadoopToken, tokens.iterator().next());
}
Also used : TokenIdentifier(org.apache.hadoop.security.token.TokenIdentifier) Connector(org.apache.accumulo.core.client.Connector) AuthenticationToken(org.apache.accumulo.core.client.security.tokens.AuthenticationToken) AuthenticationToken(org.apache.accumulo.core.client.security.tokens.AuthenticationToken) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Token(org.apache.hadoop.security.token.Token) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) HiveAccumuloHelper(org.apache.hadoop.hive.accumulo.HiveAccumuloHelper) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Test(org.junit.Test)
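
The mock interactions above amount to a delegation-token hand-off. Below is a minimal sketch of that flow, built only from the calls that appear in this example; the class name, the HiveAccumuloHelper no-arg constructor, and the exception handling are assumptions, and the real configureAccumuloOutputFormat does more than this:

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters;
import org.apache.hadoop.hive.accumulo.HiveAccumuloHelper;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;

public class SaslTokenFlowSketch {
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static void obtainAndPropagateToken(JobConf conf) throws Exception {
        AccumuloConnectionParameters cnxnParams = new AccumuloConnectionParameters(conf);
        HiveAccumuloHelper helper = new HiveAccumuloHelper(); // assumed no-arg constructor
        if (cnxnParams.useSasl()) {
            Connector connector = cnxnParams.getConnector();
            // Fetch an Accumulo delegation token and wrap it as a Hadoop token
            AuthenticationToken authToken = helper.getDelegationToken(connector);
            Token hadoopToken = helper.getHadoopToken(authToken);
            // Expose the token to the job configuration and to the current user's UGI
            helper.mergeTokenIntoJobConf(conf, hadoopToken);
            UserGroupInformation.getCurrentUser().addToken(hadoopToken);
        }
    }
}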

Aggregations

AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) 9
IteratorSetting (org.apache.accumulo.core.client.IteratorSetting) 7
PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken) 7
ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper) 7
Range (org.apache.accumulo.core.data.Range) 6
Test (org.junit.Test) 6
Pair (org.apache.accumulo.core.util.Pair) 5
ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance) 4
IOException (java.io.IOException) 3
Connector (org.apache.accumulo.core.client.Connector) 3
AuthenticationToken (org.apache.accumulo.core.client.security.tokens.AuthenticationToken) 3
UserGroupInformation (org.apache.hadoop.security.UserGroupInformation) 3
ArrayList (java.util.ArrayList) 2
AccumuloException (org.apache.accumulo.core.client.AccumuloException) 2
AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException) 2
RangeInputSplit (org.apache.accumulo.core.client.mapred.RangeInputSplit) 2
PrimitiveComparisonFilter (org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter) 2
Equal (org.apache.hadoop.hive.accumulo.predicate.compare.Equal) 2
GreaterThanOrEqual (org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual) 2
IntCompare (org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare) 2
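
Pulling the aggregated usages together, here is a hedged sketch of reading connection settings from a job configuration. It uses only the constructor and getters that appear in the examples above; the class name is hypothetical and the configuration is assumed to have been populated elsewhere (the concrete property keys are not reproduced here):

import org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters;
import org.apache.hadoop.mapred.JobConf;

public class ConnectionParametersSketch {
    public static void main(String[] args) {
        // Assumed to already contain the Accumulo connection properties.
        JobConf conf = new JobConf();
        AccumuloConnectionParameters params = new AccumuloConnectionParameters(conf);
        System.out.println("instance   = " + params.getAccumuloInstanceName());
        System.out.println("zookeepers = " + params.getZooKeepers());
        System.out.println("user       = " + params.getAccumuloUserName());
        System.out.println("table      = " + params.getAccumuloTableName());
        System.out.println("sasl       = " + params.useSasl());
    }
}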