Search in sources :

Example 6 with Pair

use of org.apache.accumulo.core.util.Pair in project gora by apache.

the class AccumuloStore method readMapping.

/**
 * Reads the Accumulo mapping for this store's key/persistent class pair from an XML
 * resource on the classpath.
 *
 * <p>Every {@code class} element whose {@code keyClass} and {@code name} attributes equal
 * the canonical names of {@code keyClass} and {@code persistentClass} contributes its table
 * name, encoder and field-to-column mappings. Afterwards, every {@code table} element whose
 * {@code name} equals the resolved table name contributes its {@code config} key/value pairs.
 *
 * @param filename name of the mapping resource, resolved through this class's class loader
 * @return the populated mapping
 * @throws IOException if the resource is missing, cannot be parsed, or does not define a
 *           table name for the persistent class; the original failure is kept as the cause
 */
protected AccumuloMapping readMapping(String filename) throws IOException {
    // try-with-resources guarantees the resource stream is closed on every exit path.
    try (java.io.InputStream mappingStream = getClass().getClassLoader().getResourceAsStream(filename)) {
        if (mappingStream == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear cause
            // instead of letting the parser reject a null stream.
            throw new java.io.FileNotFoundException("Mapping file " + filename + " was not found on the classpath");
        }
        AccumuloMapping mapping = new AccumuloMapping();
        DocumentBuilder db = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document dom = db.parse(mappingStream);
        Element root = dom.getDocumentElement();
        NodeList nl = root.getElementsByTagName("class");
        for (int i = 0; i < nl.getLength(); i++) {
            Element classElement = (Element) nl.item(i);
            if (classElement.getAttribute("keyClass").equals(keyClass.getCanonicalName()) && classElement.getAttribute("name").equals(persistentClass.getCanonicalName())) {
                mapping.tableName = getSchemaName(classElement.getAttribute("table"), persistentClass);
                mapping.encoder = classElement.getAttribute("encoder");
                NodeList fields = classElement.getElementsByTagName("field");
                for (int j = 0; j < fields.getLength(); j++) {
                    Element fieldElement = (Element) fields.item(j);
                    String name = fieldElement.getAttribute("name");
                    String family = fieldElement.getAttribute("family");
                    String qualifier = fieldElement.getAttribute("qualifier");
                    // An empty qualifier attribute is treated as "no qualifier".
                    if ("".equals(qualifier)) {
                        qualifier = null;
                    }
                    Pair<Text, Text> col = new Pair<>(new Text(family), qualifier == null ? null : new Text(qualifier));
                    // Keep both directions: field name -> column and column -> field name.
                    mapping.fieldMap.put(name, col);
                    mapping.columnMap.put(col, name);
                }
            }
        }
        if (mapping.tableName == null) {
            throw new GoraException("Please define the accumulo 'table' name mapping in " + filename + " for " + persistentClass.getCanonicalName());
        }
        nl = root.getElementsByTagName("table");
        for (int i = 0; i < nl.getLength(); i++) {
            Element tableElement = (Element) nl.item(i);
            if (tableElement.getAttribute("name").equals(mapping.tableName)) {
                NodeList configs = tableElement.getElementsByTagName("config");
                for (int j = 0; j < configs.getLength(); j++) {
                    Element configElement = (Element) configs.item(j);
                    String key = configElement.getAttribute("key");
                    String val = configElement.getAttribute("value");
                    mapping.tableConfig.put(key, val);
                }
            }
        }
        return mapping;
    } catch (Exception ex) {
        // Any failure (lookup, parsing, validation, close) surfaces as a single IOException.
        throw new IOException("Unable to read " + filename, ex);
    }
}
Also used : Element(org.w3c.dom.Element) NodeList(org.w3c.dom.NodeList) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) Document(org.w3c.dom.Document) TableOfflineException(org.apache.accumulo.core.client.TableOfflineException) TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) GoraException(org.apache.gora.util.GoraException) MutationsRejectedException(org.apache.accumulo.core.client.MutationsRejectedException) TableExistsException(org.apache.accumulo.core.client.TableExistsException) TableDeletedException(org.apache.accumulo.core.client.TableDeletedException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) AccumuloException(org.apache.accumulo.core.client.AccumuloException) GoraException(org.apache.gora.util.GoraException) DocumentBuilder(javax.xml.parsers.DocumentBuilder) Pair(org.apache.accumulo.core.util.Pair)

Example 7 with Pair

use of org.apache.accumulo.core.util.Pair in project hive by apache.

the class TestHiveAccumuloTableInputFormat method testConfigureAccumuloInputFormatWithAuthorizations.

@Test
public void testConfigureAccumuloInputFormatWithAuthorizations() throws Exception {
    AccumuloConnectionParameters connectionParams = new AccumuloConnectionParameters(conf);
    // Supply explicit scan authorizations through the configuration
    conf.set(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, "foo,bar");
    String mappingSpec = conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS);
    String storageType = conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE);
    ColumnMapper mapper = new ColumnMapper(mappingSpec, storageType, columnNames, columnTypes);
    // Expected cf:cq pairs, computed by the non-mocked input format
    Set<Pair<Text, Text>> expectedPairs = inputformat.getPairCollection(mapper.getColumnMappings());
    List<IteratorSetting> noIterators = Collections.emptyList();
    Set<Range> allRanges = Collections.singleton(new Range());
    String expectedInstance = "realInstance";
    String expectedZooKeepers = "host1:2181,host2:2181,host3:2181";
    ZooKeeperInstance zooKeeperInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat formatUnderTest = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // The mocked ZooKeeperInstance reports the fixed instance name and quorum
    Mockito.when(zooKeeperInstance.getInstanceName()).thenReturn(expectedInstance);
    Mockito.when(zooKeeperInstance.getZooKeepers()).thenReturn(expectedZooKeepers);
    // Let configure(...) run its real implementation on the mock
    Mockito.doCallRealMethod().when(formatUnderTest).configure(conf, zooKeeperInstance, con, connectionParams, mapper, noIterators, allRanges);
    // getPairCollection(...) must also be real so the fetched columns match the expectation
    Mockito.doCallRealMethod().when(formatUnderTest).getPairCollection(mapper.getColumnMappings());
    formatUnderTest.configure(conf, zooKeeperInstance, con, connectionParams, mapper, noIterators, allRanges);
    // Each delegate method must have been invoked with the expected arguments
    Mockito.verify(formatUnderTest).setZooKeeperInstance(conf, expectedInstance, expectedZooKeepers, false);
    Mockito.verify(formatUnderTest).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(formatUnderTest).setInputTableName(conf, TEST_TABLE);
    // The authorizations come from the configuration value set above
    Mockito.verify(formatUnderTest).setScanAuthorizations(conf, new Authorizations("foo,bar"));
    Mockito.verify(formatUnderTest).addIterators(conf, noIterators);
    Mockito.verify(formatUnderTest).setRanges(conf, allRanges);
    Mockito.verify(formatUnderTest).fetchColumns(conf, expectedPairs);
}
Also used : Authorizations(org.apache.accumulo.core.security.Authorizations) Range(org.apache.accumulo.core.data.Range) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Pair(org.apache.accumulo.core.util.Pair) Test(org.junit.Test)

Example 8 with Pair

use of org.apache.accumulo.core.util.Pair in project hive by apache.

the class TestHiveAccumuloTableInputFormat method testConfigureAccumuloInputFormatWithEmptyColumns.

/**
 * Verifies that {@code configure} sets up the connection, table, authorizations, iterators
 * and ranges, but does not call {@code fetchColumns} when the column pair collection is empty.
 */
@Test
public void testConfigureAccumuloInputFormatWithEmptyColumns() throws Exception {
    AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(conf);
    ColumnMapper columnMapper = new ColumnMapper(conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS), conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE), columnNames, columnTypes);
    // Deliberately empty: this is the "no columns to fetch" scenario
    HashSet<Pair<Text, Text>> cfCqPairs = Sets.newHashSet();
    List<IteratorSetting> iterators = new ArrayList<IteratorSetting>();
    Set<Range> ranges = Collections.singleton(new Range());
    String instanceName = "realInstance";
    String zookeepers = "host1:2181,host2:2181,host3:2181";
    // Equality filter on person:name
    IteratorSetting cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, StringCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "dave");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:name");
    iterators.add(cfg);
    // Equality filter on person:age
    cfg = new IteratorSetting(50, PrimitiveComparisonFilter.class);
    cfg.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, IntCompare.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, Equal.class.getName());
    cfg.addOption(PrimitiveComparisonFilter.CONST_VAL, "50");
    cfg.addOption(PrimitiveComparisonFilter.COLUMN, "person:age");
    iterators.add(cfg);
    ZooKeeperInstance zkInstance = Mockito.mock(ZooKeeperInstance.class);
    HiveAccumuloTableInputFormat mockInputFormat = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Stub out the ZKI mock
    Mockito.when(zkInstance.getInstanceName()).thenReturn(instanceName);
    Mockito.when(zkInstance.getZooKeepers()).thenReturn(zookeepers);
    // Force the empty pair collection. This stub must stay the last one registered for
    // getPairCollection(...): a later doCallRealMethod() for the same call would replace it
    // and the empty-columns path would never be exercised.
    Mockito.when(mockInputFormat.getPairCollection(columnMapper.getColumnMappings())).thenReturn(cfCqPairs);
    // Call out to the real configure method
    Mockito.doCallRealMethod().when(mockInputFormat).configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    mockInputFormat.configure(conf, zkInstance, con, accumuloParams, columnMapper, iterators, ranges);
    // Verify that the correct methods are invoked on AccumuloInputFormat
    Mockito.verify(mockInputFormat).setZooKeeperInstance(conf, instanceName, zookeepers, false);
    Mockito.verify(mockInputFormat).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(mockInputFormat).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(mockInputFormat).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(mockInputFormat).addIterators(conf, iterators);
    Mockito.verify(mockInputFormat).setRanges(conf, ranges);
    // fetchColumns is not called because we had no columns to fetch
    Mockito.verify(mockInputFormat, Mockito.never()).fetchColumns(conf, cfCqPairs);
}
Also used : StringCompare(org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare) ArrayList(java.util.ArrayList) Range(org.apache.accumulo.core.data.Range) ZooKeeperInstance(org.apache.accumulo.core.client.ZooKeeperInstance) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) PrimitiveComparisonFilter(org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter) GreaterThanOrEqual(org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual) Equal(org.apache.hadoop.hive.accumulo.predicate.compare.Equal) IntCompare(org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Pair(org.apache.accumulo.core.util.Pair) Test(org.junit.Test)

Example 9 with Pair

use of org.apache.accumulo.core.util.Pair in project hive by apache.

the class TestHiveAccumuloTableInputFormat method testConfigureMockAccumuloInputFormat.

@Test
public void testConfigureMockAccumuloInputFormat() throws Exception {
    AccumuloConnectionParameters connectionParams = new AccumuloConnectionParameters(conf);
    String mappingSpec = conf.get(AccumuloSerDeParameters.COLUMN_MAPPINGS);
    String storageType = conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE);
    ColumnMapper mapper = new ColumnMapper(mappingSpec, storageType, columnNames, columnTypes);
    // Expected cf:cq pairs, computed by the non-mocked input format
    Set<Pair<Text, Text>> expectedPairs = inputformat.getPairCollection(mapper.getColumnMappings());
    List<IteratorSetting> noIterators = Collections.emptyList();
    Set<Range> allRanges = Collections.singleton(new Range());
    HiveAccumuloTableInputFormat formatUnderTest = Mockito.mock(HiveAccumuloTableInputFormat.class);
    // Let configure(...) run its real implementation on the mock
    Mockito.doCallRealMethod().when(formatUnderTest).configure(conf, mockInstance, con, connectionParams, mapper, noIterators, allRanges);
    // getPairCollection(...) must also be real so the fetched columns match the expectation
    Mockito.doCallRealMethod().when(formatUnderTest).getPairCollection(mapper.getColumnMappings());
    formatUnderTest.configure(conf, mockInstance, con, connectionParams, mapper, noIterators, allRanges);
    // Each delegate method must have been invoked with the expected arguments
    Mockito.verify(formatUnderTest).setMockInstance(conf, mockInstance.getInstanceName());
    Mockito.verify(formatUnderTest).setConnectorInfo(conf, USER, new PasswordToken(PASS));
    Mockito.verify(formatUnderTest).setInputTableName(conf, TEST_TABLE);
    Mockito.verify(formatUnderTest).setScanAuthorizations(conf, con.securityOperations().getUserAuthorizations(USER));
    Mockito.verify(formatUnderTest).addIterators(conf, noIterators);
    Mockito.verify(formatUnderTest).setRanges(conf, allRanges);
    Mockito.verify(formatUnderTest).fetchColumns(conf, expectedPairs);
}
Also used : PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) AccumuloConnectionParameters(org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters) Range(org.apache.accumulo.core.data.Range) ColumnMapper(org.apache.hadoop.hive.accumulo.columns.ColumnMapper) Pair(org.apache.accumulo.core.util.Pair) Test(org.junit.Test)

Example 10 with Pair

use of org.apache.accumulo.core.util.Pair in project hive by apache.

the class HiveAccumuloTableInputFormat method configure.

/**
 * Applies the connection, table, authorization, iterator, range and column settings for a
 * scan to the given job configuration.
 *
 * @param conf
 *          Job configuration that receives all settings
 * @param instance
 *          Accumulo instance; a {@code MockInstance} is configured by name only, any other
 *          instance by name and ZooKeeper quorum
 * @param connector
 *          Accumulo connector, used to look up the user's authorizations when the
 *          configuration does not provide any
 * @param accumuloParams
 *          Connection information to the Accumulo instance
 * @param columnMapper
 *          Configuration of Hive to Accumulo columns
 * @param iterators
 *          Any iterators to be configured server-side
 * @param ranges
 *          Accumulo ranges for the query; {@code null} leaves the ranges unset
 * @throws AccumuloSecurityException
 * @throws AccumuloException
 * @throws SerDeException
 * @throws IOException
 *           if the Accumulo token cannot be added to the current user, or the user's
 *           tokens cannot be added to the job configuration
 */
protected void configure(JobConf conf, Instance instance, Connector connector, AccumuloConnectionParameters accumuloParams, ColumnMapper columnMapper, List<IteratorSetting> iterators, Collection<Range> ranges) throws AccumuloSecurityException, AccumuloException, SerDeException, IOException {
    // Pick the InputFormat setup call that matches the kind of Instance we were given
    if (!(instance instanceof MockInstance)) {
        setZooKeeperInstance(conf, instance.getInstanceName(), instance.getZooKeepers(), accumuloParams.useSasl());
    } else {
        setMockInstance(conf, instance.getInstanceName());
    }
    // Credentials: plain username/password unless SASL is enabled
    if (!accumuloParams.useSasl()) {
        setConnectorInfo(conf, accumuloParams.getAccumuloUserName(), new PasswordToken(accumuloParams.getAccumuloPassword()));
    } else {
        UserGroupInformation currentUser = UserGroupInformation.getCurrentUser();
        // With Kerberos credentials available, obtain a delegation token
        if (currentUser.hasKerberosCredentials()) {
            Connector saslConnector = accumuloParams.getConnector();
            AuthenticationToken delegationToken = helper.getDelegationToken(saslConnector);
            // Hand the delegation token to the Configuration for Accumulo to use
            setConnectorInfo(conf, accumuloParams.getAccumuloUserName(), delegationToken);
            // Obtain the Hadoop token form of the Accumulo token
            Token<? extends TokenIdentifier> hadoopToken = helper.getHadoopToken(delegationToken);
            log.info("Adding Hadoop Token for Accumulo to Job's Credentials");
            // Merge the Hadoop token into the JobConf
            helper.mergeTokenIntoJobConf(conf, hadoopToken);
            boolean tokenAdded = currentUser.addToken(hadoopToken);
            if (!tokenAdded) {
                throw new IOException("Failed to add Accumulo Token to UGI");
            }
        }
        try {
            helper.addTokenFromUserToJobConf(currentUser, conf);
        } catch (IOException e) {
            throw new IOException("Current user did not contain necessary delegation Tokens " + currentUser, e);
        }
    }
    // The table to read from
    setInputTableName(conf, accumuloParams.getAccumuloTableName());
    // Prefer authorizations defined in the Configuration
    Authorizations scanAuths = AccumuloSerDeParameters.getAuthorizationsFromConf(conf);
    if (scanAuths == null) {
        // Nothing configured: fall back to everything the user is authorized for
        scanAuths = connector.securityOperations().getUserAuthorizations(accumuloParams.getAccumuloUserName());
    }
    // Implicitly handles users providing invalid authorizations
    setScanAuthorizations(conf, scanAuths);
    // Filters derived from WHERE predicates
    addIterators(conf, iterators);
    // Leaving ranges unset scans the entire table
    if (ranges != null) {
        log.info("Setting ranges: " + ranges);
        setRanges(conf, ranges);
    }
    // Limit the scan to the mapped columns, when there are any
    HashSet<Pair<Text, Text>> columnPairs = getPairCollection(columnMapper.getColumnMappings());
    if (columnPairs != null && !columnPairs.isEmpty()) {
        fetchColumns(conf, columnPairs);
    }
}
Also used : Connector(org.apache.accumulo.core.client.Connector) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) Authorizations(org.apache.accumulo.core.security.Authorizations) AuthenticationToken(org.apache.accumulo.core.client.security.tokens.AuthenticationToken) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) IOException(java.io.IOException) UserGroupInformation(org.apache.hadoop.security.UserGroupInformation) Pair(org.apache.accumulo.core.util.Pair)

Aggregations

Pair (org.apache.accumulo.core.util.Pair)10 Test (org.junit.Test)7 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)6 ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper)6 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)5 Range (org.apache.accumulo.core.data.Range)5 AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters)5 ZooKeeperInstance (org.apache.accumulo.core.client.ZooKeeperInstance)4 Text (org.apache.hadoop.io.Text)4 ArrayList (java.util.ArrayList)3 IOException (java.io.IOException)2 HashSet (java.util.HashSet)2 Authorizations (org.apache.accumulo.core.security.Authorizations)2 ColumnMapping (org.apache.hadoop.hive.accumulo.columns.ColumnMapping)2 HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)2 PrimitiveComparisonFilter (org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter)2 Equal (org.apache.hadoop.hive.accumulo.predicate.compare.Equal)2 GreaterThanOrEqual (org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual)2 IntCompare (org.apache.hadoop.hive.accumulo.predicate.compare.IntCompare)2 StringCompare (org.apache.hadoop.hive.accumulo.predicate.compare.StringCompare)2