
Example 6 with IteratorSetting

Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.

From class TestHiveAccumuloTableInputFormat, method testDegreesAndMillis.

@Test
public void testDegreesAndMillis() throws Exception {
    Connector con = mockInstance.getConnector(USER, new PasswordToken(PASS.getBytes()));
    Scanner scan = con.createScanner(TEST_TABLE, new Authorizations("blah"));
    IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1, PrimitiveComparisonFilter.class);
    is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, DoubleCompare.class.getName());
    is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, GreaterThanOrEqual.class.getName());
    is.addOption(PrimitiveComparisonFilter.CONST_VAL, new String(Base64.encodeBase64(parseDoubleBytes("55.6"))));
    is.addOption(PrimitiveComparisonFilter.COLUMN, "cf:dgrs");
    scan.addScanIterator(is);
    IteratorSetting is2 = new IteratorSetting(2, PrimitiveComparisonFilter.FILTER_PREFIX + 2, PrimitiveComparisonFilter.class);
    is2.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, LongCompare.class.getName());
    is2.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, LessThan.class.getName());
    is2.addOption(PrimitiveComparisonFilter.CONST_VAL, new String(Base64.encodeBase64(parseLongBytes("778"))));
    is2.addOption(PrimitiveComparisonFilter.COLUMN, "cf:mills");
    scan.addScanIterator(is2);
    boolean foundDennis = false;
    int totalCount = 0;
    for (Map.Entry<Key, Value> kv : scan) {
        boolean foundName = false;
        boolean foundSid = false;
        boolean foundDegrees = false;
        boolean foundMillis = false;
        // decodeRow unpacks the whole-row encoded entry from the filter into its individual cells
        SortedMap<Key, Value> items = PrimitiveComparisonFilter.decodeRow(kv.getKey(), kv.getValue());
        for (Map.Entry<Key, Value> item : items.entrySet()) {
            if (item.getKey().getRow().toString().equals("r3")) {
                foundDennis = true;
            }
            if (item.getKey().getColumnQualifier().equals(NAME)) {
                foundName = true;
            } else if (item.getKey().getColumnQualifier().equals(SID)) {
                foundSid = true;
            } else if (item.getKey().getColumnQualifier().equals(DEGREES)) {
                foundDegrees = true;
            } else if (item.getKey().getColumnQualifier().equals(MILLIS)) {
                foundMillis = true;
            }
        }
        totalCount++;
        assertTrue(foundDegrees & foundMillis & foundName & foundSid);
    }
    assertTrue(foundDennis);
    assertEquals(1, totalCount);
}
Also used: Connector (org.apache.accumulo.core.client.Connector), Scanner (org.apache.accumulo.core.client.Scanner), Authorizations (org.apache.accumulo.core.security.Authorizations), GreaterThanOrEqual (org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual), PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken), LessThan (org.apache.hadoop.hive.accumulo.predicate.compare.LessThan), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), LongCompare (org.apache.hadoop.hive.accumulo.predicate.compare.LongCompare), DoubleCompare (org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare), Value (org.apache.accumulo.core.data.Value), Map (java.util.Map), SortedMap (java.util.SortedMap), Key (org.apache.accumulo.core.data.Key), Test (org.junit.Test)
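The pattern this test exercises is the core IteratorSetting contract: a priority that orders iterators on the scan, a name that must be unique per scan, the iterator class to run server-side, and free-form string options. Below is a minimal sketch of a single such filter in isolation; the class name, table name, iterator priority, and threshold parameter are illustrative placeholders, not taken from the test fixture.

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter;
import org.apache.hadoop.hive.accumulo.predicate.compare.DoubleCompare;
import org.apache.hadoop.hive.accumulo.predicate.compare.GreaterThanOrEqual;

public class SingleFilterScanSketch {
    /** Returns a scanner that only passes rows where cf:dgrs >= the given threshold. */
    static Scanner degreesAtLeast(Connector conn, byte[] encodedThreshold)
            throws TableNotFoundException {
        // "example_table" is a placeholder table name
        Scanner scan = conn.createScanner("example_table", new Authorizations());
        // Priority 1 and a unique name; the filter class runs inside the tablet server
        IteratorSetting is = new IteratorSetting(1, PrimitiveComparisonFilter.FILTER_PREFIX + 1,
                PrimitiveComparisonFilter.class);
        is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, DoubleCompare.class.getName());
        is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, GreaterThanOrEqual.class.getName());
        // Constant operands travel to the server as Base64-encoded strings
        is.addOption(PrimitiveComparisonFilter.CONST_VAL,
                new String(Base64.encodeBase64(encodedThreshold)));
        is.addOption(PrimitiveComparisonFilter.COLUMN, "cf:dgrs");
        scan.addScanIterator(is);
        return scan;
    }
}

The test above simply stacks two of these on one scanner (priorities 1 and 2), which is how the conjunction of the two predicates is evaluated server-side.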

Example 7 with IteratorSetting

Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.

From class HiveAccumuloTableInputFormat, method getSplits.

@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    final AccumuloConnectionParameters accumuloParams = new AccumuloConnectionParameters(jobConf);
    final Instance instance = accumuloParams.getInstance();
    final ColumnMapper columnMapper;
    try {
        columnMapper = getColumnMapper(jobConf);
    } catch (TooManyAccumuloColumnsException e) {
        throw new IOException(e);
    }
    JobContext context = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(jobConf));
    Path[] tablePaths = FileInputFormat.getInputPaths(context);
    try {
        UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
        final Connector connector;
        // Need to get a Connector so we look up the user's authorizations if not otherwise specified
        if (accumuloParams.useSasl() && !ugi.hasKerberosCredentials()) {
            // In a YARN/Tez job, don't have the Kerberos credentials anymore, use the delegation token
            AuthenticationToken token = ConfiguratorBase.getAuthenticationToken(AccumuloInputFormat.class, jobConf);
            // Convert the stub from the configuration back into a normal Token
            // More reflection to support 1.6
            token = helper.unwrapAuthenticationToken(jobConf, token);
            connector = instance.getConnector(accumuloParams.getAccumuloUserName(), token);
        } else {
            // Still in the local JVM, use the username+password or Kerberos credentials
            connector = accumuloParams.getConnector(instance);
        }
        final List<ColumnMapping> columnMappings = columnMapper.getColumnMappings();
        final List<IteratorSetting> iterators = predicateHandler.getIterators(jobConf, columnMapper);
        final Collection<Range> ranges = predicateHandler.getRanges(jobConf, columnMapper);
        // An empty (non-null) collection of ranges means the predicates can never match.
        // Accumulo would treat an empty set of ranges as a full-table scan. We don't want that.
        if (null != ranges && ranges.isEmpty()) {
            return new InputSplit[0];
        }
        // Set the relevant information in the Configuration for the AccumuloInputFormat
        configure(jobConf, instance, connector, accumuloParams, columnMapper, iterators, ranges);
        int numColumns = columnMappings.size();
        List<Integer> readColIds = ColumnProjectionUtils.getReadColumnIDs(jobConf);
        // Sanity check
        if (numColumns < readColIds.size()) {
            throw new IOException("Number of column mappings (" + numColumns + ")"
                + " is less than the number of Hive table columns to read (" + readColIds.size()
                + ")");
        }
        // get splits from Accumulo
        InputSplit[] splits = accumuloInputFormat.getSplits(jobConf, numSplits);
        HiveAccumuloSplit[] hiveSplits = new HiveAccumuloSplit[splits.length];
        for (int i = 0; i < splits.length; i++) {
            RangeInputSplit ris = (RangeInputSplit) splits[i];
            hiveSplits[i] = new HiveAccumuloSplit(ris, tablePaths[0]);
        }
        return hiveSplits;
    } catch (AccumuloException e) {
        log.error("Could not configure AccumuloInputFormat", e);
        throw new IOException(StringUtils.stringifyException(e));
    } catch (AccumuloSecurityException e) {
        log.error("Could not configure AccumuloInputFormat", e);
        throw new IOException(StringUtils.stringifyException(e));
    } catch (SerDeException e) {
        log.error("Could not configure AccumuloInputFormat", e);
        throw new IOException(StringUtils.stringifyException(e));
    }
}
Also used: Connector (org.apache.accumulo.core.client.Connector), AuthenticationToken (org.apache.accumulo.core.client.security.tokens.AuthenticationToken), MockInstance (org.apache.accumulo.core.client.mock.MockInstance), Instance (org.apache.accumulo.core.client.Instance), RangeInputSplit (org.apache.accumulo.core.client.mapred.RangeInputSplit), AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException), JobContext (org.apache.hadoop.mapreduce.JobContext), InputSplit (org.apache.hadoop.mapred.InputSplit), HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping), ColumnMapping (org.apache.hadoop.hive.accumulo.columns.ColumnMapping), HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), UserGroupInformation (org.apache.hadoop.security.UserGroupInformation), Path (org.apache.hadoop.fs.Path), AccumuloException (org.apache.accumulo.core.client.AccumuloException), IOException (java.io.IOException), TooManyAccumuloColumnsException (org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException), Range (org.apache.accumulo.core.data.Range), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper)
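The private configure(...) call is not shown in this snippet. As a hedged sketch, the information it carries maps onto the public static setters of the mapred AccumuloInputFormat; the setter names below are the real Accumulo 1.x API, but treating them as exactly what configure(...) delegates to is an assumption, and the wrapper class and parameter list are placeholders.

import java.util.Collection;
import java.util.List;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.mapred.JobConf;

public class InputFormatConfigSketch {
    // Sketch only: how iterators and ranges reach AccumuloInputFormat via the JobConf
    static void configureSketch(JobConf jobConf, String principal, AuthenticationToken token,
            String table, Authorizations auths, List<IteratorSetting> iterators,
            Collection<Range> ranges) throws AccumuloSecurityException {
        AccumuloInputFormat.setConnectorInfo(jobConf, principal, token);
        AccumuloInputFormat.setInputTableName(jobConf, table);
        AccumuloInputFormat.setScanAuthorizations(jobConf, auths);
        // A null ranges collection means no row-id predicates were pushed down: scan everything
        if (null != ranges) {
            AccumuloInputFormat.setRanges(jobConf, ranges);
        }
        for (IteratorSetting is : iterators) {
            AccumuloInputFormat.addIterator(jobConf, is);
        }
    }
}

Once the JobConf is populated this way, the subsequent accumuloInputFormat.getSplits(jobConf, numSplits) call can compute Accumulo splits that already carry the pushed-down iterators and ranges.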

Example 8 with IteratorSetting

Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.

From class AccumuloPredicateHandler, method getIterators.

/**
   * Loop through the search conditions and build iterator settings for predicates involving
   * columns other than the rowID, if any.
   *
   * @param conf
   *          Configuration
   * @param columnMapper
   *          Mapping of Hive columns to Accumulo columns for this table
   * @return a list of IteratorSettings, one per predicate that can be pushed down
   * @throws SerDeException
   */
public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper) throws SerDeException {
    List<IteratorSetting> itrs = Lists.newArrayList();
    boolean shouldPushdown = conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT);
    if (!shouldPushdown) {
        log.info("Iterator pushdown is disabled for this table");
        return itrs;
    }
    int rowIdOffset = columnMapper.getRowIdOffset();
    String[] hiveColumnNamesArr = conf.getStrings(serdeConstants.LIST_COLUMNS);
    if (null == hiveColumnNamesArr) {
        throw new IllegalArgumentException("Could not find Hive columns in configuration");
    }
    String hiveRowIdColumnName = null;
    if (rowIdOffset >= 0 && rowIdOffset < hiveColumnNamesArr.length) {
        hiveRowIdColumnName = hiveColumnNamesArr[rowIdOffset];
    }
    List<String> hiveColumnNames = Arrays.asList(hiveColumnNamesArr);
    for (IndexSearchCondition sc : getSearchConditions(conf)) {
        String col = sc.getColumnDesc().getColumn();
        if (hiveRowIdColumnName == null || !hiveRowIdColumnName.equals(col)) {
            HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper.getColumnMappingForHiveColumn(hiveColumnNames, col);
            itrs.add(toSetting(mapping, sc));
        }
    }
    if (log.isInfoEnabled()) {
        log.info("num iterators = " + itrs.size());
    }
    return itrs;
}
Also used: IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition), HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)
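toSetting(mapping, sc) is not shown in this snippet, but given the options PrimitiveComparisonFilter consumes in Example 6, each per-condition setting plausibly has the shape sketched below. The priority scheme, the parameter list, and the getColumnFamily()/getColumnQualifier() accessors are assumptions for illustration, not the handler's actual code.

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping;
import org.apache.hadoop.hive.accumulo.predicate.PrimitiveComparisonFilter;

public class ToSettingSketch {
    // Hypothetical shape of one pushed-down predicate; mirrors the option
    // keys that Example 6 sets by hand on its Scanner.
    static IteratorSetting toSettingSketch(HiveAccumuloColumnMapping mapping, int priority,
            String pCompareClass, String compareOpClass, String base64ConstVal) {
        IteratorSetting is = new IteratorSetting(priority,
                PrimitiveComparisonFilter.FILTER_PREFIX + priority,
                PrimitiveComparisonFilter.class);
        is.addOption(PrimitiveComparisonFilter.P_COMPARE_CLASS, pCompareClass);
        is.addOption(PrimitiveComparisonFilter.COMPARE_OPT_CLASS, compareOpClass);
        is.addOption(PrimitiveComparisonFilter.CONST_VAL, base64ConstVal);
        is.addOption(PrimitiveComparisonFilter.COLUMN,
                mapping.getColumnFamily() + ":" + mapping.getColumnQualifier());
        return is;
    }
}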

Example 9 with IteratorSetting

Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.

From class TestAccumuloPredicateHandler, method testIteratorIgnoreRowIDFields.

@Test
public void testIteratorIgnoreRowIDFields() {
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "rid", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "bbb");
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    String filterExpr = SerializationUtilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
        assertEquals(0, iterators.size());
    } catch (SerDeException e) {
        // Fail loudly instead of silently discarding the stringified exception
        fail(StringUtils.stringifyException(e));
    }
}
Also used: GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd), Test (org.junit.Test)
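This test and testIgnoreIteratorPushdown in Example 10 below hand-build the same "column op constant" expression trees. A small hypothetical helper (not part of the Hive test class) makes the shape of that boilerplate explicit:

import java.util.List;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

public class PredicateSketch {
    // Builds "columnName <op> constant" over string columns, exactly as the
    // tests do inline. The tests also pass stringTypeInfo as the function's
    // return type, so that choice is preserved here.
    static ExprNodeDesc stringPredicate(String columnName, GenericUDF op, String constant) {
        List<ExprNodeDesc> children = Lists.newArrayList();
        children.add(new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, columnName, null, false));
        children.add(new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, constant));
        return new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, op, children);
    }
}

With this helper, node and node2 above reduce to stringPredicate("rid", new GenericUDFOPEqualOrLessThan(), "aaa") and stringPredicate("rid", new GenericUDFOPGreaterThan(), "bbb").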

Example 10 with IteratorSetting

Use of org.apache.accumulo.core.client.IteratorSetting in project hive by apache.

From class TestAccumuloPredicateHandler, method testIgnoreIteratorPushdown.

@Test
public void testIgnoreIteratorPushdown() throws TooManyAccumuloColumnsException {
    // Override what's placed in the Configuration by setup()
    conf = new JobConf();
    List<String> columnNames = Arrays.asList("field1", "field2", "rid");
    List<TypeInfo> columnTypes = Arrays.<TypeInfo>asList(TypeInfoFactory.stringTypeInfo, TypeInfoFactory.intTypeInfo, TypeInfoFactory.stringTypeInfo);
    conf.set(serdeConstants.LIST_COLUMNS, Joiner.on(',').join(columnNames));
    conf.set(serdeConstants.LIST_COLUMN_TYPES, "string,int,string");
    String columnMappingStr = "cf:f1,cf:f2,:rowID";
    conf.set(AccumuloSerDeParameters.COLUMN_MAPPINGS, columnMappingStr);
    columnMapper = new ColumnMapper(columnMappingStr, ColumnEncoding.STRING.getName(), columnNames, columnTypes);
    ExprNodeDesc column = new ExprNodeColumnDesc(TypeInfoFactory.stringTypeInfo, "field1", null, false);
    ExprNodeDesc constant = new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, "aaa");
    List<ExprNodeDesc> children = Lists.newArrayList();
    children.add(column);
    children.add(constant);
    ExprNodeDesc node = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPEqualOrLessThan(), children);
    assertNotNull(node);
    ExprNodeDesc column2 = new ExprNodeColumnDesc(TypeInfoFactory.intTypeInfo, "field2", null, false);
    ExprNodeDesc constant2 = new ExprNodeConstantDesc(TypeInfoFactory.intTypeInfo, 5);
    List<ExprNodeDesc> children2 = Lists.newArrayList();
    children2.add(column2);
    children2.add(constant2);
    ExprNodeDesc node2 = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPGreaterThan(), children2);
    assertNotNull(node2);
    List<ExprNodeDesc> bothFilters = Lists.newArrayList();
    bothFilters.add(node);
    bothFilters.add(node2);
    ExprNodeGenericFuncDesc both = new ExprNodeGenericFuncDesc(TypeInfoFactory.stringTypeInfo, new GenericUDFOPAnd(), bothFilters);
    String filterExpr = SerializationUtilities.serializeExpression(both);
    conf.set(TableScanDesc.FILTER_EXPR_CONF_STR, filterExpr);
    conf.setBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, false);
    try {
        List<IteratorSetting> iterators = handler.getIterators(conf, columnMapper);
        assertEquals(0, iterators.size());
    } catch (Exception e) {
        fail(StringUtils.stringifyException(e));
    }
}
Also used: GenericUDFOPGreaterThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan), ExprNodeConstantDesc (org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc), GenericUDFOPEqualOrLessThan (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan), ExprNodeGenericFuncDesc (org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc), TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo), TooManyAccumuloColumnsException (org.apache.hadoop.hive.accumulo.serde.TooManyAccumuloColumnsException), SerDeException (org.apache.hadoop.hive.serde2.SerDeException), IteratorSetting (org.apache.accumulo.core.client.IteratorSetting), ExprNodeColumnDesc (org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc), ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc), JobConf (org.apache.hadoop.mapred.JobConf), ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper), GenericUDFOPAnd (org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd), Test (org.junit.Test)

Aggregations

IteratorSetting (org.apache.accumulo.core.client.IteratorSetting): 35
Test (org.junit.Test): 16
Scanner (org.apache.accumulo.core.client.Scanner): 13
Authorizations (org.apache.accumulo.core.security.Authorizations): 13
Key (org.apache.accumulo.core.data.Key): 11
Value (org.apache.accumulo.core.data.Value): 11
PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken): 10
ColumnMapper (org.apache.hadoop.hive.accumulo.columns.ColumnMapper): 10
Entry (java.util.Map.Entry): 9
BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig): 8
Range (org.apache.accumulo.core.data.Range): 8
Map (java.util.Map): 7
AccumuloException (org.apache.accumulo.core.client.AccumuloException): 7
TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException): 7
Mutation (org.apache.accumulo.core.data.Mutation): 7
ColumnVisibility (org.apache.accumulo.core.security.ColumnVisibility): 7
AccumuloConnectionParameters (org.apache.hadoop.hive.accumulo.AccumuloConnectionParameters): 7
BatchWriter (org.apache.accumulo.core.client.BatchWriter): 6
IteratorSettingBuilder (uk.gov.gchq.gaffer.accumulostore.utils.IteratorSettingBuilder): 6
Edge (uk.gov.gchq.gaffer.data.element.Edge): 6