Search in sources :

Example 1 with HiveAccumuloColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.

the class HiveAccumuloTableInputFormat method getPairCollection.

/**
 * Builds the set of column family / column qualifier pairs to fetch for the supplied mappings.
 * <p>
 * A {@link HiveAccumuloColumnMapping} contributes its family together with its qualifier (the
 * qualifier is left {@code null} when the mapping has none). A
 * {@link HiveAccumuloMapColumnMapping} contributes its family with a {@code null} qualifier.
 * Any other kind of mapping (such as the rowID) contributes nothing.
 *
 * @param columnMappings
 *          The list of ColumnMappings for the given query
 * @return a Set of Pairs of colfams and colquals
 */
protected HashSet<Pair<Text, Text>> getPairCollection(List<ColumnMapping> columnMappings) {
    final HashSet<Pair<Text, Text>> fetchColumns = new HashSet<Pair<Text, Text>>();
    for (ColumnMapping candidate : columnMappings) {
        final Text family;
        final Text qualifier;
        if (candidate instanceof HiveAccumuloColumnMapping) {
            final HiveAccumuloColumnMapping plain = (HiveAccumuloColumnMapping) candidate;
            family = new Text(plain.getColumnFamily());
            // Leave the qualifier null when the mapping does not define one
            qualifier = (null == plain.getColumnQualifier()) ? null : new Text(plain.getColumnQualifier());
        } else if (candidate instanceof HiveAccumuloMapColumnMapping) {
            // Can't fetch prefix on colqual, must pull the entire qualifier
            // TODO use an iterator to do the filter, server-side.
            family = new Text(((HiveAccumuloMapColumnMapping) candidate).getColumnFamily());
            qualifier = null;
        } else {
            // Not a column-backed mapping; nothing to fetch for it
            continue;
        }
        fetchColumns.add(new Pair<Text, Text>(family, qualifier));
    }
    log.info("Computed columns to fetch (" + fetchColumns + ") from " + columnMappings);
    return fetchColumns;
}
Also used : HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) Text(org.apache.hadoop.io.Text) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) Pair(org.apache.accumulo.core.util.Pair) HashSet(java.util.HashSet)

Example 2 with HiveAccumuloColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.

the class AccumuloRowSerializer method serialize.

/**
 * Serializes a Hive struct into a {@link Mutation}: the field at {@code rowIdOffset} becomes the
 * row ID and every other non-null field is written through its column mapping.
 *
 * @param obj
 *          the struct value to serialize
 * @param objInspector
 *          inspector for {@code obj}; must be of category STRUCT
 * @return the Mutation holding the serialized row
 * @throws SerDeException
 *           if {@code objInspector} is not a struct inspector
 * @throws IOException
 *           declared by this method; presumably raised by the serialization helpers
 */
public Mutation serialize(Object obj, ObjectInspector objInspector) throws SerDeException, IOException {
    if (ObjectInspector.Category.STRUCT != objInspector.getCategory()) {
        throw new SerDeException(getClass().toString() + " can only serialize struct types, but we got: " + objInspector.getTypeName());
    }

    // Pull the field definitions and their values out of the struct
    final StructObjectInspector structInspector = (StructObjectInspector) objInspector;
    final List<? extends StructField> structFields = structInspector.getAllStructFieldRefs();
    final List<Object> rowValues = structInspector.getStructFieldsDataAsList(obj);

    // The row ID offset has to point at an existing field
    if (rowIdOffset >= structFields.size()) {
        throw new IllegalStateException("Attempted to access field outside of definition for struct. Have " + structFields.size() + " fields and tried to access offset " + rowIdOffset);
    }

    final StructField rowIdField = structFields.get(rowIdOffset);
    final Object rowIdValue = rowValues.get(rowIdOffset);
    // Starts out as the row ID's inspector, then tracks the inspector of the column being written
    ObjectInspector columnInspector = rowIdField.getFieldObjectInspector();

    // The RowIdFactory produces the bytes which become the row of the mutation
    final Mutation rowMutation = new Mutation(rowIdFactory.serializeRowId(rowIdValue, rowIdField, output));

    for (int index = 0; index < structFields.size(); index++) {
        if (index != rowIdOffset) {
            final StructField columnField = structFields.get(index);
            final Object columnValue = rowValues.get(index);
            // Hive's schema is fixed but Accumulo columns are sparse: nulls are simply not written
            if (columnValue != null) {
                columnInspector = columnField.getFieldObjectInspector();
                final ColumnMapping columnMapping = mappings.get(index);
                if (columnMapping instanceof HiveAccumuloColumnMapping) {
                    serializeColumnMapping((HiveAccumuloColumnMapping) columnMapping, columnInspector, columnValue, rowMutation);
                } else if (columnMapping instanceof HiveAccumuloMapColumnMapping) {
                    serializeColumnMapping((HiveAccumuloMapColumnMapping) columnMapping, columnInspector, columnValue, rowMutation);
                } else {
                    throw new IllegalArgumentException("Mapping for " + columnField.getFieldName() + " was not a HiveColumnMapping, but was " + columnMapping.getClass());
                }
            }
        }
    }
    return rowMutation;
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Mutation(org.apache.accumulo.core.data.Mutation) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 3 with HiveAccumuloColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.

the class AccumuloPredicateHandler method getIterators.

/**
 * Builds one iterator setting per search condition whose column is not the Hive rowID column.
 * Returns an empty list when iterator pushdown is disabled in the configuration.
 *
 * @param conf
 *          Configuration holding the pushdown flag, storage type and Hive column names
 * @param columnMapper
 *          supplies the rowID offset and the column mapping for each Hive column
 * @return the iterator settings, possibly empty
 * @throws IllegalArgumentException
 *           if the Hive column names are missing from the configuration
 * @throws SerDeException
 *           declared by this method; presumably raised while building the settings
 */
public List<IteratorSetting> getIterators(Configuration conf, ColumnMapper columnMapper) throws SerDeException {
    final List<IteratorSetting> settings = Lists.newArrayList();
    if (!conf.getBoolean(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, AccumuloSerDeParameters.ITERATOR_PUSHDOWN_DEFAULT)) {
        LOG.info("Iterator pushdown is disabled for this table");
        return settings;
    }

    final boolean binaryEncodedRow = ColumnEncoding.BINARY.getName().equalsIgnoreCase(conf.get(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE));
    final int rowIdOffset = columnMapper.getRowIdOffset();
    final String[] columnNameArray = conf.getStrings(serdeConstants.LIST_COLUMNS);
    if (columnNameArray == null) {
        throw new IllegalArgumentException("Could not find Hive columns in configuration");
    }

    // Only resolve the rowID column name when the offset falls inside the column list
    final boolean rowIdInRange = 0 <= rowIdOffset && rowIdOffset < columnNameArray.length;
    final String rowIdColumnName = rowIdInRange ? columnNameArray[rowIdOffset] : null;

    final List<String> columnNames = Arrays.asList(columnNameArray);
    for (IndexSearchCondition condition : getSearchConditions(conf)) {
        final String conditionColumn = condition.getColumnDesc().getColumn();
        // Conditions on the rowID column get no iterator from this method
        if (rowIdColumnName != null && rowIdColumnName.equals(conditionColumn)) {
            continue;
        }
        final HiveAccumuloColumnMapping mapping = (HiveAccumuloColumnMapping) columnMapper.getColumnMappingForHiveColumn(columnNames, conditionColumn);
        settings.add(toSetting(mapping, condition, binaryEncodedRow));
    }
    LOG.info("num iterators = " + settings.size());
    return settings;
}
Also used : IteratorSetting(org.apache.accumulo.core.client.IteratorSetting) IndexSearchCondition(org.apache.hadoop.hive.ql.index.IndexSearchCondition) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)

Example 4 with HiveAccumuloColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.

the class PrimitiveComparisonFilter method init.

/**
 * Initializes the filter from its options: parses the serialized column mapping into the
 * family/qualifier this filter refers to, then reflectively loads and wires up the primitive
 * comparison and the compare operation.
 *
 * @throws IOException
 *           if either configured class cannot be found, instantiated or accessed
 */
@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options, IteratorEnvironment env) throws IOException {
    super.init(source, options, env);

    final Entry<String, String> familyAndQualifier = ColumnMappingFactory.parseMapping(options.get(COLUMN));
    // Only the cf:[cq] matters here; encoding, column name and type are placeholders
    columnMapping = new HiveAccumuloColumnMapping(familyAndQualifier.getKey(), familyAndQualifier.getValue(), ColumnEncoding.STRING, "column", "string");
    columnMappingFamily = new Text(columnMapping.getColumnFamily());
    columnMappingQualifier = new Text(columnMapping.getColumnQualifier());
    cfHolder = new Text();
    cqHolder = new Text();

    try {
        final Class<?> comparisonClass = JavaUtils.loadClass(options.get(P_COMPARE_CLASS));
        final Class<?> compareOpClass = JavaUtils.loadClass(options.get(COMPARE_OPT_CLASS));
        final PrimitiveComparison comparison = comparisonClass.asSubclass(PrimitiveComparison.class).newInstance();
        compOpt = compareOpClass.asSubclass(CompareOp.class).newInstance();
        // Hand the constant from the options to the comparison before attaching it to the operation
        comparison.init(getConstant(options));
        compOpt.setPrimitiveCompare(comparison);
    } catch (ClassNotFoundException notFound) {
        throw new IOException(notFound);
    } catch (InstantiationException notInstantiable) {
        throw new IOException(notInstantiable);
    } catch (IllegalAccessException notAccessible) {
        throw new IOException(notAccessible);
    }
}
Also used : PrimitiveComparison(org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison) Text(org.apache.hadoop.io.Text) IOException(java.io.IOException) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)

Example 5 with HiveAccumuloColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping in project hive by apache.

the class TestHiveAccumuloTableInputFormat method testColumnMappingsToPairs.

/**
 * Checks that every cf:cq column mapping is turned into a matching (family, qualifier) pair and
 * that the rowID mapping produces no pair.
 */
@Test
public void testColumnMappingsToPairs() {
    final String stringType = TypeInfoFactory.stringTypeInfo.toString();
    final List<ColumnMapping> mappings = new ArrayList<ColumnMapping>();
    final Set<Pair<Text, Text>> expected = new HashSet<Pair<Text, Text>>();

    // Row ID comes first and is expected to be left out of the result
    mappings.add(new HiveAccumuloRowIdColumnMapping(AccumuloHiveConstants.ROWID, ColumnEncoding.STRING, "row", stringType));

    // { column family, column qualifier, Hive column name }
    final String[][] cells = {
        { "person", "name", "col1" },
        { "person", "age", "col2" },
        { "person", "height", "col3" },
        { "city", "name", "col4" }
    };
    for (String[] cell : cells) {
        mappings.add(new HiveAccumuloColumnMapping(cell[0], cell[1], ColumnEncoding.STRING, cell[2], stringType));
        expected.add(new Pair<Text, Text>(new Text(cell[0]), new Text(cell[1])));
    }

    assertEquals(expected, inputformat.getPairCollection(mappings));
}
Also used : HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) Pair(org.apache.accumulo.core.util.Pair) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)6 ColumnMapping (org.apache.hadoop.hive.accumulo.columns.ColumnMapping)4 Text (org.apache.hadoop.io.Text)4 HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping)3 HashSet (java.util.HashSet)2 Pair (org.apache.accumulo.core.util.Pair)2 HiveAccumuloRowIdColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping)2 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)1 Mutation (org.apache.accumulo.core.data.Mutation)1 PrimitiveComparison (org.apache.hadoop.hive.accumulo.predicate.compare.PrimitiveComparison)1 IndexSearchCondition (org.apache.hadoop.hive.ql.index.IndexSearchCondition)1 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)1 ByteArrayRef (org.apache.hadoop.hive.serde2.lazy.ByteArrayRef)1 LazyObjectBase (org.apache.hadoop.hive.serde2.lazy.LazyObjectBase)1 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)1 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)1 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)1 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)1