Search in sources:

Example 6 with ColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in project hive by apache.

Class TestAccumuloRowSerializer, method testBufferResetBeforeUse.

@Test
public void testBufferResetBeforeUse() throws IOException {
    // Collaborators: one real output buffer, everything else is a Mockito mock.
    ByteStream.Output buffer = new ByteStream.Output();
    PrimitiveObjectInspector stringInspector = Mockito.mock(StringObjectInspector.class);
    ColumnMapping columnMapping = Mockito.mock(ColumnMapping.class);
    AccumuloRowSerializer rowSerializer = Mockito.mock(AccumuloRowSerializer.class);
    String value = "hello";

    // Leave stale bytes in the buffer; the assertion below expects them to be gone.
    buffer.write("foobar".getBytes());

    // The inspector presents itself as a primitive STRING inspector yielding the value as Text.
    Mockito.when(stringInspector.getCategory())
        .thenReturn(ObjectInspector.Category.PRIMITIVE);
    Mockito.when(stringInspector.getPrimitiveCategory())
        .thenReturn(PrimitiveCategory.STRING);
    Mockito.when(stringInspector.getPrimitiveWritableObject(Mockito.any(Object.class)))
        .thenReturn(new Text(value));

    // The column is declared as string-encoded.
    Mockito.when(columnMapping.getEncoding()).thenReturn(ColumnEncoding.STRING);

    // Only getSerializedValue runs its real implementation; the rest of the serializer stays mocked.
    Mockito.when(
        rowSerializer.getSerializedValue(
            Mockito.any(ObjectInspector.class),
            Mockito.any(),
            Mockito.any(ByteStream.Output.class),
            Mockito.any(ColumnMapping.class)))
        .thenCallRealMethod();

    // Exercise the method under test.
    rowSerializer.getSerializedValue(stringInspector, value, buffer, columnMapping);

    // The buffer must end up empty: the stale bytes were discarded and nothing new remains in it.
    Assert.assertEquals(0, buffer.size());
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) LazyStringObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyStringObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazyMapObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ByteStream(org.apache.hadoop.hive.serde2.ByteStream) PrimitiveObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector) Text(org.apache.hadoop.io.Text) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping) Test(org.junit.Test)

Example 7 with ColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in project hive by apache.

Class LazyAccumuloRow, method uncheckedGetField.

/*
   * Returns the object for the field at offset id, populating the lazy field on first access.
   *
   * How the field is populated depends on the ColumnMapping found at the same offset:
   *   - map column mapping: the field is initialized as a LazyAccumuloMap from the row;
   *   - row ID column mapping: the bytes of the row ID are used as the value;
   *   - plain column mapping: the value is looked up in the row by column family and qualifier.
   * A missing value, or one equal to the null sequence, marks the field as null.
   *
   * The "initialized" flag is set only after the field has been populated successfully. If
   * population throws, the flag stays unset, so a later call retries instead of returning
   * the contents of a field that was never initialized.
   *
   * Throws IllegalArgumentException when the mapping is of none of the supported types.
   */
private Object uncheckedGetField(int id) {
    if (getFieldInited()[id]) {
        // Already populated: hand back the cached object.
        return getFields()[id].getObject();
    }
    ColumnMapping columnMapping = columnMappings.get(id);
    LazyObjectBase field = getFields()[id];
    if (columnMapping instanceof HiveAccumuloMapColumnMapping) {
        HiveAccumuloMapColumnMapping mapColumnMapping = (HiveAccumuloMapColumnMapping) columnMapping;
        LazyAccumuloMap map = (LazyAccumuloMap) field;
        map.init(row, mapColumnMapping);
    } else {
        byte[] value;
        if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) {
            // Use the rowID directly
            value = row.getRowId().getBytes();
        } else if (columnMapping instanceof HiveAccumuloColumnMapping) {
            HiveAccumuloColumnMapping accumuloColumnMapping = (HiveAccumuloColumnMapping) columnMapping;
            // Use the colfam and colqual to get the value
            value = row.getValue(new Text(accumuloColumnMapping.getColumnFamilyBytes()), new Text(accumuloColumnMapping.getColumnQualifierBytes()));
        } else {
            log.error("Could not process ColumnMapping of type " + columnMapping.getClass() + " at offset " + id + " in column mapping: " + columnMapping.getMappingSpec());
            throw new IllegalArgumentException("Cannot process ColumnMapping of type " + columnMapping.getClass());
        }
        if (value == null || isNull(oi.getNullSequence(), value, 0, value.length)) {
            field.setNull();
        } else {
            ByteArrayRef ref = new ByteArrayRef();
            ref.setData(value);
            field.init(ref, 0, value.length);
        }
    }
    // Mark the field as initialized only now that population has completed without throwing.
    getFieldInited()[id] = true;
    return field.getObject();
}
Also used : ByteArrayRef(org.apache.hadoop.hive.serde2.lazy.ByteArrayRef) HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) LazyObjectBase(org.apache.hadoop.hive.serde2.lazy.LazyObjectBase) Text(org.apache.hadoop.io.Text) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) HiveAccumuloMapColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping) HiveAccumuloColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping) HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping)

Example 8 with ColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in project hive by apache.

Class AccumuloSerDe, method initialize.

/**
 * Sets up this SerDe from the given configuration and table properties.
 *
 * <p>Builds the {@code AccumuloSerDeParameters}, derives one object inspector per column,
 * and from those creates the cached struct object inspector, the cached lazy row and the
 * row serializer.
 *
 * @param conf the configuration passed to {@code AccumuloSerDeParameters}
 * @param properties the properties passed to {@code AccumuloSerDeParameters}
 * @throws SerDeException declared by this method; propagated from the calls it makes
 */
public void initialize(Configuration conf, Properties properties) throws SerDeException {
    accumuloSerDeParameters = new AccumuloSerDeParameters(conf, properties, getClass().getName());

    // Everything needed to derive the per-column object inspectors.
    final LazySerDeParameters lazyParams = accumuloSerDeParameters.getSerDeParameters();
    final List<ColumnMapping> mappingList = accumuloSerDeParameters.getColumnMappings();
    final List<TypeInfo> hiveTypes = accumuloSerDeParameters.getHiveColumnTypes();
    final AccumuloRowIdFactory idFactory = accumuloSerDeParameters.getRowIdFactory();
    ArrayList<ObjectInspector> inspectors =
        getColumnObjectInspectors(hiveTypes, lazyParams, mappingList, idFactory);

    // Struct-level inspector; the first separator is the one passed at this level.
    cachedObjectInspector = LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
        lazyParams.getColumnNames(),
        inspectors,
        lazyParams.getSeparators()[0],
        lazyParams.getNullSequence(),
        lazyParams.isLastColumnTakesRest(),
        lazyParams.isEscaped(),
        lazyParams.getEscapeChar());

    cachedRow = new LazyAccumuloRow((LazySimpleStructObjectInspector) cachedObjectInspector);

    serializer = new AccumuloRowSerializer(
        accumuloSerDeParameters.getRowIdOffset(),
        accumuloSerDeParameters.getSerDeParameters(),
        accumuloSerDeParameters.getColumnMappings(),
        accumuloSerDeParameters.getTableVisibilityLabel(),
        accumuloSerDeParameters.getRowIdFactory());

    // The guard avoids evaluating the logging arguments when INFO is disabled.
    if (log.isInfoEnabled()) {
        log.info(
            "Initialized with {} type: {}",
            accumuloSerDeParameters.getSerDeParameters().getColumnNames(),
            accumuloSerDeParameters.getSerDeParameters().getColumnTypes());
    }
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) LazySerDeParameters(org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters) LazyAccumuloRow(org.apache.hadoop.hive.accumulo.LazyAccumuloRow) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping)

Example 9 with ColumnMapping

use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in project hive by apache.

Class AccumuloSerDe, method getColumnObjectInspectors.

/**
 * Builds one object inspector per Hive column, in column order.
 *
 * <p>A column whose mapping is a {@code HiveAccumuloRowIdColumnMapping} gets its inspector
 * from the row ID factory. Every other column gets a lazy object inspector created from
 * the SerDe parameters.
 *
 * @param columnTypes the type of each column; its size determines how many inspectors are built
 * @param serDeParams source of separators, null sequence and escaping settings
 * @param mappings the column mappings, read at the same index as {@code columnTypes}
 * @param factory creates the inspector for the row ID column
 * @return the inspectors, one per entry of {@code columnTypes}
 * @throws SerDeException declared by this method; propagated from the calls it makes
 */
protected ArrayList<ObjectInspector> getColumnObjectInspectors(List<TypeInfo> columnTypes, LazySerDeParameters serDeParams, List<ColumnMapping> mappings, AccumuloRowIdFactory factory) throws SerDeException {
    final int columnCount = columnTypes.size();
    ArrayList<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(columnCount);
    for (int col = 0; col < columnCount; col++) {
        TypeInfo columnType = columnTypes.get(col);
        ColumnMapping columnMapping = mappings.get(col);
        ObjectInspector inspector;
        if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) {
            // The row ID column is handled by the factory.
            inspector = factory.createRowIdObjectInspector(columnType);
        } else {
            // Separator index 1 is used here; index 0 is the one the struct-level inspector takes.
            inspector = LazyFactory.createLazyObjectInspector(
                columnType,
                serDeParams.getSeparators(),
                1,
                serDeParams.getNullSequence(),
                serDeParams.isEscaped(),
                serDeParams.getEscapeChar());
        }
        inspectors.add(inspector);
    }
    return inspectors;
}
Also used : LazySimpleStructObjectInspector(org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) HiveAccumuloRowIdColumnMapping(org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping) ColumnMapping(org.apache.hadoop.hive.accumulo.columns.ColumnMapping)

Aggregations

ColumnMapping (org.apache.hadoop.hive.accumulo.columns.ColumnMapping)9 HiveAccumuloColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloColumnMapping)5 HiveAccumuloMapColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloMapColumnMapping)4 HiveAccumuloRowIdColumnMapping (org.apache.hadoop.hive.accumulo.columns.HiveAccumuloRowIdColumnMapping)4 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)4 Text (org.apache.hadoop.io.Text)4 ArrayList (java.util.ArrayList)3 LazySimpleStructObjectInspector (org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector)3 Test (org.junit.Test)3 HashSet (java.util.HashSet)2 Pair (org.apache.accumulo.core.util.Pair)2 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)2 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)2 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)2 IOException (java.io.IOException)1 AccumuloException (org.apache.accumulo.core.client.AccumuloException)1 AccumuloSecurityException (org.apache.accumulo.core.client.AccumuloSecurityException)1 Connector (org.apache.accumulo.core.client.Connector)1 Instance (org.apache.accumulo.core.client.Instance)1 IteratorSetting (org.apache.accumulo.core.client.IteratorSetting)1