Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in the Apache Hive project.
From class TestAccumuloRowSerializer, method testBufferResetBeforeUse.
/**
 * Checks that {@code getSerializedValue} leaves no stale bytes in the output buffer it is handed:
 * the buffer is pre-filled with garbage, the real method is invoked on an otherwise mocked
 * serializer, and the buffer is expected to be empty afterwards.
 */
@Test
public void testBufferResetBeforeUse() throws IOException {
  ByteStream.Output output = new ByteStream.Output();
  PrimitiveObjectInspector fieldObjectInspector = Mockito.mock(StringObjectInspector.class);
  ColumnMapping mapping = Mockito.mock(ColumnMapping.class);

  // Write some garbage to the buffer that should be erased. The charset is explicit so the
  // fixture does not depend on the platform default encoding.
  byte[] garbage = "foobar".getBytes(java.nio.charset.StandardCharsets.UTF_8);
  output.write(garbage);
  // Precondition: the garbage really is in the buffer, otherwise the final assertion below
  // would pass without proving anything.
  Assert.assertEquals(garbage.length, output.size());

  // Stub out the serializer, but let the method under test run its real implementation
  AccumuloRowSerializer serializer = Mockito.mock(AccumuloRowSerializer.class);
  String object = "hello";
  Mockito.when(serializer.getSerializedValue(Mockito.any(ObjectInspector.class), Mockito.any(),
      Mockito.any(ByteStream.Output.class), Mockito.any(ColumnMapping.class))).thenCallRealMethod();

  // Describe the field as a STRING primitive whose writable form is the test value
  Mockito.when(fieldObjectInspector.getCategory()).thenReturn(ObjectInspector.Category.PRIMITIVE);
  Mockito.when(fieldObjectInspector.getPrimitiveCategory()).thenReturn(PrimitiveCategory.STRING);
  Mockito.when(fieldObjectInspector.getPrimitiveWritableObject(Mockito.any(Object.class)))
      .thenReturn(new Text(object));
  Mockito.when(mapping.getEncoding()).thenReturn(ColumnEncoding.STRING);

  // Invoke the method
  serializer.getSerializedValue(fieldObjectInspector, object, output, mapping);

  // Verify the buffer was reset (real output doesn't happen because it was mocked)
  Assert.assertEquals(0, output.size());
}
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in the Apache Hive project.
From class LazyAccumuloRow, method uncheckedGetField.
/**
 * Returns the object for the field at offset {@code id}, initializing that field from the current
 * row the first time it is requested.
 *
 * <p>How the field is populated depends on the column mapping at the same offset:
 * a map mapping hands the whole row to the lazy map, the row-ID mapping uses the row ID's bytes,
 * and a plain column mapping looks the value up by column family and qualifier. A missing value,
 * or one that matches the object inspector's null sequence, marks the field as null.
 *
 * @param id offset of the field; indexes the field array, the init flags and the column mappings
 * @return whatever the field's {@code getObject()} reports after initialization
 * @throws IllegalArgumentException if the mapping at {@code id} is missing or of an unsupported
 *         type
 */
private Object uncheckedGetField(int id) {
  // Already initialized on an earlier call: just hand back the cached object.
  if (getFieldInited()[id]) {
    return getFields()[id].getObject();
  }

  ColumnMapping columnMapping = columnMappings.get(id);
  LazyObjectBase field = getFields()[id];
  if (columnMapping instanceof HiveAccumuloMapColumnMapping) {
    HiveAccumuloMapColumnMapping mapColumnMapping = (HiveAccumuloMapColumnMapping) columnMapping;
    LazyAccumuloMap map = (LazyAccumuloMap) field;
    map.init(row, mapColumnMapping);
  } else {
    byte[] value;
    if (columnMapping instanceof HiveAccumuloRowIdColumnMapping) {
      // Use the rowID directly
      // NOTE(review): getBytes() with no argument uses the platform default charset if
      // getRowId() returns a String - confirm and consider an explicit charset.
      value = row.getRowId().getBytes();
    } else if (columnMapping instanceof HiveAccumuloColumnMapping) {
      HiveAccumuloColumnMapping accumuloColumnMapping = (HiveAccumuloColumnMapping) columnMapping;
      // Use the colfam and colqual to get the value
      value = row.getValue(new Text(accumuloColumnMapping.getColumnFamilyBytes()), new Text(accumuloColumnMapping.getColumnQualifierBytes()));
    } else if (columnMapping == null) {
      // A null mapping would otherwise surface as a NullPointerException from the error
      // reporting below, hiding the real problem.
      log.error("No ColumnMapping present at offset " + id);
      throw new IllegalArgumentException("Cannot process missing ColumnMapping at offset " + id);
    } else {
      log.error("Could not process ColumnMapping of type " + columnMapping.getClass() + " at offset " + id + " in column mapping: " + columnMapping.getMappingSpec());
      throw new IllegalArgumentException("Cannot process ColumnMapping of type " + columnMapping.getClass());
    }
    if (value == null || isNull(oi.getNullSequence(), value, 0, value.length)) {
      field.setNull();
    } else {
      ByteArrayRef ref = new ByteArrayRef();
      ref.setData(value);
      field.init(ref, 0, value.length);
    }
  }

  // Flag the field only once initialization has succeeded, so a failure above is reported again
  // on the next access instead of exposing a half-initialized field.
  getFieldInited()[id] = true;
  return field.getObject();
}
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in the Apache Hive project.
From class AccumuloSerDe, method initialize.
/**
 * Sets this SerDe up from the given configuration and table properties.
 *
 * <p>Parses the properties into {@code accumuloSerDeParameters}, builds one object inspector per
 * column, and from those creates the cached struct object inspector, the cached lazy row and the
 * row serializer.
 *
 * @param conf configuration passed through to the SerDe parameters
 * @param properties table properties passed through to the SerDe parameters
 * @throws SerDeException declared by this method; see {@code getColumnObjectInspectors}
 */
public void initialize(Configuration conf, Properties properties) throws SerDeException {
  accumuloSerDeParameters = new AccumuloSerDeParameters(conf, properties, getClass().getName());

  // Snapshot the pieces of the parsed parameters needed to build the inspectors.
  final LazySerDeParameters lazyParams = accumuloSerDeParameters.getSerDeParameters();
  final List<ColumnMapping> mappingList = accumuloSerDeParameters.getColumnMappings();
  final List<TypeInfo> hiveTypes = accumuloSerDeParameters.getHiveColumnTypes();
  final AccumuloRowIdFactory rowIdFactory = accumuloSerDeParameters.getRowIdFactory();

  ArrayList<ObjectInspector> inspectors =
      getColumnObjectInspectors(hiveTypes, lazyParams, mappingList, rowIdFactory);

  // Struct inspector over all columns; the first separator delimits the top-level fields.
  cachedObjectInspector = LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
      lazyParams.getColumnNames(),
      inspectors,
      lazyParams.getSeparators()[0],
      lazyParams.getNullSequence(),
      lazyParams.isLastColumnTakesRest(),
      lazyParams.isEscaped(),
      lazyParams.getEscapeChar());

  cachedRow = new LazyAccumuloRow((LazySimpleStructObjectInspector) cachedObjectInspector);

  serializer = new AccumuloRowSerializer(
      accumuloSerDeParameters.getRowIdOffset(),
      accumuloSerDeParameters.getSerDeParameters(),
      accumuloSerDeParameters.getColumnMappings(),
      accumuloSerDeParameters.getTableVisibilityLabel(),
      accumuloSerDeParameters.getRowIdFactory());

  if (log.isInfoEnabled()) {
    log.info("Initialized with {} type: {}",
        accumuloSerDeParameters.getSerDeParameters().getColumnNames(),
        accumuloSerDeParameters.getSerDeParameters().getColumnTypes());
  }
}
Use of org.apache.hadoop.hive.accumulo.columns.ColumnMapping in the Apache Hive project.
From class AccumuloSerDe, method getColumnObjectInspectors.
/**
 * Builds one object inspector per Hive column, in column order.
 *
 * <p>The column mapped to the row ID gets its inspector from the row-ID factory; every other
 * column gets a lazy object inspector created from the SerDe parameters.
 *
 * @param columnTypes Hive type of each column; its size determines how many inspectors are built
 * @param serDeParams source of separators, null sequence and escaping settings
 * @param mappings column mapping for each column, read at the same index as {@code columnTypes}
 * @param factory creates the inspector for the row-ID column
 * @return the inspectors, one per entry of {@code columnTypes}
 * @throws SerDeException declared by this method; presumably raised by inspector creation
 */
protected ArrayList<ObjectInspector> getColumnObjectInspectors(List<TypeInfo> columnTypes, LazySerDeParameters serDeParams, List<ColumnMapping> mappings, AccumuloRowIdFactory factory) throws SerDeException {
  final int columnCount = columnTypes.size();
  final ArrayList<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(columnCount);

  for (int col = 0; col < columnCount; col++) {
    final TypeInfo hiveType = columnTypes.get(col);
    final ColumnMapping columnMapping = mappings.get(col);

    final ObjectInspector inspector;
    if (!(columnMapping instanceof HiveAccumuloRowIdColumnMapping)) {
      // Ordinary column: lazy inspector, starting at separator level 1.
      inspector = LazyFactory.createLazyObjectInspector(
          hiveType,
          serDeParams.getSeparators(),
          1,
          serDeParams.getNullSequence(),
          serDeParams.isEscaped(),
          serDeParams.getEscapeChar());
    } else {
      // Row-ID column: the factory decides how it is inspected.
      inspector = factory.createRowIdObjectInspector(hiveType);
    }
    inspectors.add(inspector);
  }

  return inspectors;
}
Aggregations