Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in project hive by apache.
The class HBaseUtils, method deserializePartitionKey.
/**
 * Deserializes a partition key and returns <em>only</em> the partition values.
 *
 * <p>The key is decoded with a {@link BinarySortableSerDe} whose schema is two leading string
 * columns ({@code dbName}, {@code tableName}) followed by one column per entry of
 * {@code partitions}. The two leading fields are dropped and every remaining field is converted
 * to a string. A {@code null} field is replaced by the value configured for
 * {@code HiveConf.ConfVars.DEFAULTPARTITIONNAME}.
 *
 * @param partitions partition columns, in the order they are encoded in the key
 * @param key serialized key bytes
 * @param conf configuration used to initialize the SerDe and to look up the default partition name
 * @return one string per partition column, in the same order as {@code partitions}
 * @throws RuntimeException wrapping the {@link SerDeException} raised while initializing the
 *     SerDe or deserializing the key
 */
private static List<String> deserializePartitionKey(List<FieldSchema> partitions, byte[] key, Configuration conf) {
  final int partitionCount = partitions.size();
  // Each partition type string is parsed exactly once; the resulting TypeInfo is reused both
  // for the SerDe schema and for building the per-value string converter below.
  List<TypeInfo> partitionTypes = new ArrayList<TypeInfo>(partitionCount);
  // Purely local builders: StringBuilder avoids StringBuffer's unnecessary synchronization.
  StringBuilder names = new StringBuilder("dbName,tableName,");
  StringBuilder types = new StringBuilder("string,string,");
  for (int i = 0; i < partitionCount; i++) {
    if (i > 0) {
      names.append(",");
      types.append(",");
    }
    FieldSchema partition = partitions.get(i);
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(partition.getType());
    partitionTypes.add(typeInfo);
    names.append(partition.getName());
    types.append(typeInfo);
  }

  BinarySortableSerDe serDe = new BinarySortableSerDe();
  Properties props = new Properties();
  props.setProperty(serdeConstants.LIST_COLUMNS, names.toString());
  props.setProperty(serdeConstants.LIST_COLUMN_TYPES, types.toString());
  try {
    serDe.initialize(conf, props);
    // Skip the two leading fields (dbName, tableName); only partition values are returned.
    List<?> deserializedKeys =
        ((List<?>) serDe.deserialize(new BytesWritable(key))).subList(2, partitionCount + 2);
    List<String> partitionKeys = new ArrayList<String>(deserializedKeys.size());
    for (int i = 0; i < deserializedKeys.size(); i++) {
      Object deserializedKey = deserializedKeys.get(i);
      if (deserializedKey == null) {
        partitionKeys.add(HiveConf.getVar(conf, HiveConf.ConfVars.DEFAULTPARTITIONNAME));
      } else {
        ObjectInspector inputOI =
            TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(partitionTypes.get(i));
        Converter converter = ObjectInspectorConverters.getConverter(
            inputOI, PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        partitionKeys.add((String) converter.convert(deserializedKey));
      }
    }
    return partitionKeys;
  } catch (SerDeException e) {
    throw new RuntimeException("Error when deserialize key", e);
  }
}
Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in project hive by apache.
The class HybridHashTableContainer, method setSerde.
/**
 * Installs the key/value write helper and, where still unset, the value object inspector and
 * the key sort-order / null-marker arrays taken from the key SerDe.
 *
 * <p>Does nothing beyond fetching the two SerDes when {@code writeHelper} is already set.
 *
 * @param keyCtx context supplying the key SerDe
 * @param valCtx context supplying the value SerDe and the filter-tag flag
 * @throws SerDeException if the value SerDe cannot provide its object inspector
 */
@Override
public void setSerde(MapJoinObjectSerDeContext keyCtx, MapJoinObjectSerDeContext valCtx) throws SerDeException {
  final AbstractSerDe keySerde = keyCtx.getSerDe();
  final AbstractSerDe valSerde = valCtx.getSerDe();
  if (writeHelper != null) {
    // Already initialized by an earlier call.
    return;
  }

  final String keySerdeName = keySerde.getClass().getName();
  final String valSerdeName = valSerde.getClass().getName();
  LOG.info("Initializing container with " + keySerdeName + " and " + valSerdeName);

  // This hashtable is assumed to be loaded only when tez is enabled, hence the unchecked
  // casts to the lazy-binary inspector here and to BinarySortableSerDe below.
  final LazyBinaryStructObjectInspector valueInspector =
      (LazyBinaryStructObjectInspector) valSerde.getObjectInspector();
  writeHelper = new MapJoinBytesTableContainer.LazyBinaryKvWriter(
      keySerde, valueInspector, valCtx.hasFilterTag());

  if (internalValueOi == null) {
    internalValueOi = valueInspector;
  }
  // The key SerDe is cast lazily, per field, so no cast is attempted when a field is already set.
  if (sortableSortOrders == null) {
    sortableSortOrders = ((BinarySortableSerDe) keySerde).getSortOrders();
  }
  if (nullMarkers == null) {
    nullMarkers = ((BinarySortableSerDe) keySerde).getNullMarkers();
  }
  if (notNullMarkers == null) {
    notNullMarkers = ((BinarySortableSerDe) keySerde).getNotNullMarkers();
  }
}
Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in project hive by apache.
The class MapJoinBytesTableContainer, method setSerde.
/**
 * Chooses and installs the key/value writer for this container.
 *
 * <p>When the key SerDe is a {@link BinarySortableSerDe} and the value SerDe is a
 * {@link LazyBinarySerDe}, a {@code LazyBinaryKvWriter} is used and the sort orders and null
 * markers are read from the key SerDe. Otherwise a generic {@code KeyValueWriter} is used and
 * those three fields are set to {@code null}. Nothing is changed when {@code writeHelper} is
 * already set.
 *
 * @param keyContext context supplying the key SerDe
 * @param valueContext context supplying the value SerDe and the filter-tag flag
 * @throws SerDeException if an object inspector cannot be obtained
 */
@Override
public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext) throws SerDeException {
  final AbstractSerDe keySerde = keyContext.getSerDe();
  final AbstractSerDe valSerde = valueContext.getSerDe();
  if (writeHelper != null) {
    // Already initialized by an earlier call.
    return;
  }

  final String keySerdeName = keySerde.getClass().getName();
  final String valSerdeName = valSerde.getClass().getName();
  LOG.info("Initializing container with " + keySerdeName + " and " + valSerdeName);

  final boolean binarySortableKeyWithLazyBinaryValue =
      keySerde instanceof BinarySortableSerDe && valSerde instanceof LazyBinarySerDe;
  if (!binarySortableKeyWithLazyBinaryValue) {
    // Generic path: no sort-order / null-marker metadata is available.
    writeHelper = new KeyValueWriter(keySerde, valSerde, valueContext.hasFilterTag());
    internalValueOi = createInternalOi(valueContext);
    sortableSortOrders = null;
    nullMarkers = null;
    notNullMarkers = null;
    return;
  }

  final LazyBinaryStructObjectInspector valueInspector =
      (LazyBinaryStructObjectInspector) valSerde.getObjectInspector();
  writeHelper = new LazyBinaryKvWriter(keySerde, valueInspector, valueContext.hasFilterTag());
  internalValueOi = valueInspector;

  final BinarySortableSerDe sortableKeySerde = (BinarySortableSerDe) keySerde;
  sortableSortOrders = sortableKeySerde.getSortOrders();
  nullMarkers = sortableKeySerde.getNullMarkers();
  notNullMarkers = sortableKeySerde.getNotNullMarkers();
}
Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in project hive by apache.
The class SparkReduceRecordHandler, method init.
/**
 * Initializes this reduce-side record handler.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Reads the {@link ReduceWork} for the job and takes its reducer, vector mode and tagging
 *       flag.</li>
 *   <li>Instantiates and initializes the key deserializer and one value deserializer per tag,
 *       and builds the per-tag row object inspectors. In vectorized mode this also creates the
 *       row batch and the key/value vector deserializers.</li>
 *   <li>Creates the {@link ExecMapperContext}, wires it into the reducer, and initializes the
 *       reducer and any dummy parent operators of the local work.</li>
 * </ol>
 *
 * @param job job configuration from which the reduce work is loaded
 * @param output collector set on the reducer's children
 * @param reporter passed through to the superclass initializer
 * @throws RuntimeException wrapping any exception raised while setting up the deserializers,
 *     or while initializing the operator tree (in which case {@code abort} is set first)
 * @throws OutOfMemoryError rethrown as-is if raised during operator initialization
 */
@Override
@SuppressWarnings("unchecked")
public void init(JobConf job, OutputCollector output, Reporter reporter) throws Exception {
  perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
  super.init(job, output, reporter);
  rowObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
  ObjectInspector keyObjectInspector;
  ReduceWork gWork = Utilities.getReduceWork(job);
  reducer = gWork.getReducer();
  vectorized = gWork.getVectorMode();
  // clear out any parents as reducer is the root
  reducer.setParentOperators(null);
  isTagged = gWork.getNeedsTagging();
  try {
    keyTableDesc = gWork.getKeyDesc();
    inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc.getDeserializerClass(), null);
    SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
    keyObjectInspector = inputKeyDeserializer.getObjectInspector();
    valueTableDesc = new TableDesc[gWork.getTagToValueDesc().size()];
    if (vectorized) {
      final int maxTags = gWork.getTagToValueDesc().size();
      // CONSIDER: Cleaning up this code and eliminating the arrays. Vectorization only handles
      // one operator tree.
      Preconditions.checkState(maxTags == 1);
      keyStructInspector = (StructObjectInspector) keyObjectInspector;
      // Value columns are placed after the key columns in the vectorized row.
      firstValueColumnOffset = keyStructInspector.getAllStructFieldRefs().size();
      buffer = new DataOutputBuffer();
    }
    for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
      // We should initialize the SerDe with the TypeInfo when available.
      valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
      inputValueDeserializer[tag] = ReflectionUtils.newInstance(valueTableDesc[tag].getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null, valueTableDesc[tag].getProperties(), null);
      valueObjectInspector[tag] = inputValueDeserializer[tag].getObjectInspector();
      if (vectorized) {
        /* vectorization only works with struct object inspectors */
        valueStructInspector = (StructObjectInspector) valueObjectInspector[tag];
        rowObjectInspector[tag] = Utilities.constructVectorizedReduceRowOI(keyStructInspector, valueStructInspector);
        batch = gWork.getVectorizedRowBatchCtx().createVectorizedRowBatch();
        // Setup vectorized deserialization for the key and value.
        BinarySortableSerDe binarySortableSerDe = (BinarySortableSerDe) inputKeyDeserializer;
        keyBinarySortableDeserializeToRow = new VectorDeserializeRow<BinarySortableDeserializeRead>(
            new BinarySortableDeserializeRead(
                VectorizedBatchUtil.typeInfosFromStructObjectInspector(keyStructInspector),
                /* useExternalBuffer */ true,
                binarySortableSerDe.getSortOrders(),
                binarySortableSerDe.getNullMarkers(),
                binarySortableSerDe.getNotNullMarkers()));
        keyBinarySortableDeserializeToRow.init(0);
        final int valuesSize = valueStructInspector.getAllStructFieldRefs().size();
        if (valuesSize > 0) {
          valueLazyBinaryDeserializeToRow = new VectorDeserializeRow<LazyBinaryDeserializeRead>(
              new LazyBinaryDeserializeRead(
                  VectorizedBatchUtil.typeInfosFromStructObjectInspector(valueStructInspector),
                  /* useExternalBuffer */ true));
          valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
          // Create data buffers for value bytes column vectors.
          for (int i = firstValueColumnOffset; i < batch.numCols; i++) {
            ColumnVector colVector = batch.cols[i];
            if (colVector instanceof BytesColumnVector) {
              BytesColumnVector bytesColumnVector = (BytesColumnVector) colVector;
              bytesColumnVector.initBuffer();
            }
          }
        }
      } else {
        // Row-mode: the row is a two-field struct of (key, value) inspectors.
        ArrayList<ObjectInspector> ois = new ArrayList<ObjectInspector>();
        ois.add(keyObjectInspector);
        ois.add(valueObjectInspector[tag]);
        // reducer.setGroupKeyObjectInspector(keyObjectInspector);
        rowObjectInspector[tag] = ObjectInspectorFactory.getStandardStructObjectInspector(Utilities.reduceFieldNameList, ois);
      }
    }
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
  ExecMapperContext execContext = new ExecMapperContext(job);
  localWork = gWork.getMapRedLocalWork();
  execContext.setJc(jc);
  execContext.setLocalWork(localWork);
  reducer.passExecContext(execContext);
  reducer.setReporter(rp);
  OperatorUtils.setChildrenCollector(Arrays.<Operator<? extends OperatorDesc>>asList(reducer), output);
  // initialize reduce operator tree
  try {
    LOG.info(reducer.dump(0));
    reducer.initialize(jc, rowObjectInspector);
    if (localWork != null) {
      for (Operator<? extends OperatorDesc> dummyOp : localWork.getDummyParentOp()) {
        dummyOp.setExecContext(execContext);
        dummyOp.initialize(jc, null);
      }
    }
  } catch (Throwable e) {
    abort = true;
    if (e instanceof OutOfMemoryError) {
      // Don't create a new object if we are already out of memory
      throw (OutOfMemoryError) e;
    } else {
      throw new RuntimeException("Reduce operator initialization failed", e);
    }
  }
  perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_INIT_OPERATORS);
}
Use of org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe in project hive by apache.
The class MapJoinTestConfig, method createMapJoinTableContainerSerDe.
/**
 * Builds the {@link MapJoinTableContainerSerDe} for the small table at position 1 of a map join.
 *
 * <p>The key side always uses a {@link BinarySortableSerDe} initialized from the key table
 * descriptor's properties. The value side uses the deserializer class named by the value table
 * descriptor, which is taken from the plain value descriptors when the join has no outer join
 * and from the filtered value descriptors otherwise.
 *
 * @param mapJoinDesc map join descriptor supplying the key and value table descriptors
 * @return a container SerDe wrapping the key and value contexts
 * @throws SerDeException if either SerDe fails to initialize
 */
public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoinDesc mapJoinDesc) throws SerDeException {
  // Kept as a boxed Byte so the calls below resolve exactly as they did before.
  final Byte smallTablePos = 1;

  // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here???
  final TableDesc keyDesc = mapJoinDesc.getKeyTblDesc();
  final AbstractSerDe keySerDe =
      (AbstractSerDe) ReflectionUtil.newInstance(BinarySortableSerDe.class, null);
  SerDeUtils.initializeSerDe(keySerDe, null, keyDesc.getProperties(), null);
  final MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerDe, false);

  final TableDesc valueDesc = mapJoinDesc.getNoOuterJoin()
      ? mapJoinDesc.getValueTblDescs().get(smallTablePos)
      : mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos);
  final AbstractSerDe valueSerDe =
      (AbstractSerDe) ReflectionUtil.newInstance(valueDesc.getDeserializerClass(), null);
  SerDeUtils.initializeSerDe(valueSerDe, null, valueDesc.getProperties(), null);
  final MapJoinObjectSerDeContext valueContext =
      new MapJoinObjectSerDeContext(valueSerDe, hasFilter(mapJoinDesc, smallTablePos));

  return new MapJoinTableContainerSerDe(keyContext, valueContext);
}
Aggregations