Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in the Apache Hive project — class MapJoinBytesTableContainer, method createInternalOi.
/**
 * Builds a LazyBinary struct object inspector mirroring the value serde's struct layout.
 * Values are serialized with LazyBinarySerDe, so reads need an OI in that format.
 *
 * @param valCtx serde context whose object inspector describes the value struct
 * @return a LazyBinary struct OI with the same field names and types
 * @throws SerDeException if the value serde's inspector cannot be obtained
 */
private LazyBinaryStructObjectInspector createInternalOi(MapJoinObjectSerDeContext valCtx) throws SerDeException {
  List<? extends StructField> fields =
      ((StructObjectInspector) valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs();
  List<String> columnNames = new ArrayList<String>(fields.size());
  List<ObjectInspector> columnOis = new ArrayList<ObjectInspector>(fields.size());
  for (StructField field : fields) {
    columnNames.add(field.getFieldName());
    // An OI cannot hand back its TypeInfo directly, so round-trip through the type string.
    TypeInfo fieldType =
        TypeInfoUtils.getTypeInfoFromTypeString(field.getFieldObjectInspector().getTypeName());
    columnOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(fieldType));
  }
  return LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(columnNames, columnOis);
}
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in the Apache Hive project — class SparkDynamicPartitionPruner, method prunePartitionSingleSource.
/**
 * Prunes the partitions of {@code work} for one pruning source: evaluates the source's
 * partition-key expression against each partition's column value and drops non-matches.
 *
 * @param info pruning source carrying the column name, its type inspector, the partition-key
 *             expression, and the set of surviving values
 * @param work the MapWork whose partition list is filtered in place
 * @throws HiveException if evaluator initialization or filtering fails
 */
private void prunePartitionSingleSource(SourceInfo info, MapWork work) throws HiveException {
  Set<Object> prunedValues = info.values;
  String partColumn = info.columnName;
  // Writable OI matching the partition column's primitive type, so comparisons use typed values.
  ObjectInspector partColOi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
      TypeInfoFactory.getPrimitiveTypeInfo(info.fieldInspector.getTypeName()));
  // Partition spec values arrive as strings; convert them into the column's writable type.
  ObjectInspectorConverters.Converter converter = ObjectInspectorConverters.getConverter(
      PrimitiveObjectInspectorFactory.javaStringObjectInspector, partColOi);
  // One-column row shape used only to initialize the partition-key expression evaluator.
  StructObjectInspector rowOi = ObjectInspectorFactory.getStandardStructObjectInspector(
      Collections.singletonList(partColumn), Collections.singletonList(partColOi));
  @SuppressWarnings("rawtypes")
  ExprNodeEvaluator eval = ExprNodeEvaluatorFactory.get(info.partKey);
  eval.initialize(rowOi);
  applyFilterToPartitions(work, converter, eval, partColumn, prunedValues);
}
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in the Apache Hive project — class VectorizedBatchUtil, method setVector.
/**
 * Copies one primitive struct field from a row-mode object into the vectorized batch column
 * at position (offset + colIndex), row rowIndex. Fixed-width types are written directly into
 * the column vector; variable-length types (binary/string/char/varchar) are appended to the
 * scratch {@code buffer} and referenced from the column vector by offset/length.
 *
 * @param row the row-mode object to read from
 * @param oi struct inspector used to extract the field's data from {@code row}
 * @param field the struct field to copy (must be of PRIMITIVE category)
 * @param batch destination vectorized batch
 * @param buffer scratch buffer backing byte-reference column values
 * @param rowIndex destination row within the batch
 * @param colIndex logical column index of the field
 * @param offset column offset added to colIndex to locate the destination column vector
 * @throws HiveException if the field's primitive category is not supported by vectorization
 */
private static void setVector(Object row, StructObjectInspector oi, StructField field, VectorizedRowBatch batch, DataOutputBuffer buffer, int rowIndex, int colIndex, int offset) throws HiveException {
Object fieldData = oi.getStructFieldData(row, field);
ObjectInspector foi = field.getFieldObjectInspector();
// Vectorization only supports PRIMITIVE data types. Assert the same
assert (foi.getCategory() == Category.PRIMITIVE);
// Get writable object
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
Object writableCol = poi.getPrimitiveWritableObject(fieldData);
// Null handling below: fixed-width numeric columns receive a filler value (1, or NaN for
// float/double) alongside the null flag; byte-reference types are only marked null.
switch(poi.getPrimitiveCategory()) {
case BOOLEAN:
{
// Booleans are stored as long 1/0.
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case BYTE:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case SHORT:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case INT:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case LONG:
{
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case DATE:
{
// Dates are stored as days since the epoch.
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case FLOAT:
{
DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
} else {
dcv.vector[rowIndex] = Double.NaN;
setNullColIsNullValue(dcv, rowIndex);
}
}
break;
case DOUBLE:
{
DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
} else {
dcv.vector[rowIndex] = Double.NaN;
setNullColIsNullValue(dcv, rowIndex);
}
}
break;
case TIMESTAMP:
{
TimestampColumnVector lcv = (TimestampColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
lcv.set(rowIndex, ((TimestampWritable) writableCol).getTimestamp());
lcv.isNull[rowIndex] = false;
} else {
lcv.setNullValue(rowIndex);
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case INTERVAL_YEAR_MONTH:
{
// Year-month intervals are stored as a single total-months long.
LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
HiveIntervalYearMonth i = ((HiveIntervalYearMonthWritable) writableCol).getHiveIntervalYearMonth();
lcv.vector[rowIndex] = i.getTotalMonths();
lcv.isNull[rowIndex] = false;
} else {
lcv.vector[rowIndex] = 1;
setNullColIsNullValue(lcv, rowIndex);
}
}
break;
case INTERVAL_DAY_TIME:
{
IntervalDayTimeColumnVector icv = (IntervalDayTimeColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
HiveIntervalDayTime idt = ((HiveIntervalDayTimeWritable) writableCol).getHiveIntervalDayTime();
icv.set(rowIndex, idt);
icv.isNull[rowIndex] = false;
} else {
icv.setNullValue(rowIndex);
setNullColIsNullValue(icv, rowIndex);
}
}
break;
case BINARY:
{
// Byte-reference types: append the payload to the scratch buffer, then point the
// column vector at that region via setRef (no per-value allocation).
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
BytesWritable bw = (BytesWritable) writableCol;
byte[] bytes = bw.getBytes();
int start = buffer.getLength();
int length = bw.getLength();
try {
buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case STRING:
{
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
Text colText = (Text) writableCol;
int start = buffer.getLength();
int length = colText.getLength();
try {
buffer.write(colText.getBytes(), 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case CHAR:
{
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
byte[] bytes = colHiveChar.getStrippedValue().getBytes();
// We assume the CHAR maximum length was enforced when the object was created.
int length = bytes.length;
int start = buffer.getLength();
try {
// In vector mode, we store CHAR as unpadded.
buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case VARCHAR:
{
BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
byte[] bytes = colHiveVarchar.getValue().getBytes();
// We assume the VARCHAR maximum length was enforced when the object was created.
int length = bytes.length;
int start = buffer.getLength();
try {
buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
} else {
setNullColIsNullValue(bcv, rowIndex);
}
}
break;
case DECIMAL:
DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
dcv.isNull[rowIndex] = false;
HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
dcv.set(rowIndex, wobj);
} else {
setNullColIsNullValue(dcv, rowIndex);
}
break;
default:
throw new HiveException("Vectorizaton is not supported for datatype:" + poi.getPrimitiveCategory());
}
}
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in the Apache Hive project — class VectorMapJoinOperator, method initializeOp.
/**
 * Initializes the vectorized map-join operator: builds row writers for the big table's
 * input inspector, compiles the key wrapper batch, and replaces the big-table value
 * evaluators with vectorization-aware ones that read directly from the input batch.
 *
 * @param hconf Hadoop configuration passed through to evaluators and the superclass
 * @throws HiveException on initialization failure in the superclass or expression setup
 */
@Override
public void initializeOp(Configuration hconf) throws HiveException {
// Use a final variable to properly parameterize the processVectorInspector closure.
// Using a member variable in the closure will not do the right thing...
final int parameterizePosBigTable = conf.getPosBigTable();
// Code borrowed from VectorReduceSinkOperator.initializeOp
VectorExpressionWriterFactory.processVectorInspector((StructObjectInspector) inputObjInspectors[parameterizePosBigTable], new VectorExpressionWriterFactory.SingleOIDClosure() {
@Override
public void assign(VectorExpressionWriter[] writers, ObjectInspector objectInspector) {
rowWriters = writers;
inputObjInspectors[parameterizePosBigTable] = objectInspector;
}
});
// One scratch slot per row writer; reused when materializing a single row.
singleRow = new Object[rowWriters.length];
super.initializeOp(hconf);
List<ExprNodeDesc> keyDesc = conf.getKeys().get(posBigTable);
keyOutputWriters = VectorExpressionWriterFactory.getExpressionWriters(keyDesc);
keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {
@Override
public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
valueWriters = writers;
joinValuesObjectInspectors[posBigTable] = oids;
}
});
// We're hijacking the big table evaluators and replacing them with our own custom ones
// which are going to return values from the input batch vector expressions
List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
for (int i = 0; i < bigTableExpressions.size(); ++i) {
ExprNodeDesc desc = bigTableExpressions.get(i);
VectorExpression vectorExpr = bigTableValueExpressions[i];
// This is a vectorized aware evaluator
ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {
// Column in the input batch this evaluator reads from, and the index of the
// matching value writer; both are bound via initVectorExpr below.
int columnIndex;
int writerIndex;
public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
this.columnIndex = columnIndex;
this.writerIndex = writerIndex;
return this;
}
@Override
public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
// Vectorized evaluators are never initialized through the row-mode path.
throw new HiveException("should never reach here");
}
@Override
protected Object _evaluate(Object row, int version) throws HiveException {
// "row" is actually the whole batch; pick the physical row via the selected map.
VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
}
}.initVectorExpr(vectorExpr.getOutputColumn(), i);
vectorNodeEvaluators.add(eval);
}
// Now replace the old evaluators with our own
joinValues[posBigTable] = vectorNodeEvaluators;
// Filtering is handled in the input batch processing
if (filterMaps != null) {
filterMaps[posBigTable] = null;
}
}
Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in the Apache Hive project — class VectorSMBMapJoinOperator, method initializeOp.
/**
 * Initializes the vectorized sort-merge-bucket map-join operator: sets up the output
 * vectorized batch context, compiles the key wrapper batch, installs an evaluator that
 * converts vectorized key wrappers into row-mode SMB join keys, and replaces the
 * big-table value evaluators with vectorization-aware ones that read directly from
 * the input batch.
 *
 * Fix over original: removed two stray empty-statement semicolons inside the anonymous
 * classes (after evaluate() and after the columnIndex field) — dead tokens with no effect.
 *
 * @param hconf Hadoop configuration passed through to evaluators and the superclass
 * @throws HiveException on initialization failure in the superclass or expression setup
 */
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
  super.initializeOp(hconf);
  // Build the vectorized output batch from the row-mode output inspector plus scratch columns.
  vrbCtx = new VectorizedRowBatchCtx();
  vrbCtx.init((StructObjectInspector) this.outputObjInspector, vOutContext.getScratchColumnTypeNames());
  outputBatch = vrbCtx.createVectorizedRowBatch();
  keyWrapperBatch = VectorHashKeyWrapperBatch.compileKeyWrapperBatch(keyExpressions);
  outputVectorAssignRowMap = new HashMap<ObjectInspector, VectorAssignRow>();
  // This key evaluator translates from the vectorized VectorHashKeyWrapper format
  // into the row-mode MapJoinKey
  keyEvaluator = new SMBJoinKeyEvaluator() {
    // Reusable key list: one slot per key expression, overwritten on every evaluate().
    private List<Object> key;

    public SMBJoinKeyEvaluator init() {
      key = new ArrayList<Object>();
      for (int i = 0; i < keyExpressions.length; ++i) {
        key.add(null);
      }
      return this;
    }

    @Override
    public List<Object> evaluate(VectorHashKeyWrapper kw) throws HiveException {
      for (int i = 0; i < keyExpressions.length; ++i) {
        key.set(i, keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]));
      }
      return key;
    }
  }.init();
  Map<Byte, List<ExprNodeDesc>> valueExpressions = conf.getExprs();
  List<ExprNodeDesc> bigTableExpressions = valueExpressions.get(posBigTable);
  // We're hijacking the big table evaluators and replacing them with our own custom ones
  // which are going to return values from the input batch vector expressions
  List<ExprNodeEvaluator> vectorNodeEvaluators = new ArrayList<ExprNodeEvaluator>(bigTableExpressions.size());
  VectorExpressionWriterFactory.processVectorExpressions(bigTableExpressions, new VectorExpressionWriterFactory.ListOIDClosure() {
    @Override
    public void assign(VectorExpressionWriter[] writers, List<ObjectInspector> oids) {
      valueWriters = writers;
      joinValuesObjectInspectors[posBigTable] = oids;
    }
  });
  for (int i = 0; i < bigTableExpressions.size(); ++i) {
    ExprNodeDesc desc = bigTableExpressions.get(i);
    VectorExpression vectorExpr = bigTableValueExpressions[i];
    // This is a vectorized aware evaluator
    ExprNodeEvaluator eval = new ExprNodeEvaluator<ExprNodeDesc>(desc, hconf) {
      // Column in the input batch this evaluator reads from, and the index of the
      // matching value writer; both are bound via initVectorExpr below.
      int columnIndex;
      int writerIndex;

      public ExprNodeEvaluator initVectorExpr(int columnIndex, int writerIndex) {
        this.columnIndex = columnIndex;
        this.writerIndex = writerIndex;
        return this;
      }

      @Override
      public ObjectInspector initialize(ObjectInspector rowInspector) throws HiveException {
        // Vectorized evaluators are never initialized through the row-mode path.
        throw new HiveException("should never reach here");
      }

      @Override
      protected Object _evaluate(Object row, int version) throws HiveException {
        // "row" is actually the whole batch; pick the physical row via the selected map.
        VectorizedRowBatch inBatch = (VectorizedRowBatch) row;
        int rowIndex = inBatch.selectedInUse ? inBatch.selected[batchIndex] : batchIndex;
        return valueWriters[writerIndex].writeValue(inBatch.cols[columnIndex], rowIndex);
      }
    }.initVectorExpr(vectorExpr.getOutputColumn(), i);
    vectorNodeEvaluators.add(eval);
  }
  // Now replace the old evaluators with our own
  joinValues[posBigTable] = vectorNodeEvaluators;
}
Aggregations