
Example 1 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class GenericUDTFGetSplits, method initialize:

@Override
public StructObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    LOG.debug("initializing GenericUDFGetSplits");
    if (SessionState.get() == null || SessionState.get().getConf() == null) {
        throw new IllegalStateException("Cannot run get splits outside HS2");
    }
    LOG.debug("Initialized conf, jc and metastore connection");
    if (arguments.length != 2) {
        throw new UDFArgumentLengthException("The function GET_SPLITS accepts 2 arguments.");
    } else if (!(arguments[0] instanceof StringObjectInspector)) {
        LOG.error("Got " + arguments[0].getTypeName() + " instead of string.");
        throw new UDFArgumentTypeException(0, "\"" + "string\" is expected at function GET_SPLITS, " + "but \"" + arguments[0].getTypeName() + "\" is found");
    } else if (!(arguments[1] instanceof IntObjectInspector)) {
        LOG.error("Got " + arguments[1].getTypeName() + " instead of int.");
        throw new UDFArgumentTypeException(1, "\"" + "int\" is expected at function GET_SPLITS, " + "but \"" + arguments[1].getTypeName() + "\" is found");
    }
    stringOI = (StringObjectInspector) arguments[0];
    intOI = (IntObjectInspector) arguments[1];
    List<String> names = Arrays.asList("split");
    List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector);
    StructObjectInspector outputOI = ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOIs);
    LOG.debug("done initializing GenericUDFGetSplits");
    return outputOI;
}
Also used: ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector), IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector), UDFArgumentLengthException (org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException), UDFArgumentTypeException (org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException)
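The inspectors captured in initialize() are what process() later uses to decode the raw argument objects. A minimal, hypothetical sketch of that caller side (it reuses the stringOI and intOI fields from the example above; the body is illustrative only, not Hive's actual implementation):

@Override
public void process(Object[] arguments) throws HiveException {
    // Decode the raw arguments with the inspectors saved in initialize().
    String query = stringOI.getPrimitiveJavaObject(arguments[0]);
    int numSplits = intOI.get(arguments[1]);
    // Hypothetical: compute splits for the query, then emit each one as a
    // single-field row matching the struct<split:binary> shape declared above.
    // forward(new Object[] { splitBytes });
}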

Example 2 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class GenericUDTFStack, method initialize:

@Override
public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
    if (args.length < 2) {
        throw new UDFArgumentException("STACK() expects at least two arguments.");
    }
    if (!(args[0] instanceof ConstantObjectInspector)) {
        throw new UDFArgumentException("The first argument to STACK() must be a constant integer (got " + args[0].getTypeName() + " instead).");
    }
    numRows = (IntWritable) ((ConstantObjectInspector) args[0]).getWritableConstantValue();
    if (numRows == null || numRows.get() < 1) {
        throw new UDFArgumentException("STACK() expects its first argument to be >= 1.");
    }
    // Divide and round up.
    numCols = (args.length - 1 + numRows.get() - 1) / numRows.get();
    for (int jj = 0; jj < numCols; ++jj) {
        returnOIResolvers.add(new ReturnObjectInspectorResolver());
        for (int ii = 0; ii < numRows.get(); ++ii) {
            int index = ii * numCols + jj + 1;
            if (index < args.length && !returnOIResolvers.get(jj).update(args[index])) {
                throw new UDFArgumentException("Argument " + (jj + 1) + "'s type (" + args[jj + 1].getTypeName() + ") should be equal to argument " + index + "'s type (" + args[index].getTypeName() + ")");
            }
        }
    }
    forwardObj = new Object[numCols];
    for (int ii = 0; ii < args.length; ++ii) {
        argOIs.add(args[ii]);
    }
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    for (int ii = 0; ii < numCols; ++ii) {
        fieldNames.add("col" + ii);
        fieldOIs.add(returnOIResolvers.get(ii).get());
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
}
Also used: UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException), WritableConstantIntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableConstantIntObjectInspector), ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ReturnObjectInspectorResolver (org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ReturnObjectInspectorResolver), ArrayList (java.util.ArrayList)
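The ceiling division on numCols determines how the remaining arguments are laid out row-major. A standalone sketch of the same arithmetic (plain Java, not Hive code; the class name is hypothetical):

public class StackLayoutDemo {
    public static void main(String[] args) {
        int numArgs = 5;  // e.g. stack(2, 'a', 1, 'b', 2): one count plus four values
        int numRows = 2;  // the constant first argument
        int numCols = (numArgs - 1 + numRows - 1) / numRows;  // ceiling of 4/2 = 2
        // Row ii, column jj is read from argument index ii * numCols + jj + 1,
        // so row 0 = ('a', 1) and row 1 = ('b', 2).
        System.out.println("numCols = " + numCols);
    }
}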

Example 3 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class WindowingTableFunction, method execute:

@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void execute(PTFPartitionIterator<Object> pItr, PTFPartition outP) throws HiveException {
    ArrayList<List<?>> oColumns = new ArrayList<List<?>>();
    PTFPartition iPart = pItr.getPartition();
    StructObjectInspector inputOI = iPart.getOutputOI();
    WindowTableFunctionDef wTFnDef = (WindowTableFunctionDef) getTableDef();
    for (WindowFunctionDef wFn : wTFnDef.getWindowFunctions()) {
        boolean processWindow = processWindow(wFn.getWindowFrame());
        pItr.reset();
        if (!processWindow) {
            Object out = evaluateFunctionOnPartition(wFn, iPart);
            if (!wFn.isPivotResult()) {
                out = new SameList(iPart.size(), out);
            }
            oColumns.add((List<?>) out);
        } else {
            oColumns.add(executeFnwithWindow(wFn, iPart));
        }
    }
    for (int i = 0; i < iPart.size(); i++) {
        ArrayList oRow = new ArrayList();
        Object iRow = iPart.getAt(i);
        for (int j = 0; j < oColumns.size(); j++) {
            oRow.add(oColumns.get(j).get(i));
        }
        for (StructField f : inputOI.getAllStructFieldRefs()) {
            oRow.add(inputOI.getStructFieldData(iRow, f));
        }
        outP.append(oRow);
    }
}
Also used: StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), PTFPartition (org.apache.hadoop.hive.ql.exec.PTFPartition), ArrayList (java.util.ArrayList), WindowTableFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowTableFunctionDef), AbstractList (java.util.AbstractList), List (java.util.List), WindowFunctionDef (org.apache.hadoop.hive.ql.plan.ptf.WindowFunctionDef), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
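The tail of execute() shows a common pattern: widen each output row with the computed columns, then copy the original struct fields through the inspector. A self-contained sketch of that field-copy idiom (the class and method names are illustrative):

import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class StructRowUtil {
    /** Copies every field of a struct-typed row into a flat list, mirroring
     *  the inner loop of execute() above. */
    public static List<Object> flattenRow(Object row, StructObjectInspector oi) {
        List<Object> out = new ArrayList<Object>();
        for (StructField f : oi.getAllStructFieldRefs()) {
            // getStructFieldData extracts the raw field value; interpret it
            // with f.getFieldObjectInspector() if a typed value is needed.
            out.add(oi.getStructFieldData(row, f));
        }
        return out;
    }
}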

Example 4 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class MatchPath, method createSelectListOI:

protected static StructObjectInspector createSelectListOI(MatchPath evaluator, PTFInputDef inpDef) {
    StructObjectInspector inOI = inpDef.getOutputShape().getOI();
    ArrayList<String> inputColumnNames = new ArrayList<String>();
    ArrayList<String> selectListNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    for (StructField f : inOI.getAllStructFieldRefs()) {
        String inputColName = evaluator.inputColumnNamesMap.get(f.getFieldName());
        if (inputColName != null) {
            inputColumnNames.add(inputColName);
            selectListNames.add(f.getFieldName());
            fieldOIs.add(f.getFieldObjectInspector());
        }
    }
    StandardListObjectInspector pathAttrOI = ObjectInspectorFactory.getStandardListObjectInspector(ObjectInspectorFactory.getStandardStructObjectInspector(inputColumnNames, fieldOIs));
    ArrayList<ObjectInspector> selectFieldOIs = new ArrayList<ObjectInspector>();
    selectFieldOIs.addAll(fieldOIs);
    selectFieldOIs.add(pathAttrOI);
    selectListNames.add(MatchPath.PATHATTR_NAME);
    return ObjectInspectorFactory.getStandardStructObjectInspector(selectListNames, selectFieldOIs);
}
Also used: PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector), ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector), StandardListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector), ConstantObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector), StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField), ArrayList (java.util.ArrayList)
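The pathAttrOI construction nests inspectors: a standard list whose element type is a standard struct. A self-contained sketch of the same composition, assuming hypothetical field names (the class name is also illustrative):

import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

public class ListOfStructOIDemo {
    public static ListObjectInspector build() {
        // struct<name:string, id:int> -- hypothetical fields for illustration
        List<String> names = Arrays.asList("name", "id");
        List<ObjectInspector> fieldOIs = Arrays.<ObjectInspector>asList(
                PrimitiveObjectInspectorFactory.javaStringObjectInspector,
                PrimitiveObjectInspectorFactory.javaIntObjectInspector);
        // array<struct<name:string, id:int>>, the same nesting as pathAttrOI above
        return ObjectInspectorFactory.getStandardListObjectInspector(
                ObjectInspectorFactory.getStandardStructObjectInspector(names, fieldOIs));
    }
}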

Example 5 with StructObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector in project hive by apache.

From the class TestVectorSerDeRow, method testVectorSerializeRow:

void testVectorSerializeRow(Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
    String[] emptyScratchTypeNames = new String[0];
    VectorRandomRowSource source = new VectorRandomRowSource();
    source.init(r);
    VectorizedRowBatchCtx batchContext = new VectorizedRowBatchCtx();
    batchContext.init(source.rowStructObjectInspector(), emptyScratchTypeNames);
    VectorizedRowBatch batch = batchContext.createVectorizedRowBatch();
    VectorAssignRow vectorAssignRow = new VectorAssignRow();
    vectorAssignRow.init(source.typeNames());
    int fieldCount = source.typeNames().size();
    DeserializeRead deserializeRead;
    SerializeWrite serializeWrite;
    switch(serializationType) {
        case BINARY_SORTABLE:
            deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
            serializeWrite = new BinarySortableSerializeWrite(fieldCount);
            break;
        case LAZY_BINARY:
            deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
            serializeWrite = new LazyBinarySerializeWrite(fieldCount);
            break;
        case LAZY_SIMPLE:
            {
                StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
                LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
                byte separator = (byte) '\t';
                deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false, separator, lazySerDeParams);
                serializeWrite = new LazySimpleSerializeWrite(fieldCount, separator, lazySerDeParams);
            }
            break;
        default:
            throw new Error("Unknown serialization type " + serializationType);
    }
    VectorSerializeRow vectorSerializeRow = new VectorSerializeRow(serializeWrite);
    vectorSerializeRow.init(source.typeNames());
    Object[][] randomRows = source.randomRows(100000);
    int firstRandomRowIndex = 0;
    for (int i = 0; i < randomRows.length; i++) {
        Object[] row = randomRows[i];
        vectorAssignRow.assignRow(batch, batch.size, row);
        batch.size++;
        if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
            serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
            firstRandomRowIndex = i + 1;
            batch.reset();
        }
    }
    if (batch.size > 0) {
        serializeBatch(batch, vectorSerializeRow, deserializeRead, source, randomRows, firstRandomRowIndex);
    }
}
Also used: LazySerDeParameters (org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters), DeserializeRead (org.apache.hadoop.hive.serde2.fast.DeserializeRead), BinarySortableDeserializeRead (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead), LazySimpleDeserializeRead (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleDeserializeRead), LazyBinaryDeserializeRead (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead), LazyBinarySerializeWrite (org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite), BinarySortableSerializeWrite (org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite), LazySimpleSerializeWrite (org.apache.hadoop.hive.serde2.lazy.fast.LazySimpleSerializeWrite), SerializeWrite (org.apache.hadoop.hive.serde2.fast.SerializeWrite), StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
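The loop at the end of the test is the standard VectorizedRowBatch fill-flush-reset cycle: assign rows until the batch reaches its default capacity, hand it off, then reset and keep filling. A minimal sketch of that cycle, with processBatch() as a hypothetical stand-in for whatever consumes the batch (in the test above, serialization and verification):

import org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class BatchFillDemo {
    // Hypothetical consumer of a filled batch.
    static void processBatch(VectorizedRowBatch batch) { /* serialize, ship, or verify */ }

    static void drainRows(Object[][] rows, VectorizedRowBatch batch, VectorAssignRow assigner)
            throws Exception {
        for (Object[] row : rows) {
            assigner.assignRow(batch, batch.size, row);
            batch.size++;
            if (batch.size == VectorizedRowBatch.DEFAULT_SIZE) {
                processBatch(batch);  // flush a full batch
                batch.reset();        // clear size/selection so the batch can be refilled
            }
        }
        if (batch.size > 0) {
            processBatch(batch);      // flush the final partial batch
        }
    }
}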

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 232
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 113
ArrayList (java.util.ArrayList): 84
StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField): 69
PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector): 46
SerDeException (org.apache.hadoop.hive.serde2.SerDeException): 42
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 42
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 40
Test (org.junit.Test): 38
Properties (java.util.Properties): 35
Text (org.apache.hadoop.io.Text): 32
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 30
Path (org.apache.hadoop.fs.Path): 29
HiveException (org.apache.hadoop.hive.ql.metadata.HiveException): 27
IOException (java.io.IOException): 25
Configuration (org.apache.hadoop.conf.Configuration): 25
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 24
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 24
TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo): 23
InputSplit (org.apache.hadoop.mapred.InputSplit): 23