Search in sources :

Example 6 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class SkewJoinHandler method initiliaze.

/**
 * Sets up the per-alias state used when spilling skewed join keys.
 *
 * <p>For each of the {@code numAliases} positions this builds a struct inspector over
 * the reduce-side key fields, instantiates and initializes the serializer declared by
 * the skew-key table descriptor, and registers a union inspector (values followed by
 * keys) for that position. Afterwards every non-null row container in
 * {@code joinOp.storage} is re-pointed at the new serializer, inspector and table
 * descriptor.
 *
 * <p>If a serializer cannot be initialized, the error is logged, skew-join handling is
 * switched off on the join operator, and the remaining positions are not processed.
 *
 * @param hconf the job configuration; stored on this handler and used to derive the task id
 */
public void initiliaze(Configuration hconf) {
    this.hconf = hconf;
    JoinDesc desc = joinOp.getConf();
    skewKeyDefinition = desc.getSkewKeyDefinition();
    skewKeysTableObjectInspector = new HashMap<Byte, StructObjectInspector>(numAliases);
    tblDesc = desc.getSkewKeysValuesTables();
    tblSerializers = new HashMap<Byte, AbstractSerDe>(numAliases);
    bigKeysExistingMap = new HashMap<Byte, Boolean>(numAliases);
    taskId = Utilities.getTaskId(hconf);
    int[][] filterMap = desc.getFilterMap();
    for (int pos = 0; pos < numAliases; pos++) {
        Byte alias = conf.getTagOrder()[pos];
        // The maps below are keyed by the loop position, not by the alias tag.
        Byte posKey = Byte.valueOf((byte) pos);

        // Collect one inspector per field of the reduce-side KEY struct.
        List<ObjectInspector> keyInspectors = new ArrayList<ObjectInspector>();
        StructObjectInspector inputInspector = (StructObjectInspector) joinOp.inputObjInspectors[alias];
        StructField keyRef = inputInspector.getStructFieldRef(Utilities.ReduceField.KEY.toString());
        StructObjectInspector keyStruct = (StructObjectInspector) keyRef.getFieldObjectInspector();
        List<? extends StructField> keyFields = keyStruct.getAllStructFieldRefs();
        for (StructField keyField : keyFields) {
            keyInspectors.add(keyField.getFieldObjectInspector());
        }

        TableDesc joinKeyDesc = desc.getKeyTableDesc();
        List<String> keyColNames = Utilities.getColumnNames(joinKeyDesc.getProperties());
        StructObjectInspector keyTableInspector =
            ObjectInspectorFactory.getStandardStructObjectInspector(keyColNames, keyInspectors);

        try {
            AbstractSerDe serde =
                (AbstractSerDe) ReflectionUtils.newInstance(tblDesc.get(alias).getDeserializerClass(), null);
            SerDeUtils.initializeSerDe(serde, null, tblDesc.get(alias).getProperties(), null);
            tblSerializers.put(posKey, serde);
        } catch (SerDeException e) {
            // Without a working serializer the skew keys cannot be spilled; give up on skew join.
            LOG.error("Skewjoin will be disabled due to " + e.getMessage(), e);
            joinOp.handleSkewJoin = false;
            break;
        }

        boolean noFilter = filterMap == null || filterMap[pos] == null;
        TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias, joinOp.spillTableDesc, conf, noFilter);
        List<String> valColNames = valTblDesc != null
            ? Utilities.getColumnNames(valTblDesc.getProperties())
            : new ArrayList<String>();
        StructObjectInspector valTableInspector = ObjectInspectorFactory.getStandardStructObjectInspector(
            valColNames, joinOp.joinValuesStandardObjectInspectors[pos]);

        // Union inspector: value columns first, then key columns.
        StructObjectInspector rowInspector = ObjectInspectorFactory.getUnionStructObjectInspector(
            Arrays.asList(valTableInspector, keyTableInspector));
        skewKeysTableObjectInspector.put(posKey, rowInspector);
    }

    // Reset each row container's serde, object inspector and table descriptor.
    for (int pos = 0; pos < numAliases; pos++) {
        Byte alias = conf.getTagOrder()[pos];
        RowContainer<ArrayList<Object>> container = (RowContainer) joinOp.storage[pos];
        if (container == null) {
            continue;
        }
        Byte posKey = Byte.valueOf((byte) pos);
        container.setSerDe(tblSerializers.get(posKey), skewKeysTableObjectInspector.get(posKey));
        container.setTableDesc(tblDesc.get(alias));
    }
}
Also used : RowContainer(org.apache.hadoop.hive.ql.exec.persistence.RowContainer) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ArrayList(java.util.ArrayList) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) TableDesc(org.apache.hadoop.hive.ql.plan.TableDesc) JoinDesc(org.apache.hadoop.hive.ql.plan.JoinDesc) SerDeException(org.apache.hadoop.hive.serde2.SerDeException) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 7 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class VectorUDAFAvgDecimal method initPartialResultInspector.

/**
 * Builds the struct inspector describing this aggregate's partial result: a
 * {@code count} field (writable long) and a {@code sum} field (writable decimal).
 *
 * <p>The sum's decimal type is derived from the input precision and scale via
 * {@code GenericUDAFAverage.deriveSumFieldTypeInfo}; its scale and precision are also
 * cached in {@code sumScale} and {@code sumPrecision}.
 */
private void initPartialResultInspector() {
    // The vectorized partial aggregate must emit the same type the row-mode
    // aggregation expects. For decimal, the type is "same number of integer
    // digits and 4 more decimal digits".
    DecimalTypeInfo sumTypeInfo = GenericUDAFAverage.deriveSumFieldTypeInfo(inputPrecision, inputScale);
    this.sumScale = (short) sumTypeInfo.scale();
    this.sumPrecision = (short) sumTypeInfo.precision();

    // Field names and inspectors are parallel lists: index 0 is count, index 1 is sum.
    List<String> fieldNames = new ArrayList<String>();
    List<ObjectInspector> fieldInspectors = new ArrayList<ObjectInspector>();

    fieldNames.add("count");
    fieldInspectors.add(PrimitiveObjectInspectorFactory.writableLongObjectInspector);

    fieldNames.add("sum");
    fieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(sumTypeInfo));

    soi = ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldInspectors);
}
Also used : DecimalTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList)

Example 8 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class RegexSerDe method initialize.

/**
 * Configures this SerDe from the table properties.
 *
 * <p>Reads the input regex, column names and column types from {@code tbl}, compiles
 * the regex (always with {@code DOTALL}; additionally with {@code CASE_INSENSITIVE}
 * when the {@code INPUT_REGEX_CASE_SENSITIVE} property equals "true", ignoring case),
 * builds the row object inspector, and allocates the reusable row buffers.
 *
 * <p>The {@code columns.comments} property is optional: when it is absent the row
 * inspector is built without field comments instead of failing.
 *
 * @param conf the Hadoop configuration (not read by this method)
 * @param tbl  the table properties to read the SerDe settings from
 * @throws SerDeException if the input regex property is missing, or if any column has
 *                        a non-primitive type
 */
@Override
public void initialize(Configuration conf, Properties tbl) throws SerDeException {
    // We can get the table definition from tbl.
    // Read the configuration parameters
    inputRegex = tbl.getProperty(INPUT_REGEX);
    String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
    String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
    boolean inputRegexIgnoreCase = "true".equalsIgnoreCase(tbl.getProperty(INPUT_REGEX_CASE_SENSITIVE));
    // output format string is not supported anymore, warn user of deprecation
    if (null != tbl.getProperty("output.format.string")) {
        LOG.warn("output.format.string has been deprecated");
    }
    // Parse the configuration parameters
    if (inputRegex != null) {
        // Pattern flags are bit masks, so combine them with bitwise OR.
        inputPattern = Pattern.compile(inputRegex, Pattern.DOTALL | (inputRegexIgnoreCase ? Pattern.CASE_INSENSITIVE : 0));
    } else {
        inputPattern = null;
        throw new SerDeException("This table does not have serde property \"input.regex\"!");
    }
    final String columnNameDelimiter = tbl.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ? tbl.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : String.valueOf(SerDeUtils.COMMA);
    List<String> columnNames = Arrays.asList(columnNameProperty.split(columnNameDelimiter));
    columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    assert columnNames.size() == columnTypes.size();
    numColumns = columnNames.size();
    /* Constructing the row ObjectInspector:
     * The row consists of some set of primitive columns, each column will
     * be a java object of primitive type.
     */
    List<ObjectInspector> columnOIs = new ArrayList<ObjectInspector>(columnNames.size());
    for (int c = 0; c < numColumns; c++) {
        TypeInfo typeInfo = columnTypes.get(c);
        if (typeInfo instanceof PrimitiveTypeInfo) {
            PrimitiveTypeInfo pti = (PrimitiveTypeInfo) typeInfo;
            AbstractPrimitiveJavaObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(pti);
            columnOIs.add(oi);
        } else {
            throw new SerDeException(getClass().getName() + " doesn't allow column [" + c + "] named " + columnNames.get(c) + " with type " + columnTypes.get(c));
        }
    }
    // StandardStruct uses ArrayList to store the row.
    // Splitter.split rejects a null input, so only split the comments when the
    // property is actually present; otherwise build the inspector without comments.
    String columnComments = tbl.getProperty("columns.comments");
    if (columnComments == null) {
        rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs);
    } else {
        rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnOIs, Lists.newArrayList(Splitter.on('\0').split(columnComments)));
    }
    row = new ArrayList<Object>(numColumns);
    // Constructing the row object, etc, which will be reused for all rows.
    for (int c = 0; c < numColumns; c++) {
        row.add(null);
    }
    outputFields = new Object[numColumns];
    outputRowText = new Text();
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) AbstractPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector) ArrayList(java.util.ArrayList) AbstractPrimitiveJavaObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveJavaObjectInspector) Text(org.apache.hadoop.io.Text) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) VarcharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo) CharTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo) PrimitiveTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)

Example 9 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hive by apache.

the class DynamicSerDe method dynamicSerDeStructBaseToObjectInspector.

/**
 * Recursively converts a DynamicSerDe type descriptor into the matching standard
 * object inspector.
 *
 * <p>Lists and maps recurse into their element, key and value types. Primitives are
 * resolved through the primitive type entry of the descriptor's real Java class. Any
 * other descriptor is treated as a struct, whose fields are converted one by one.
 *
 * @param bt the type descriptor to convert
 * @return a standard list, map, primitive-Java or struct object inspector for {@code bt}
 * @throws SerDeException declared for callers; propagated from recursive conversion
 */
public static ObjectInspector dynamicSerDeStructBaseToObjectInspector(DynamicSerDeTypeBase bt) throws SerDeException {
    if (bt.isList()) {
        DynamicSerDeTypeList listType = (DynamicSerDeTypeList) bt;
        ObjectInspector elementInspector = dynamicSerDeStructBaseToObjectInspector(listType.getElementType());
        return ObjectInspectorFactory.getStandardListObjectInspector(elementInspector);
    }

    if (bt.isMap()) {
        DynamicSerDeTypeMap mapType = (DynamicSerDeTypeMap) bt;
        ObjectInspector keyInspector = dynamicSerDeStructBaseToObjectInspector(mapType.getKeyType());
        ObjectInspector valueInspector = dynamicSerDeStructBaseToObjectInspector(mapType.getValueType());
        return ObjectInspectorFactory.getStandardMapObjectInspector(keyInspector, valueInspector);
    }

    if (bt.isPrimitive()) {
        PrimitiveTypeEntry typeEntry = PrimitiveObjectInspectorUtils.getTypeEntryFromPrimitiveJavaClass(bt.getRealType());
        return PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(typeEntry.primitiveCategory);
    }

    // Not a list, map or primitive: must be a struct.
    DynamicSerDeStructBase structType = (DynamicSerDeStructBase) bt;
    DynamicSerDeField[] children = structType.getFieldList().getChildren();
    ArrayList<String> names = new ArrayList<String>(children.length);
    ArrayList<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(children.length);
    for (int idx = 0; idx < children.length; idx++) {
        DynamicSerDeField child = children[idx];
        names.add(child.name);
        inspectors.add(dynamicSerDeStructBaseToObjectInspector(child.getFieldType().getMyType()));
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) PrimitiveTypeEntry(org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveTypeEntry) ArrayList(java.util.ArrayList)

Example 10 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project mongo-hadoop by mongodb.

the class BSONSerDeTest method testStruct.

/**
 * Round-trips a single struct column ({@code struct<one:int,two:string>}) through
 * {@code BSONSerDe}: deserializing a BSON object must yield the field values as a
 * list, and serializing that list back must yield the original BSON document wrapped
 * under the column name.
 */
@Test
public void testStruct() throws SerDeException {
    String colName = "m";
    String colType = "struct<one:int,two:string>";
    int oneValue = 10;
    String twoValue = "key";

    // BSON form of the struct.
    BasicBSONObject bsonStruct = new BasicBSONObject();
    bsonStruct.put("one", oneValue);
    bsonStruct.put("two", twoValue);

    // Hive form of the struct: structs come back as arrays.
    ArrayList<Object> hiveStruct = new ArrayList<Object>();
    hiveStruct.add(oneValue);
    hiveStruct.add(twoValue);

    BSONSerDe serde = new BSONSerDe();
    Object deserialized = helpDeserialize(serde, colName, colType, bsonStruct, true);
    assertThat(hiveStruct, equalTo(deserialized));

    // The inner struct inspector needs a list of field names...
    ArrayList<String> structFieldNames = new ArrayList<String>();
    structFieldNames.add("one");
    structFieldNames.add("two");
    // ...and a matching list of field inspectors.
    ArrayList<ObjectInspector> structFieldInspectors = new ArrayList<ObjectInspector>();
    structFieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(Integer.class));
    structFieldInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveObjectInspectorFromClass(String.class));
    StructObjectInspector innerInspector =
        ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldInspectors);

    // The inner inspector is then wrapped into the inspector for the whole row.
    StructObjectInspector rowInspector = createObjectInspector(colName, innerInspector);

    // Expected serialized document: the BSON struct stored under the column name.
    BasicBSONObject expectedDoc = new BasicBSONObject();
    expectedDoc.put(colName, bsonStruct);

    // Structs are stored as array/list inside hive, so the row passed in holds the list form.
    ArrayList<Object> hiveRow = new ArrayList<Object>();
    hiveRow.add(hiveStruct);
    Object serialized = serde.serialize(hiveRow, rowInspector);
    assertThat(new BSONWritable(expectedDoc), equalTo(serialized));
}
Also used : ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) BSONWritable(com.mongodb.hadoop.io.BSONWritable) BasicBSONObject(org.bson.BasicBSONObject) BasicBSONObject(org.bson.BasicBSONObject) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)78 ArrayList (java.util.ArrayList)73 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)56 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)20 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)16 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)16 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)12 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)11 Text (org.apache.hadoop.io.Text)11 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)10 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)10 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)10 Test (org.junit.Test)10 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)9 StandardStructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector)9 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)8 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)8 IOException (java.io.IOException)7 StandardListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector)7 WritableHiveCharObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector)7