Search in sources :

Example 71 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project DataX by alibaba.

The method orcFileStartWrite of the class HdfsHelper.

/**
 * Writes the records pulled from {@code lineReceiver} to an ORC file.
 *
 * <p>Column names and type inspectors are derived from the {@code Key.COLUMN} entries of
 * {@code config}. An output compressor class is set only when {@code Key.COMPRESS} is present,
 * is not "NONE" (case-insensitive), and {@code getCompressCodec} returns a non-null codec class.
 * A record is written only when the right-hand flag returned by {@code transportOneRecord} is
 * {@code false}.
 *
 * <p>If anything fails, the error is logged with its cause, the writer is closed on a
 * best-effort basis, the parent directory of {@code fileName} is deleted, and a
 * {@code DataXException} wrapping the original exception is thrown.
 *
 * @param lineReceiver        source of the records to write; reading stops at the first {@code null}
 * @param config              writer configuration providing the column list and compression name
 * @param fileName            name of the ORC file to write
 * @param taskPluginCollector passed through to {@code transportOneRecord}
 *                            (presumably collects rejected records - confirm against that helper)
 */
public void orcFileStartWrite(RecordReceiver lineReceiver, Configuration config, String fileName, TaskPluginCollector taskPluginCollector) {
    List<Configuration> columns = config.getListConfiguration(Key.COLUMN);
    String compress = config.getString(Key.COMPRESS, null);
    List<String> columnNames = getColumnNames(columns);
    List<ObjectInspector> columnTypeInspectors = getColumnTypeInspectors(columns);
    StructObjectInspector inspector = (StructObjectInspector) ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, columnTypeInspectors);
    OrcSerde orcSerde = new OrcSerde();
    FileOutputFormat outFormat = new OrcOutputFormat();
    if (null != compress && !"NONE".equalsIgnoreCase(compress)) {
        Class<? extends CompressionCodec> codecClass = getCompressCodec(compress);
        if (null != codecClass) {
            outFormat.setOutputCompressorClass(conf, codecClass);
        }
    }
    RecordWriter writer = null;
    // Set just before the regular close so the failure path never closes the writer twice.
    boolean closeAttempted = false;
    try {
        writer = outFormat.getRecordWriter(fileSystem, conf, fileName, Reporter.NULL);
        Record record;
        while ((record = lineReceiver.getFromReader()) != null) {
            MutablePair<List<Object>, Boolean> transportResult = transportOneRecord(record, columns, taskPluginCollector);
            if (!transportResult.getRight()) {
                writer.write(NullWritable.get(), orcSerde.serialize(transportResult.getLeft(), inspector));
            }
        }
        closeAttempted = true;
        writer.close(Reporter.NULL);
    } catch (Exception e) {
        String message = String.format("写文件文件[%s]时发生IO异常,请检查您的网络是否正常!", fileName);
        // Log the cause as well, otherwise the stack trace of the failure is lost.
        LOG.error(message, e);
        if (writer != null && !closeAttempted) {
            // Release the writer before removing the directory; a secondary failure here
            // must not mask the original exception.
            try {
                writer.close(Reporter.NULL);
            } catch (Exception closeFailure) {
                LOG.warn(String.format("Failed to close the ORC writer for [%s] after a write error", fileName), closeFailure);
            }
        }
        Path path = new Path(fileName);
        deleteDir(path.getParent());
        throw DataXException.asDataXException(HdfsWriterErrorCode.Write_FILE_IO_ERROR, e);
    }
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Configuration(com.alibaba.datax.common.util.Configuration) OrcOutputFormat(org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat) IOException(java.io.IOException) DataXException(com.alibaba.datax.common.exception.DataXException) OrcSerde(org.apache.hadoop.hive.ql.io.orc.OrcSerde) Record(com.alibaba.datax.common.element.Record) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)

Example 72 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project hadoop-pcap by RIPE-NCC.

The method initialize of the class PcapDeserializer.

/**
 * Initializes the column names, the reusable row buffer and the struct object inspector
 * from the {@code Constants.LIST_COLUMNS} and {@code Constants.LIST_COLUMN_TYPES} properties.
 *
 * @param cfg   configuration; not used by this method
 * @param props properties holding the comma-separated column names and the column type string
 * @throws SerDeException if either property is missing, or if the number of column names
 *                        differs from the number of column types
 */
@Override
public void initialize(Configuration cfg, Properties props) throws SerDeException {
    String columnNameProperty = props.getProperty(Constants.LIST_COLUMNS);
    String columnTypeProperty = props.getProperty(Constants.LIST_COLUMN_TYPES);
    if (columnNameProperty == null || columnTypeProperty == null) {
        throw new SerDeException("Missing table property: both " + Constants.LIST_COLUMNS + " and " + Constants.LIST_COLUMN_TYPES + " are required");
    }
    List<String> names = Arrays.asList(columnNameProperty.split(","));
    List<TypeInfo> columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
    // Validate explicitly: a Java assert is disabled by default, so a mismatch would
    // otherwise only surface later as an unrelated runtime failure.
    if (names.size() != columnTypes.size()) {
        throw new SerDeException("Number of column names (" + names.size() + ") does not match number of column types (" + columnTypes.size() + ")");
    }
    // Only publish state once the schema is known to be consistent.
    columnNames = names;
    numColumns = names.size();
    List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(numColumns);
    row = new ArrayList<Object>(numColumns);
    for (TypeInfo columnType : columnTypes) {
        inspectors.add(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(columnType));
        // Pre-fill the row buffer with one null slot per column.
        row.add(null);
    }
    inspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, inspectors);
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ArrayList(java.util.ArrayList) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)

Example 73 with ObjectInspectorFactory.getStandardStructObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getStandardStructObjectInspector in project cdap by caskdata.

The method testStandardUnionObjectInspector of the class StandardObjectInspectorsTest.

/**
 * Exercises {@code StandardUnionObjectInspector} built over six alternatives
 * (int, string, boolean, list, map, struct): equality of two inspectors created from equal
 * lists, type name and TypeInfo round-tripping, null handling, and tag/field/JSON/compare/copy
 * behavior for a union value of each alternative.
 */
@Test
public void testStandardUnionObjectInspector() throws Throwable {
    // Any failure simply propagates; the test runner reports it, so no catch/print/rethrow is needed.
    ArrayList<ObjectInspector> objectInspectors = new ArrayList<>();
    // add primitive types
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    objectInspectors.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
    // add a list
    objectInspectors.add(ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector));
    // add a map
    objectInspectors.add(ObjectInspectorFactory.getStandardMapObjectInspector(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.javaStringObjectInspector));
    // add a struct
    List<String> fieldNames = new ArrayList<>();
    fieldNames.add("myDouble");
    fieldNames.add("myLong");
    ArrayList<ObjectInspector> fieldObjectInspectors = new ArrayList<>();
    fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaDoubleObjectInspector);
    fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    objectInspectors.add(ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldObjectInspectors));
    StandardUnionObjectInspector uoi1 = ObjectInspectorFactory.getStandardUnionObjectInspector(objectInspectors);
    // Shallow copy via the copy constructor instead of clone() plus an unchecked cast.
    List<ObjectInspector> objectInspectorsCopy = new ArrayList<>(objectInspectors);
    StandardUnionObjectInspector uoi2 = ObjectInspectorFactory.getStandardUnionObjectInspector(objectInspectorsCopy);
    Assert.assertEquals(uoi1, uoi2);
    Assert.assertEquals(ObjectInspectorUtils.getObjectInspectorName(uoi1), ObjectInspectorUtils.getObjectInspectorName(uoi2));
    Assert.assertTrue(ObjectInspectorUtils.compareTypes(uoi1, uoi2));
    // compareSupported returns false because Union can contain
    // an object of Map
    Assert.assertFalse(ObjectInspectorUtils.compareSupported(uoi1));
    // construct unionObjectInspector without Map field.
    List<ObjectInspector> ois = new ArrayList<>(objectInspectors);
    ois.set(4, PrimitiveObjectInspectorFactory.javaIntObjectInspector);
    Assert.assertTrue(ObjectInspectorUtils.compareSupported(ObjectInspectorFactory.getStandardUnionObjectInspector(ois)));
    // metadata
    Assert.assertEquals(Category.UNION, uoi1.getCategory());
    List<? extends ObjectInspector> uois = uoi1.getObjectInspectors();
    Assert.assertEquals(6, uois.size());
    // The size was just asserted to be 6, so iterate over the actual size rather than a magic number.
    for (int i = 0; i < uois.size(); i++) {
        Assert.assertEquals(objectInspectors.get(i), uois.get(i));
    }
    StringBuilder unionTypeName = new StringBuilder();
    unionTypeName.append("uniontype<");
    for (int i = 0; i < uois.size(); i++) {
        if (i > 0) {
            unionTypeName.append(",");
        }
        unionTypeName.append(uois.get(i).getTypeName());
    }
    unionTypeName.append(">");
    Assert.assertEquals(unionTypeName.toString(), uoi1.getTypeName());
    // TypeInfo
    TypeInfo typeInfo1 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi1);
    Assert.assertEquals(Category.UNION, typeInfo1.getCategory());
    Assert.assertEquals(UnionTypeInfo.class.getName(), typeInfo1.getClass().getName());
    Assert.assertEquals(typeInfo1.getTypeName(), uoi1.getTypeName());
    Assert.assertEquals(typeInfo1, TypeInfoUtils.getTypeInfoFromTypeString(uoi1.getTypeName()));
    TypeInfo typeInfo2 = TypeInfoUtils.getTypeInfoFromObjectInspector(uoi2);
    Assert.assertEquals(typeInfo1, typeInfo2);
    Assert.assertEquals(TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo1), TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo2));
    Assert.assertEquals(TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo1), TypeInfoUtils.getStandardWritableObjectInspectorFromTypeInfo(typeInfo2));
    // null
    Assert.assertNull(uoi1.getField(null));
    Assert.assertEquals(-1, uoi1.getTag(null));
    // Union holding the int alternative (tag 0)
    UnionObject union = new StandardUnionObjectInspector.StandardUnion((byte) 0, 1);
    Assert.assertEquals(0, uoi1.getTag(union));
    Assert.assertEquals(1, uoi1.getField(union));
    Assert.assertEquals("{0:1}", SerDeUtils.getJSONString(union, uoi1));
    Assert.assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnionObjectInspector.StandardUnion((byte) 0, 1), uoi2));
    Assert.assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(1));
    // Union holding the string alternative (tag 1)
    union = new StandardUnionObjectInspector.StandardUnion((byte) 1, "two");
    Assert.assertEquals(1, uoi1.getTag(union));
    Assert.assertEquals("two", uoi1.getField(union));
    Assert.assertEquals("{1:\"two\"}", SerDeUtils.getJSONString(union, uoi1));
    Assert.assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnionObjectInspector.StandardUnion((byte) 1, "two"), uoi2));
    Assert.assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals("two"));
    // Union holding the boolean alternative (tag 2)
    union = new StandardUnionObjectInspector.StandardUnion((byte) 2, true);
    Assert.assertEquals(2, uoi1.getTag(union));
    Assert.assertEquals(true, uoi1.getField(union));
    Assert.assertEquals("{2:true}", SerDeUtils.getJSONString(union, uoi1));
    Assert.assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnionObjectInspector.StandardUnion((byte) 2, true), uoi2));
    Assert.assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(true));
    // Union holding the list alternative (tag 3)
    ArrayList<Integer> iList = new ArrayList<>();
    iList.add(4);
    iList.add(5);
    union = new StandardUnionObjectInspector.StandardUnion((byte) 3, iList);
    Assert.assertEquals(3, uoi1.getTag(union));
    Assert.assertEquals(iList, uoi1.getField(union));
    Assert.assertEquals("{3:[4,5]}", SerDeUtils.getJSONString(union, uoi1));
    Assert.assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnionObjectInspector.StandardUnion((byte) 3, new ArrayList<>(iList)), uoi2));
    Assert.assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(iList));
    // Union holding the map alternative (tag 4)
    HashMap<Integer, String> map = new HashMap<>();
    map.put(6, "six");
    map.put(7, "seven");
    map.put(8, "eight");
    union = new StandardUnionObjectInspector.StandardUnion((byte) 4, map);
    Assert.assertEquals(4, uoi1.getTag(union));
    Assert.assertEquals(map, uoi1.getField(union));
    Assert.assertEquals("{4:{6:\"six\",7:\"seven\",8:\"eight\"}}", SerDeUtils.getJSONString(union, uoi1));
    // Comparing map-valued unions is expected to fail; capture the throwable to check its message.
    Throwable th = null;
    try {
        ObjectInspectorUtils.compare(union, uoi1, new StandardUnionObjectInspector.StandardUnion((byte) 4, new HashMap<>(map)), uoi2, null);
    } catch (Throwable t) {
        th = t;
    }
    Assert.assertNotNull(th);
    Assert.assertEquals("Compare on map type not supported!", th.getMessage());
    Assert.assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(map));
    // Union holding the struct alternative (tag 5)
    ArrayList<Object> struct = new ArrayList<>(2);
    struct.add(9.0);
    struct.add(10L);
    union = new StandardUnionObjectInspector.StandardUnion((byte) 5, struct);
    Assert.assertEquals(5, uoi1.getTag(union));
    Assert.assertEquals(struct, uoi1.getField(union));
    Assert.assertEquals("{5:{\"mydouble\":9.0,\"mylong\":10}}", SerDeUtils.getJSONString(union, uoi1));
    Assert.assertEquals(0, ObjectInspectorUtils.compare(union, uoi1, new StandardUnionObjectInspector.StandardUnion((byte) 5, new ArrayList<>(struct)), uoi2));
    Assert.assertTrue(ObjectInspectorUtils.copyToStandardObject(union, uoi1).equals(struct));
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) TypeInfo(org.apache.hadoop.hive.serde2.typeinfo.TypeInfo) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) UnionObject(org.apache.hadoop.hive.serde2.objectinspector.UnionObject) UnionTypeInfo(org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo) Test(org.junit.Test)

Aggregations

ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)69 ArrayList (java.util.ArrayList)67 StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)51 PrimitiveObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector)16 PrimitiveTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo)13 StructField (org.apache.hadoop.hive.serde2.objectinspector.StructField)12 TypeInfo (org.apache.hadoop.hive.serde2.typeinfo.TypeInfo)12 Text (org.apache.hadoop.io.Text)11 HiveException (org.apache.hadoop.hive.ql.metadata.HiveException)9 SerDeException (org.apache.hadoop.hive.serde2.SerDeException)8 Test (org.junit.Test)8 IOException (java.io.IOException)7 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)7 ListTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo)6 MapTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo)6 StructTypeInfo (org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo)6 HiveVarchar (org.apache.hadoop.hive.common.type.HiveVarchar)5 ExprNodeEvaluator (org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator)4 UDFArgumentException (org.apache.hadoop.hive.ql.exec.UDFArgumentException)4 ExprNodeDesc (org.apache.hadoop.hive.ql.plan.ExprNodeDesc)4