
Example 31 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class TestLazyBinaryColumnarSerDe method testSerDe.

@Test
public void testSerDe() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    OuterStruct outerStruct = new OuterStruct();
    outerStruct.mByte = 1;
    outerStruct.mShort = 2;
    outerStruct.mInt = 3;
    outerStruct.mLong = 4l;
    outerStruct.mFloat = 5.01f;
    outerStruct.mDouble = 6.001d;
    outerStruct.mString = "seven";
    outerStruct.mBA = new byte[] { '2' };
    InnerStruct is1 = new InnerStruct(8, 9l);
    InnerStruct is2 = new InnerStruct(10, 11l);
    outerStruct.mArray = new ArrayList<InnerStruct>(2);
    outerStruct.mArray.add(is1);
    outerStruct.mArray.add(is2);
    outerStruct.mMap = new TreeMap<String, InnerStruct>();
    outerStruct.mMap.put(new String("twelve"), new InnerStruct(13, 14l));
    outerStruct.mMap.put(new String("fifteen"), new InnerStruct(16, 17l));
    outerStruct.mStruct = new InnerStruct(18, 19l);
    BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
    ObjectInspector out_oi = serde.getObjectInspector();
    Object out_o = serde.deserialize(braw);
    if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new CrossMapEqualComparer())) {
        System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
        System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
        fail("Deserialized object does not compare");
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) CrossMapEqualComparer(org.apache.hadoop.hive.serde2.objectinspector.CrossMapEqualComparer) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
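The test above only checks the round trip; the serialized BytesRefArrayWritable itself is treated as opaque. As a minimal sketch (not part of the Hive test), the per-column byte ranges it holds could be inspected like this, reusing the braw variable from the method above:

    for (int i = 0; i < braw.size(); i++) {
        // each entry is one column's serialized bytes within the row
        BytesRefWritable ref = braw.get(i);
        System.out.println("column " + i + ": start=" + ref.getStart() + ", length=" + ref.getLength());
    }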

Example 32 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class TestLazyBinaryColumnarSerDe method testHandlingAlteredSchemas.

/**
 * HIVE-5788
 * <p>
 * Background: in an "add column" case, the table metadata changes but the existing data does not.
 * Columns that are missing from the data but required by the metadata are read as null.
 * <p>
 * This tests the use case of altering the columns of a table that already contains data, then adding more data
 * in the new schema, and checks that this serde can read both kinds of rows from the resulting table.
 * @throws SerDeException
 */
@Test
public void testHandlingAlteredSchemas() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(BeforeStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    // serialize some data in the schema before it is altered.
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    BeforeStruct bs1 = new BeforeStruct();
    bs1.l1 = 1L;
    bs1.l2 = 2L;
    BytesRefArrayWritable braw1 = (BytesRefArrayWritable) serde.serialize(bs1, oi);
    // alter table add column: change the metadata
    oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(AfterStruct.class, ObjectInspectorOptions.JAVA);
    cols = ObjectInspectorUtils.getFieldNames(oi);
    props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    // serialize some data in the schema after it is altered.
    AfterStruct as = new AfterStruct();
    as.l1 = 11L;
    as.l2 = 12L;
    as.l3 = 13L;
    BytesRefArrayWritable braw2 = (BytesRefArrayWritable) serde.serialize(as, oi);
    // fetch operator
    serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    // fetch the row inserted before schema is altered and verify
    LazyBinaryColumnarStruct struct1 = (LazyBinaryColumnarStruct) serde.deserialize(braw1);
    oi = (StructObjectInspector) serde.getObjectInspector();
    List<Object> objs1 = oi.getStructFieldsDataAsList(struct1);
    Assert.assertEquals(((LongWritable) objs1.get(0)).get(), 1L);
    Assert.assertEquals(((LongWritable) objs1.get(1)).get(), 2L);
    Assert.assertNull(objs1.get(2));
    // fetch the row inserted after schema is altered and verify
    LazyBinaryColumnarStruct struct2 = (LazyBinaryColumnarStruct) serde.deserialize(braw2);
    List<Object> objs2 = struct2.getFieldsAsList();
    Assert.assertEquals(((LongWritable) objs2.get(0)).get(), 11L);
    Assert.assertEquals(((LongWritable) objs2.get(1)).get(), 12L);
    Assert.assertEquals(((LongWritable) objs2.get(2)).get(), 13L);
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
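At the serde level, the "alter table add column" step above is just a change of the column metadata handed to initialize(). The following rough sketch shows what the two Properties configurations boil down to; the literal column and type strings are assumptions inferred from BeforeStruct (two long fields) and AfterStruct (three long fields), not values taken from the test:

    // assumed pre-alter metadata: two bigint columns
    props.setProperty(serdeConstants.LIST_COLUMNS, "l1,l2");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint:bigint");
    // assumed post-alter metadata: a third bigint column is added
    props.setProperty(serdeConstants.LIST_COLUMNS, "l1,l2,l3");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "bigint:bigint:bigint");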

Example 33 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class TestLazyBinaryColumnarSerDe method testSerDeEmpties.

@Test
public void testSerDeEmpties() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    OuterStruct outerStruct = new OuterStruct();
    outerStruct.mByte = 101;
    outerStruct.mShort = 2002;
    outerStruct.mInt = 3003;
    outerStruct.mLong = 4004l;
    outerStruct.mFloat = 5005.01f;
    outerStruct.mDouble = 6006.001d;
    outerStruct.mString = "";
    outerStruct.mBA = new byte[] { 'a' };
    outerStruct.mArray = new ArrayList<InnerStruct>();
    outerStruct.mMap = new TreeMap<String, InnerStruct>();
    outerStruct.mStruct = new InnerStruct(180018, 190019l);
    BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
    ObjectInspector out_oi = serde.getObjectInspector();
    Object out_o = serde.deserialize(braw);
    if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
        System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
        System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
        fail("Deserialized object does not compare");
    }
}
Also used : StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) SimpleMapEqualComparer(org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer) Configuration(org.apache.hadoop.conf.Configuration) Properties(java.util.Properties) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 34 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.

the class TestStatsSerde method deserializeAndSerializeColumnar.

private void deserializeAndSerializeColumnar(ColumnarSerDe serDe, BytesRefArrayWritable t, String[] data) throws SerDeException {
    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    // Deserialize
    Object row = serDe.deserialize(t);
    int size = 0;
    for (int i = 0; i < data.length; i++) {
        size += data[i].length();
    }
    assertEquals("serialized size correct after deserialization", size, serDe.getSerDeStats().getRawDataSize());
    assertNotSame(0, size);
    BytesRefArrayWritable serializedData = (BytesRefArrayWritable) serDe.serialize(row, oi);
    size = 0;
    for (int i = 0; i < serializedData.size(); i++) {
        size += serializedData.get(i).getLength();
    }
    assertEquals("serialized size correct after serialization", size, serDe.getSerDeStats().getRawDataSize());
    assertNotSame(0, size);
}
Also used : BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)
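This helper receives the BytesRefArrayWritable already built. As a hedged sketch (not the project's actual test driver), the argument could be assembled from the same String[] data roughly like this, assuming UTF-8 field encoding, a serDe initialized elsewhere, and an import of java.nio.charset.StandardCharsets:

    String[] exampleData = { "123", "456", "hive and hadoop" };   // hypothetical row values
    BytesRefArrayWritable t = new BytesRefArrayWritable(exampleData.length);
    for (int i = 0; i < exampleData.length; i++) {
        byte[] fieldBytes = exampleData[i].getBytes(StandardCharsets.UTF_8);
        t.set(i, new BytesRefWritable(fieldBytes, 0, fieldBytes.length));
    }
    // deserializeAndSerializeColumnar(serDe, t, exampleData);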

Example 35 with BytesRefArrayWritable

use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project DataX by alibaba.

the class DFSUtil method rcFileStartRead.

public void rcFileStartRead(String sourceRcFilePath, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    LOG.info(String.format("Start Read rcfile [%s].", sourceRcFilePath));
    List<ColumnEntry> column = UnstructuredStorageReaderUtil.getListColumnEntry(readerSliceConfig, com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN);
    // warn: no default value '\N'
    String nullFormat = readerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.NULL_FORMAT);
    Path rcFilePath = new Path(sourceRcFilePath);
    FileSystem fs = null;
    RCFileRecordReader recordReader = null;
    try {
        fs = FileSystem.get(rcFilePath.toUri(), hadoopConf);
        long fileLen = fs.getFileStatus(rcFilePath).getLen();
        FileSplit split = new FileSplit(rcFilePath, 0, fileLen, (String[]) null);
        recordReader = new RCFileRecordReader(hadoopConf, split);
        LongWritable key = new LongWritable();
        BytesRefArrayWritable value = new BytesRefArrayWritable();
        Text txt = new Text();
        while (recordReader.next(key, value)) {
            String[] sourceLine = new String[value.size()];
            txt.clear();
            for (int i = 0; i < value.size(); i++) {
                BytesRefWritable v = value.get(i);
                txt.set(v.getData(), v.getStart(), v.getLength());
                sourceLine[i] = txt.toString();
            }
            UnstructuredStorageReaderUtil.transportOneRecord(recordSender, column, sourceLine, nullFormat, taskPluginCollector);
        }
    } catch (IOException e) {
        String message = String.format("Error reading file [%s]", sourceRcFilePath);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsReaderErrorCode.READ_RCFILE_ERROR, message, e);
    } finally {
        try {
            if (recordReader != null) {
                recordReader.close();
                LOG.info("Finally, Close RCFileRecordReader.");
            }
        } catch (IOException e) {
            LOG.warn(String.format("finally: failed to close RCFileRecordReader, %s", e.getMessage()));
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) BytesRefArrayWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable) RCFileRecordReader(org.apache.hadoop.hive.ql.io.RCFileRecordReader) ColumnEntry(com.alibaba.datax.plugin.unstructuredstorage.reader.ColumnEntry) IOException(java.io.IOException) FileSystem(org.apache.hadoop.fs.FileSystem) BytesRefWritable(org.apache.hadoop.hive.serde2.columnar.BytesRefWritable)
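The reader above consumes rows as BytesRefArrayWritable values. For completeness, here is a hedged sketch of the write side using the standard RCFile.Writer API from org.apache.hadoop.hive.ql.io; the path, column count, and field values are made up, and IOException handling is omitted:

    Configuration conf = new Configuration();
    // the writer needs the column count up front
    RCFileOutputFormat.setColumnNumber(conf, 3);
    FileSystem fs = FileSystem.get(conf);
    RCFile.Writer writer = new RCFile.Writer(fs, conf, new Path("/tmp/example.rc"));
    BytesRefArrayWritable row = new BytesRefArrayWritable(3);
    String[] fields = { "1", "alice", "2024-01-01" };
    for (int i = 0; i < fields.length; i++) {
        byte[] b = fields[i].getBytes(StandardCharsets.UTF_8);
        row.set(i, new BytesRefWritable(b, 0, b.length));
    }
    // each appended row is read back later as one BytesRefArrayWritable
    writer.append(row);
    writer.close();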

Aggregations

BytesRefArrayWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable): 28
BytesRefWritable (org.apache.hadoop.hive.serde2.columnar.BytesRefWritable): 17
Configuration (org.apache.hadoop.conf.Configuration): 13
LongWritable (org.apache.hadoop.io.LongWritable): 12
Path (org.apache.hadoop.fs.Path): 11
Test (org.junit.Test): 11
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 10
Properties (java.util.Properties): 7
RecordReader (org.apache.hadoop.mapred.RecordReader): 7
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec): 5
FileSystem (org.apache.hadoop.fs.FileSystem): 4
RCFile (org.apache.hadoop.hive.ql.io.RCFile): 4
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 4
JobConf (org.apache.hadoop.mapred.JobConf): 4
IOException (java.io.IOException): 3
ColumnarSerDe (org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe): 3
ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable): 3
SimpleMapEqualComparer (org.apache.hadoop.hive.serde2.objectinspector.SimpleMapEqualComparer): 3
CompressionCodec (org.apache.hadoop.io.compress.CompressionCodec): 3
Random (java.util.Random): 2