use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testSerDe.
@Test
public void testSerDe() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    OuterStruct outerStruct = new OuterStruct();
    outerStruct.mByte = 1;
    outerStruct.mShort = 2;
    outerStruct.mInt = 3;
    outerStruct.mLong = 4l;
    outerStruct.mFloat = 5.01f;
    outerStruct.mDouble = 6.001d;
    outerStruct.mString = "seven";
    outerStruct.mBA = new byte[] { '2' };
    InnerStruct is1 = new InnerStruct(8, 9l);
    InnerStruct is2 = new InnerStruct(10, 11l);
    outerStruct.mArray = new ArrayList<InnerStruct>(2);
    outerStruct.mArray.add(is1);
    outerStruct.mArray.add(is2);
    outerStruct.mMap = new TreeMap<String, InnerStruct>();
    outerStruct.mMap.put(new String("twelve"), new InnerStruct(13, 14l));
    outerStruct.mMap.put(new String("fifteen"), new InnerStruct(16, 17l));
    outerStruct.mStruct = new InnerStruct(18, 19l);
    BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
    ObjectInspector out_oi = serde.getObjectInspector();
    Object out_o = serde.deserialize(braw);
    if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new CrossMapEqualComparer())) {
        System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
        System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
        fail("Deserialized object does not compare");
    }
}
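Note: the OuterStruct and InnerStruct helper beans are not shown on this page. The following is a hypothetical reconstruction inferred only from the field accesses above; the real definitions in the Hive test sources may differ (for example, by using boxed types or additional members).

// Hypothetical reconstruction of the beans referenced in testSerDe(), inferred from usage only.
class InnerStruct {
    int mInt;
    long mLong;

    InnerStruct(int i, long l) {
        mInt = i;
        mLong = l;
    }
}

class OuterStruct {
    byte mByte;
    short mShort;
    int mInt;
    long mLong;
    float mFloat;
    double mDouble;
    String mString;
    byte[] mBA;
    List<InnerStruct> mArray;
    Map<String, InnerStruct> mMap;
    InnerStruct mStruct;
}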
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testHandlingAlteredSchemas.
/**
 * HIVE-5788
 * <p>
 * Background: in the "add column" case, the table metadata changes but the existing data does not.
 * Columns that are missing from the data but required by the metadata are interpreted as null.
 * <p>
 * This tests the use case of altering the columns of a table that already holds some data, adding
 * more data under the new schema, and verifying that this SerDe can read both kinds of rows from
 * the resulting table.
 * @throws SerDeException
 */
@Test
public void testHandlingAlteredSchemas() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(BeforeStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    // serialize some data in the schema before it is altered.
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    BeforeStruct bs1 = new BeforeStruct();
    bs1.l1 = 1L;
    bs1.l2 = 2L;
    BytesRefArrayWritable braw1 = (BytesRefArrayWritable) serde.serialize(bs1, oi);
    // alter table add column: change the metadata
    oi = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(AfterStruct.class, ObjectInspectorOptions.JAVA);
    cols = ObjectInspectorUtils.getFieldNames(oi);
    props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    // serialize some data in the schema after it is altered.
    AfterStruct as = new AfterStruct();
    as.l1 = 11L;
    as.l2 = 12L;
    as.l3 = 13L;
    BytesRefArrayWritable braw2 = (BytesRefArrayWritable) serde.serialize(as, oi);
    // fetch operator
    serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    // fetch the row inserted before schema is altered and verify
    LazyBinaryColumnarStruct struct1 = (LazyBinaryColumnarStruct) serde.deserialize(braw1);
    oi = (StructObjectInspector) serde.getObjectInspector();
    List<Object> objs1 = oi.getStructFieldsDataAsList(struct1);
    Assert.assertEquals(((LongWritable) objs1.get(0)).get(), 1L);
    Assert.assertEquals(((LongWritable) objs1.get(1)).get(), 2L);
    Assert.assertNull(objs1.get(2));
    // fetch the row inserted after schema is altered and verify
    LazyBinaryColumnarStruct struct2 = (LazyBinaryColumnarStruct) serde.deserialize(braw2);
    List<Object> objs2 = struct2.getFieldsAsList();
    Assert.assertEquals(((LongWritable) objs2.get(0)).get(), 11L);
    Assert.assertEquals(((LongWritable) objs2.get(1)).get(), 12L);
    Assert.assertEquals(((LongWritable) objs2.get(2)).get(), 13L);
}
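Note: the BeforeStruct and AfterStruct beans are likewise not reproduced on this page. A minimal sketch of their shape, inferred solely from the assignments above (the real definitions sit in the Hive test class):

// Hypothetical reconstruction: BeforeStruct models the table before ALTER TABLE ... ADD COLUMNS;
// AfterStruct adds the extra column l3 that reads as null for the older row.
static class BeforeStruct {
    Long l1;
    Long l2;
}

static class AfterStruct {
    Long l1;
    Long l2;
    Long l3;
}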
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestLazyBinaryColumnarSerDe method testSerDeEmpties.
@Test
public void testSerDeEmpties() throws SerDeException {
    StructObjectInspector oi = (StructObjectInspector) ObjectInspectorFactory
        .getReflectionObjectInspector(OuterStruct.class, ObjectInspectorOptions.JAVA);
    String cols = ObjectInspectorUtils.getFieldNames(oi);
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, cols);
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, ObjectInspectorUtils.getFieldTypes(oi));
    LazyBinaryColumnarSerDe serde = new LazyBinaryColumnarSerDe();
    serde.initialize(new Configuration(), props, null);
    OuterStruct outerStruct = new OuterStruct();
    outerStruct.mByte = 101;
    outerStruct.mShort = 2002;
    outerStruct.mInt = 3003;
    outerStruct.mLong = 4004l;
    outerStruct.mFloat = 5005.01f;
    outerStruct.mDouble = 6006.001d;
    outerStruct.mString = "";
    outerStruct.mBA = new byte[] { 'a' };
    outerStruct.mArray = new ArrayList<InnerStruct>();
    outerStruct.mMap = new TreeMap<String, InnerStruct>();
    outerStruct.mStruct = new InnerStruct(180018, 190019l);
    BytesRefArrayWritable braw = (BytesRefArrayWritable) serde.serialize(outerStruct, oi);
    ObjectInspector out_oi = serde.getObjectInspector();
    Object out_o = serde.deserialize(braw);
    if (0 != ObjectInspectorUtils.compare(outerStruct, oi, out_o, out_oi, new SimpleMapEqualComparer())) {
        System.out.println("expected = " + SerDeUtils.getJSONString(outerStruct, oi));
        System.out.println("actual = " + SerDeUtils.getJSONString(out_o, out_oi));
        fail("Deserialized object does not compare");
    }
}
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project hive by apache.
the class TestStatsSerde method deserializeAndSerializeColumnar.
private void deserializeAndSerializeColumnar(ColumnarSerDe serDe, BytesRefArrayWritable t, String[] data) throws SerDeException {
    // Get the row structure
    StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
    // Deserialize
    Object row = serDe.deserialize(t);
    int size = 0;
    for (int i = 0; i < data.length; i++) {
        size += data[i].length();
    }
    assertEquals("serialized size correct after deserialization", size, serDe.getSerDeStats().getRawDataSize());
    assertNotSame(0, size);
    BytesRefArrayWritable serializedData = (BytesRefArrayWritable) serDe.serialize(row, oi);
    size = 0;
    for (int i = 0; i < serializedData.size(); i++) {
        size += serializedData.get(i).getLength();
    }
    assertEquals("serialized size correct after serialization", size, serDe.getSerDeStats().getRawDataSize());
    assertNotSame(0, size);
}
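Note: the test that drives this helper is not shown on this page. The following is a minimal, hypothetical caller; the column names, types, and UTF-8 encoding are illustrative assumptions and not the actual TestStatsSerde setup (imports such as java.nio.charset.StandardCharsets are assumed).

// Hypothetical driver for deserializeAndSerializeColumnar(); column layout is assumed.
@Test
public void testColumnarRoundTripSizes() throws SerDeException {
    Properties props = new Properties();
    props.setProperty(serdeConstants.LIST_COLUMNS, "acol,bcol");
    props.setProperty(serdeConstants.LIST_COLUMN_TYPES, "string,string");
    ColumnarSerDe serDe = new ColumnarSerDe();
    serDe.initialize(new Configuration(), props, null);

    String[] data = new String[] { "hello", "world" };
    // Build one row: each column value becomes a BytesRefWritable slot.
    BytesRefArrayWritable braw = new BytesRefArrayWritable(data.length);
    for (int i = 0; i < data.length; i++) {
        byte[] field = data[i].getBytes(StandardCharsets.UTF_8);
        braw.set(i, new BytesRefWritable(field, 0, field.length));
    }
    deserializeAndSerializeColumnar(serDe, braw, data);
}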
use of org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable in project DataX by alibaba.
the class DFSUtil method rcFileStartRead.
public void rcFileStartRead(String sourceRcFilePath, Configuration readerSliceConfig, RecordSender recordSender, TaskPluginCollector taskPluginCollector) {
    LOG.info(String.format("Start Read rcfile [%s].", sourceRcFilePath));
    List<ColumnEntry> column = UnstructuredStorageReaderUtil.getListColumnEntry(readerSliceConfig,
        com.alibaba.datax.plugin.unstructuredstorage.reader.Key.COLUMN);
    // warn: no default value '\N'
    String nullFormat = readerSliceConfig.getString(com.alibaba.datax.plugin.unstructuredstorage.reader.Key.NULL_FORMAT);
    Path rcFilePath = new Path(sourceRcFilePath);
    FileSystem fs = null;
    RCFileRecordReader recordReader = null;
    try {
        fs = FileSystem.get(rcFilePath.toUri(), hadoopConf);
        long fileLen = fs.getFileStatus(rcFilePath).getLen();
        FileSplit split = new FileSplit(rcFilePath, 0, fileLen, (String[]) null);
        recordReader = new RCFileRecordReader(hadoopConf, split);
        LongWritable key = new LongWritable();
        BytesRefArrayWritable value = new BytesRefArrayWritable();
        Text txt = new Text();
        while (recordReader.next(key, value)) {
            String[] sourceLine = new String[value.size()];
            txt.clear();
            for (int i = 0; i < value.size(); i++) {
                BytesRefWritable v = value.get(i);
                txt.set(v.getData(), v.getStart(), v.getLength());
                sourceLine[i] = txt.toString();
            }
            UnstructuredStorageReaderUtil.transportOneRecord(recordSender, column, sourceLine, nullFormat, taskPluginCollector);
        }
    } catch (IOException e) {
        String message = String.format("Error while reading file [%s]", sourceRcFilePath);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsReaderErrorCode.READ_RCFILE_ERROR, message, e);
    } finally {
        try {
            if (recordReader != null) {
                recordReader.close();
                LOG.info("Finally, Close RCFileRecordReader.");
            }
        } catch (IOException e) {
            LOG.warn(String.format("finally: failed to close RCFileRecordReader, %s", e.getMessage()));
        }
    }
}
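Note: the reader above consumes RCFile rows materialized as BytesRefArrayWritable. For context, a minimal, hypothetical sketch of the write side using Hive's RCFile.Writer follows; the file path, column count, and UTF-8 encoding are illustrative assumptions, and the fragment assumes an enclosing method that declares throws IOException.

// Hypothetical producer of an RCFile that rcFileStartRead() could then read.
Configuration conf = new Configuration();
Path path = new Path("/tmp/example.rc"); // illustrative path
FileSystem fs = FileSystem.get(conf);
String[][] rows = { { "1", "alice" }, { "2", "bob" } };

// RCFile needs the column count set in the configuration before the writer is created.
RCFileOutputFormat.setColumnNumber(conf, rows[0].length);
RCFile.Writer writer = new RCFile.Writer(fs, conf, path);
try {
    for (String[] row : rows) {
        // One BytesRefArrayWritable per row, one BytesRefWritable per column value.
        BytesRefArrayWritable braw = new BytesRefArrayWritable(row.length);
        for (int i = 0; i < row.length; i++) {
            byte[] field = row[i].getBytes(StandardCharsets.UTF_8);
            braw.set(i, new BytesRefWritable(field, 0, field.length));
        }
        writer.append(braw);
    }
} finally {
    writer.close();
}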