Use of org.apache.hadoop.hive.serde2.SerDe in project hive by apache: class TestInputOutputFormat, method testMROutput.
@Test
public void testMROutput() throws Exception {
  Properties properties = new Properties();
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  OutputFormat<?, ?> outFormat = new OrcOutputFormat();
  RecordWriter writer =
      outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
  writer.close(Reporter.NULL);
  serde = new OrcSerde();
  properties.setProperty("columns", "z,r");
  properties.setProperty("columns.types", "int:struct<x:int,y:int>");
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);
  ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
  conf.set("columns", "z,r");
  conf.set("columns.types", "int:struct<x:int,y:int>");
  org.apache.hadoop.mapred.RecordReader reader =
      in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Object value = reader.createValue();
  int rowNum = 0;
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  StructObjectInspector inner = (StructObjectInspector) fields.get(1).getFieldObjectInspector();
  List<? extends StructField> inFields = inner.getAllStructFieldRefs();
  IntObjectInspector intInspector = (IntObjectInspector) fields.get(0).getFieldObjectInspector();
  while (reader.next(key, value)) {
    assertEquals(null, inspector.getStructFieldData(value, fields.get(0)));
    Object sub = inspector.getStructFieldData(value, fields.get(1));
    assertEquals(3 * rowNum + 1, intInspector.get(inner.getStructFieldData(sub, inFields.get(0))));
    assertEquals(3 * rowNum + 2, intInspector.get(inner.getStructFieldData(sub, inFields.get(1))));
    rowNum += 1;
  }
  assertEquals(3, rowNum);
  reader.close();
}
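The MyRow and NestedRow fixtures referenced above are defined elsewhere in TestInputOutputFormat and are not part of this snippet. A minimal sketch of shapes that are consistent with the "z,r" / "int:struct<x:int,y:int>" schema and the assertions above (not the actual Hive classes) would be:

static class MyRow {
  int x;
  int y;
  MyRow(int x, int y) {
    this.x = x;
    this.y = y;
  }
}

static class NestedRow {
  int z;
  MyRow r;
  NestedRow(int x, int y, int z) {
    // Column 0 ("z") is never projected by the test, so only r's contents are asserted:
    // for NestedRow(1, 2, 3) the inner struct must hold (1, 2).
    this.z = z;
    this.r = new MyRow(x, y);
  }
}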
Use of org.apache.hadoop.hive.serde2.SerDe in project hive by apache: class TestInputOutputFormat, method testRowNumberUniquenessInDifferentSplits.
/**
 * Also see {@link TestOrcFile#testPredicatePushdown()}.
 * This tests that {@link RecordReader#getRowNumber()} works with multiple splits.
 * @throws Exception
 */
@Test
public void testRowNumberUniquenessInDifferentSplits() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "x,y");
  properties.setProperty("columns.types", "int:int");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  // Save the conf variable values so that they can be restored later.
  long oldDefaultStripeSize = conf.getLong(OrcConf.STRIPE_SIZE.getHiveConfName(), -1L);
  long oldMaxSplitSize = conf.getLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, -1L);
  // Set the conf variable values for this test.
  // 10000 bytes per stripe
  long newStripeSize = 10000L;
  // 100 bytes per split
  long newMaxSplitSize = 100L;
  conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), newStripeSize);
  conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, newMaxSplitSize);
  AbstractSerDe serde = new OrcSerde();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, properties, Reporter.NULL);
  // The following loop should create 20 stripes in the orc file.
  for (int i = 0; i < newStripeSize * 10; ++i) {
    writer.write(serde.serialize(new MyRow(i, i + 1), inspector));
  }
  writer.close(true);
  serde = new OrcSerde();
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  assertEquals(OrcSerde.OrcSerdeRow.class, serde.getSerializedClass());
  inspector = (StructObjectInspector) serde.getObjectInspector();
  assertEquals("struct<x:int,y:int>", inspector.getTypeName());
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  int numExpectedSplits = 20;
  InputSplit[] splits = in.getSplits(conf, numExpectedSplits);
  assertEquals(numExpectedSplits, splits.length);
  for (int i = 0; i < numExpectedSplits; ++i) {
    OrcSplit split = (OrcSplit) splits[i];
    Reader.Options orcReaderOptions = new Reader.Options();
    orcReaderOptions.range(split.getStart(), split.getLength());
    OrcFile.ReaderOptions qlReaderOptions =
        OrcFile.readerOptions(conf).maxLength(split.getFileLength());
    Reader reader = OrcFile.createReader(split.getPath(), qlReaderOptions);
    RecordReader recordReader = reader.rowsOptions(orcReaderOptions);
    for (int j = 0; recordReader.hasNext(); j++) {
      long rowNum = (i * 5000) + j;
      long rowNumActual = recordReader.getRowNumber();
      assertEquals("rowNum=" + rowNum, rowNum, rowNumActual);
      Object row = recordReader.next(null);
    }
    recordReader.close();
  }
  // Reset the conf variable values that we changed for this test.
  if (oldDefaultStripeSize != -1L) {
    conf.setLong(OrcConf.STRIPE_SIZE.getHiveConfName(), oldDefaultStripeSize);
  } else {
    // Nothing was set for the default stripe size previously, so unset it.
    conf.unset(OrcConf.STRIPE_SIZE.getHiveConfName());
  }
  if (oldMaxSplitSize != -1L) {
    conf.setLong(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname, oldMaxSplitSize);
  } else {
    // Nothing was set for the max split size previously, so unset it.
    conf.unset(HiveConf.ConfVars.MAPREDMAXSPLITSIZE.varname);
  }
}
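For reference, the 5000 in the expected row number comes directly from the write loop and split count above, assuming the rows land evenly across the splits (which is what the assertion verifies):

// 100,000 rows are written (newStripeSize * 10) and 20 splits are expected,
// so each split should start 5,000 rows after the previous one.
long totalRows = newStripeSize * 10;                  // 100,000
long rowsPerSplit = totalRows / numExpectedSplits;    // 5,000, the stride used in (i * 5000) + j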
Use of org.apache.hadoop.hive.serde2.SerDe in project hive by apache: class TestRCFile, method setup.
@Before
public void setup() throws Exception {
  conf = new Configuration();
  ColumnProjectionUtils.setReadAllColumns(conf);
  fs = FileSystem.getLocal(conf);
  dir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred");
  file = new Path(dir, "test_rcfile");
  cleanup();
  // the SerDe part is from TestLazySimpleSerDe
  serDe = new ColumnarSerDe();
  // Create the SerDe
  tbl = createProperties();
  SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
  try {
    bytesArray = new byte[][] { "123".getBytes("UTF-8"), "456".getBytes("UTF-8"),
        "789".getBytes("UTF-8"), "1000".getBytes("UTF-8"), "5.3".getBytes("UTF-8"),
        "hive and hadoop".getBytes("UTF-8"), new byte[0], "NULL".getBytes("UTF-8") };
    s = new BytesRefArrayWritable(bytesArray.length);
    s.set(0, new BytesRefWritable("123".getBytes("UTF-8")));
    s.set(1, new BytesRefWritable("456".getBytes("UTF-8")));
    s.set(2, new BytesRefWritable("789".getBytes("UTF-8")));
    s.set(3, new BytesRefWritable("1000".getBytes("UTF-8")));
    s.set(4, new BytesRefWritable("5.3".getBytes("UTF-8")));
    s.set(5, new BytesRefWritable("hive and hadoop".getBytes("UTF-8")));
    s.set(6, new BytesRefWritable("NULL".getBytes("UTF-8")));
    s.set(7, new BytesRefWritable("NULL".getBytes("UTF-8")));
    // partial test init
    patialS.set(0, new BytesRefWritable("NULL".getBytes("UTF-8")));
    patialS.set(1, new BytesRefWritable("NULL".getBytes("UTF-8")));
    patialS.set(2, new BytesRefWritable("789".getBytes("UTF-8")));
    patialS.set(3, new BytesRefWritable("1000".getBytes("UTF-8")));
    patialS.set(4, new BytesRefWritable("NULL".getBytes("UTF-8")));
    // LazyString has no so-called NULL sequence; a missing value is the empty string.
    patialS.set(5, new BytesRefWritable("".getBytes("UTF-8")));
    patialS.set(6, new BytesRefWritable("NULL".getBytes("UTF-8")));
    // LazyString has no so-called NULL sequence; a missing value is the empty string.
    patialS.set(7, new BytesRefWritable("".getBytes("UTF-8")));
  } catch (UnsupportedEncodingException e) {
    throw new RuntimeException(e);
  }
}
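createProperties() is a helper elsewhere in TestRCFile and is not shown in this snippet. A hypothetical version, sketched only to show the kind of table properties a ColumnarSerDe needs for the eight test columns above (the real column names and types may differ):

private static Properties createProperties() {
  Properties tbl = new Properties();
  // Hypothetical schema: eight columns matching the eight byte arrays set up above.
  tbl.setProperty("columns", "afirst,asecond,athird,abigint,adouble,astring,anullint,anullstring");
  tbl.setProperty("columns.types", "int:int:int:bigint:double:string:int:string");
  // Treat the literal string "NULL" as a null value, as the test data assumes.
  tbl.setProperty("serialization.null.format", "NULL");
  return tbl;
}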
Use of org.apache.hadoop.hive.serde2.SerDe in project hive by apache: class TestStatsSerde, method testColumnarSerDe.
/**
 * Test ColumnarSerDe
 */
public void testColumnarSerDe() throws Throwable {
  try {
    System.out.println("test: testColumnarSerde");
    // Create the SerDe
    ColumnarSerDe serDe = new ColumnarSerDe();
    Configuration conf = new Configuration();
    Properties tbl = createProperties();
    SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
    // Data
    BytesRefArrayWritable braw = new BytesRefArrayWritable(8);
    String[] data = { "123", "456", "789", "1000", "5.3", "hive and hadoop", "1.", "NULL" };
    for (int i = 0; i < 8; i++) {
      braw.set(i, new BytesRefWritable(data[i].getBytes()));
    }
    // Test
    deserializeAndSerializeColumnar(serDe, braw, data);
    System.out.println("test: testColumnarSerde - OK");
  } catch (Throwable e) {
    e.printStackTrace();
    throw e;
  }
}
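deserializeAndSerializeColumnar is a private helper in TestStatsSerde and is not included in this snippet; given the test class name, it presumably also exercises SerDe statistics. As an illustration only, a minimal round-trip check with the same argument shape might look like:

// Illustrative sketch, not the actual helper.
private void roundTripColumnar(ColumnarSerDe serDe, BytesRefArrayWritable braw, String[] data) throws SerDeException {
  StructObjectInspector oi = (StructObjectInspector) serDe.getObjectInspector();
  // Deserialize the columnar bytes into a lazy row object.
  Object row = serDe.deserialize(braw);
  assertEquals(data.length, oi.getAllStructFieldRefs().size());
  // Serialize it back; ColumnarSerDe produces a BytesRefArrayWritable with one ref per column.
  BytesRefArrayWritable serialized = (BytesRefArrayWritable) serDe.serialize(row, oi);
  assertEquals(data.length, serialized.size());
}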
Use of org.apache.hadoop.hive.serde2.SerDe in project hive by apache: class TestLazyBinarySerDe, method testLazyBinarySerDe.
/**
 * Test the LazyBinarySerDe.
 *
 * @param rows
 *          array of structs to be serialized
 * @param rowOI
 *          the struct object inspector for the rows
 * @param serde
 *          the serde
 * @throws Throwable
 */
private void testLazyBinarySerDe(Object[] rows, ObjectInspector rowOI, AbstractSerDe serde) throws Throwable {
  ObjectInspector serdeOI = serde.getObjectInspector();
  // Try to serialize
  BytesWritable[] bytes = new BytesWritable[rows.length];
  for (int i = 0; i < rows.length; i++) {
    BytesWritable s = (BytesWritable) serde.serialize(rows[i], rowOI);
    bytes[i] = new BytesWritable();
    bytes[i].set(s);
  }
  // Try to deserialize
  Object[] deserialized = new Object[rows.length];
  for (int i = 0; i < rows.length; i++) {
    deserialized[i] = serde.deserialize(bytes[i]);
    if (0 != ObjectInspectorUtils.compare(rows[i], rowOI, deserialized[i], serdeOI)) {
      System.out.println("structs[" + i + "] = " + SerDeUtils.getJSONString(rows[i], rowOI));
      System.out.println("deserialized[" + i + "] = " + SerDeUtils.getJSONString(deserialized[i], serdeOI));
      System.out.println("serialized[" + i + "] = " + TestBinarySortableSerDe.hexString(bytes[i]));
      assertEquals(rows[i], deserialized[i]);
    }
  }
}
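A hypothetical caller for this helper, using an illustrative SimpleRow bean that is not part of the Hive test suite, might set up the serde and the reflection object inspector like this:

public static class SimpleRow {
  public Integer a;
  public String b;
  public SimpleRow(Integer a, String b) {
    this.a = a;
    this.b = b;
  }
}

private void runLazyBinaryExample() throws Throwable {
  AbstractSerDe serde = new LazyBinarySerDe();
  Properties tbl = new Properties();
  tbl.setProperty("columns", "a,b");
  tbl.setProperty("columns.types", "int:string");
  SerDeUtils.initializeSerDe(serde, new Configuration(), tbl, null);
  // Reflection object inspector for the Java-side rows.
  ObjectInspector rowOI = ObjectInspectorFactory.getReflectionObjectInspector(
      SimpleRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  Object[] rows = { new SimpleRow(1, "hive"), new SimpleRow(2, "hadoop") };
  testLazyBinarySerDe(rows, rowOI, serde);
}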