Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestOrcFile, method columnProjection.
@Test
public void columnProjection() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(1000)
          .compress(CompressionKind.NONE)
          .bufferSize(100)
          .rowIndexStride(1000));
  Random r1 = new Random(1);
  Random r2 = new Random(2);
  int x;
  int minInt = 0, maxInt = 0;
  String y;
  String minStr = null, maxStr = null;
  for (int i = 0; i < 21000; ++i) {
    x = r1.nextInt();
    y = Long.toHexString(r2.nextLong());
    if (i == 0 || x < minInt) {
      minInt = x;
    }
    if (i == 0 || x > maxInt) {
      maxInt = x;
    }
    if (i == 0 || y.compareTo(minStr) < 0) {
      minStr = y;
    }
    if (i == 0 || y.compareTo(maxStr) > 0) {
      maxStr = y;
    }
    writer.addRow(inner(x, y));
  }
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  // check out the statistics
  ColumnStatistics[] stats = reader.getStatistics();
  assertEquals(3, stats.length);
  for (ColumnStatistics s : stats) {
    assertEquals(21000, s.getNumberOfValues());
    if (s instanceof IntegerColumnStatistics) {
      assertEquals(minInt, ((IntegerColumnStatistics) s).getMinimum());
      assertEquals(maxInt, ((IntegerColumnStatistics) s).getMaximum());
    } else if (s instanceof StringColumnStatistics) {
      assertEquals(maxStr, ((StringColumnStatistics) s).getMaximum());
      assertEquals(minStr, ((StringColumnStatistics) s).getMinimum());
    }
  }
  // check out the types
  List<OrcProto.Type> types = reader.getTypes();
  assertEquals(3, types.size());
  assertEquals(OrcProto.Type.Kind.STRUCT, types.get(0).getKind());
  assertEquals(2, types.get(0).getSubtypesCount());
  assertEquals(1, types.get(0).getSubtypes(0));
  assertEquals(2, types.get(0).getSubtypes(1));
  assertEquals(OrcProto.Type.Kind.INT, types.get(1).getKind());
  assertEquals(0, types.get(1).getSubtypesCount());
  assertEquals(OrcProto.Type.Kind.STRING, types.get(2).getKind());
  assertEquals(0, types.get(2).getSubtypesCount());
  // read the contents and make sure they match
  RecordReader rows1 = reader.rows(new boolean[] { true, true, false });
  RecordReader rows2 = reader.rows(new boolean[] { true, false, true });
  r1 = new Random(1);
  r2 = new Random(2);
  OrcStruct row1 = null;
  OrcStruct row2 = null;
  for (int i = 0; i < 21000; ++i) {
    assertEquals(true, rows1.hasNext());
    assertEquals(true, rows2.hasNext());
    row1 = (OrcStruct) rows1.next(row1);
    row2 = (OrcStruct) rows2.next(row2);
    assertEquals(r1.nextInt(), ((IntWritable) row1.getFieldValue(0)).get());
    assertEquals(Long.toHexString(r2.nextLong()), row2.getFieldValue(1).toString());
  }
  assertEquals(false, rows1.hasNext());
  assertEquals(false, rows2.hasNext());
  rows1.close();
  rows2.close();
}
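The boolean array passed to reader.rows() above is indexed by the flattened ORC type tree: position 0 is the root struct and positions 1 and 2 are its two fields, so each reader materializes only one column. Below is a minimal standalone sketch of the same projection pattern; the file path and class name are hypothetical, and the file is assumed to have been written with the same two-column struct schema.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

public class ColumnProjectionSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical file with the schema struct<int1:int,string1:string>.
    Path path = new Path("/tmp/inner-struct.orc");
    Reader reader = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    // Index 0 = root struct, 1 = int1, 2 = string1; read only the int column.
    RecordReader rows = reader.rows(new boolean[] { true, true, false });
    Object row = null;
    while (rows.hasNext()) {
      row = rows.next(row);
      // Fields of excluded columns come back as null in the materialized row.
      System.out.println(row);
    }
    rows.close();
  }
}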
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestOrcFile, method testZeroCopySeek.
@Test
public void testZeroCopySeek() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(200000)
          .bufferSize(65536)
          .rowIndexStride(1000));
  Random rand = new Random(42);
  final int COUNT = 32768;
  long[] intValues = new long[COUNT];
  double[] doubleValues = new double[COUNT];
  String[] stringValues = new String[COUNT];
  BytesWritable[] byteValues = new BytesWritable[COUNT];
  String[] words = new String[128];
  for (int i = 0; i < words.length; ++i) {
    words[i] = Integer.toHexString(rand.nextInt());
  }
  for (int i = 0; i < COUNT / 2; ++i) {
    intValues[2 * i] = rand.nextLong();
    intValues[2 * i + 1] = intValues[2 * i];
    stringValues[2 * i] = words[rand.nextInt(words.length)];
    stringValues[2 * i + 1] = stringValues[2 * i];
  }
  for (int i = 0; i < COUNT; ++i) {
    doubleValues[i] = rand.nextDouble();
    byte[] buf = new byte[20];
    rand.nextBytes(buf);
    byteValues[i] = new BytesWritable(buf);
  }
  for (int i = 0; i < COUNT; ++i) {
    writer.addRow(createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i));
  }
  writer.close();
  writer = null;
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(COUNT, reader.getNumberOfRows());
  /* enable zero copy record reader */
  Configuration conf = new Configuration();
  conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), true);
  RecordReader rows = reader.rows();
  /* all tests are identical to the other seek() tests */
  OrcStruct row = null;
  for (int i = COUNT - 1; i >= 0; --i) {
    // we load the previous buffer of rows
    if (i % COUNT == COUNT - 1) {
      rows.seekToRow(i - (COUNT - 1));
    }
    rows.seekToRow(i);
    row = (OrcStruct) rows.next(row);
    BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words, i);
    assertEquals(expected.boolean1.booleanValue(), ((BooleanWritable) row.getFieldValue(0)).get());
    assertEquals(expected.byte1.byteValue(), ((ByteWritable) row.getFieldValue(1)).get());
    assertEquals(expected.short1.shortValue(), ((ShortWritable) row.getFieldValue(2)).get());
    assertEquals(expected.int1.intValue(), ((IntWritable) row.getFieldValue(3)).get());
    assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
    assertEquals(expected.float1.floatValue(), ((FloatWritable) row.getFieldValue(5)).get(), 0.0001);
    assertEquals(expected.double1.doubleValue(), ((DoubleWritable) row.getFieldValue(6)).get(), 0.0001);
    assertEquals(expected.bytes1, row.getFieldValue(7));
    assertEquals(expected.string1, row.getFieldValue(8));
    List<InnerStruct> expectedList = expected.middle.list;
    List<OrcStruct> actualList = (List) ((OrcStruct) row.getFieldValue(9)).getFieldValue(0);
    compareList(expectedList, actualList);
    compareList(expected.list, (List) row.getFieldValue(10));
  }
  rows.close();
  Iterator<StripeInformation> stripeIterator = reader.getStripes().iterator();
  long offsetOfStripe2 = 0;
  long offsetOfStripe4 = 0;
  long lastRowOfStripe2 = 0;
  for (int i = 0; i < 5; ++i) {
    StripeInformation stripe = stripeIterator.next();
    if (i < 2) {
      lastRowOfStripe2 += stripe.getNumberOfRows();
    } else if (i == 2) {
      offsetOfStripe2 = stripe.getOffset();
      lastRowOfStripe2 += stripe.getNumberOfRows() - 1;
    } else if (i == 4) {
      offsetOfStripe4 = stripe.getOffset();
    }
  }
  boolean[] columns = new boolean[reader.getStatistics().length];
  // long column
  columns[5] = true;
  // text column
  columns[9] = true;
  /* use zero copy record reader */
  rows = reader.rowsOptions(new Reader.Options()
      .range(offsetOfStripe2, offsetOfStripe4 - offsetOfStripe2)
      .include(columns));
  rows.seekToRow(lastRowOfStripe2);
  for (int i = 0; i < 2; ++i) {
    row = (OrcStruct) rows.next(row);
    BigRow expected = createRandomRow(intValues, doubleValues, stringValues, byteValues, words,
        (int) (lastRowOfStripe2 + i));
    assertEquals(expected.long1.longValue(), ((LongWritable) row.getFieldValue(4)).get());
    assertEquals(expected.string1, row.getFieldValue(8));
  }
  rows.close();
}
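The zero-copy path is toggled through the configuration key exposed by OrcConf.USE_ZEROCOPY, and seekToRow() relies on the row index written every rowIndexStride rows to jump inside a stripe. The following is a minimal hedged sketch of that setup, with a hypothetical file path, the flag set before the reader is opened, and the OrcConf import assumed to resolve to org.apache.orc.OrcConf as in current Hive.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.orc.OrcConf;

public class ZeroCopySeekSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Request zero-copy reads; the record reader picks this up when it is created.
    conf.setBoolean(OrcConf.USE_ZEROCOPY.getHiveConfName(), true);
    // Hypothetical ORC file written with rowIndexStride(1000).
    Reader reader = OrcFile.createReader(new Path("/tmp/big-row.orc"),
        OrcFile.readerOptions(conf));
    RecordReader rows = reader.rows();
    // Jump directly to row 1000; the row index avoids decoding the earlier rows.
    rows.seekToRow(1000);
    Object row = rows.next(null);
    System.out.println(row);
    rows.close();
  }
}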
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testDefaultTypes.
@Test
public void testDefaultTypes() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "str,str2");
  properties.setProperty("columns.types", "string:string");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        StringRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  HiveOutputFormat<?, ?> outFormat = new OrcOutputFormat();
  org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter writer =
      outFormat.getHiveRecordWriter(conf, testFilePath, StringRow.class, true, properties, Reporter.NULL);
  writer.write(serde.serialize(new StringRow("owen"), inspector));
  writer.write(serde.serialize(new StringRow("beth"), inspector));
  writer.write(serde.serialize(new StringRow("laurel"), inspector));
  writer.write(serde.serialize(new StringRow("hazen"), inspector));
  writer.write(serde.serialize(new StringRow("colin"), inspector));
  writer.write(serde.serialize(new StringRow("miles"), inspector));
  writer.close(true);
  serde = new OrcSerde();
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  assertEquals("struct<str:string,str2:string>", inspector.getTypeName());
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(1, splits.length);
  // read the whole file
  conf.set("columns", StringRow.getColumnNamesProperty());
  conf.set("columns.types", StringRow.getColumnTypesProperty());
  org.apache.hadoop.mapred.RecordReader reader =
      in.getRecordReader(splits[0], conf, Reporter.NULL);
  Object key = reader.createKey();
  Writable value = (Writable) reader.createValue();
  List<? extends StructField> fields = inspector.getAllStructFieldRefs();
  StringObjectInspector strInspector = (StringObjectInspector) fields.get(0).getFieldObjectInspector();
  assertEquals(true, reader.next(key, value));
  assertEquals("owen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("beth", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("laurel", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("hazen", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("colin", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(true, reader.next(key, value));
  assertEquals("miles", strInspector.getPrimitiveJavaObject(inspector.getStructFieldData(value, fields.get(0))));
  assertEquals(false, reader.next(key, value));
  reader.close();
}
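On the read side, OrcSerde builds its ObjectInspector entirely from the "columns" and "columns.types" table properties passed to SerDeUtils.initializeSerDe. A minimal self-contained sketch of just that initialization step, using the same property values as the test (the class name is hypothetical):

import java.util.Properties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

public class OrcSerdeInitSketch {
  public static void main(String[] args) throws Exception {
    // The serde derives its schema purely from the table properties.
    Properties props = new Properties();
    props.setProperty("columns", "str,str2");
    props.setProperty("columns.types", "string:string");
    OrcSerde serde = new OrcSerde();
    SerDeUtils.initializeSerDe(serde, new Configuration(), props, null);
    StructObjectInspector oi = (StructObjectInspector) serde.getObjectInspector();
    // Prints struct<str:string,str2:string>, matching the assertion in the test above.
    System.out.println(oi.getTypeName());
  }
}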
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestInputOutputFormat, method testSplitElimination.
@Test
public void testSplitElimination() throws Exception {
  Properties properties = new Properties();
  properties.setProperty("columns", "z,r");
  properties.setProperty("columns.types", "int:struct<x:int,y:int>");
  StructObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
        NestedRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  AbstractSerDe serde = new OrcSerde();
  OutputFormat<?, ?> outFormat = new OrcOutputFormat();
  conf.setInt("mapred.max.split.size", 50);
  RecordWriter writer = outFormat.getRecordWriter(fs, conf, testFilePath.toString(), Reporter.NULL);
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(1, 2, 3), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(4, 5, 6), inspector));
  writer.write(NullWritable.get(), serde.serialize(new NestedRow(7, 8, 9), inspector));
  writer.close(Reporter.NULL);
  serde = new OrcSerde();
  SearchArgument sarg = SearchArgumentFactory.newBuilder()
      .startAnd()
      .lessThan("z", PredicateLeaf.Type.LONG, new Long(0))
      .end()
      .build();
  conf.set("sarg.pushdown", toKryo(sarg));
  conf.set("hive.io.file.readcolumn.names", "z,r");
  SerDeUtils.initializeSerDe(serde, conf, properties, null);
  inspector = (StructObjectInspector) serde.getObjectInspector();
  InputFormat<?, ?> in = new OrcInputFormat();
  FileInputFormat.setInputPaths(conf, testFilePath.toString());
  InputSplit[] splits = in.getSplits(conf, 1);
  assertEquals(0, splits.length);
}
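The split elimination here is driven by the SearchArgument serialized into the "sarg.pushdown" property; toKryo(sarg) is the test's own serialization helper. A minimal sketch of just the predicate construction shown above, with the package names assumed from current Hive and a hypothetical class name:

import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory;

public class SargSketch {
  public static void main(String[] args) {
    // Predicate "z < 0": the column statistics for the three rows written in
    // the test (z = 1, 4, 7) can never satisfy it, so OrcInputFormat prunes
    // every split and getSplits() returns an empty array.
    SearchArgument sarg = SearchArgumentFactory.newBuilder()
        .startAnd()
        .lessThan("z", PredicateLeaf.Type.LONG, 0L)
        .end()
        .build();
    System.out.println(sarg);
  }
}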
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.
From the class TestOrcFileStripeMergeRecordReader, method createOrcFile.
private void createOrcFile(int stripSize, int numberOfRows) throws IOException {
  ObjectInspector inspector;
  synchronized (TestOrcFileStripeMergeRecordReader.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringIntIntIntRow.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(tmpPath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(stripSize)
          .compress(CompressionKind.ZLIB)
          .bufferSize(5000)
          .rowIndexStride(1000));
  Random rand = new Random(157);
  for (int i = 0; i < numberOfRows; i++) {
    writer.addRow(new StringIntIntIntRow(Integer.toBinaryString(i), rand.nextInt(), rand.nextInt(), rand.nextInt()));
  }
  writer.close();
}