Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache: class TestOrcFile, method testListExpansion.
@Test
public void testListExpansion() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyList.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf).inspector(inspector));
  MyList row = new MyList();
  // row 1: three elements
  row.list.add(1);
  row.list.add(2);
  row.list.add(3);
  writer.addRow(row);
  // row 2: empty list
  row.list.clear();
  writer.addRow(row);
  // row 3: two elements
  row.list.add(11);
  row.list.add(12);
  writer.addRow(row);
  // row 4: null list
  row.list = null;
  writer.addRow(row);
  // row 5: four elements
  row.list = new ArrayList<>();
  row.list.add(21);
  row.list.add(22);
  row.list.add(23);
  row.list.add(24);
  writer.addRow(row);
  writer.close();
  RecordReader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf)).rows();
  assertEquals(true, reader.hasNext());
  OrcStruct orcrow = (OrcStruct) reader.next(null);
  assertEquals(3, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
  orcrow = (OrcStruct) reader.next(row);
  assertEquals(0, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
  orcrow = (OrcStruct) reader.next(row);
  assertEquals(2, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
  assertEquals(null, ((OrcStruct) reader.next(row)).getFieldValue(0));
  orcrow = (OrcStruct) reader.next(row);
  assertEquals(4, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
  assertEquals(false, reader.hasNext());
  reader.close();
}
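The MyList helper referenced above is defined elsewhere in TestOrcFile. A minimal sketch of the shape it needs for the reflection ObjectInspector to work, assuming a single List<Integer> field (the field name and type are inferred from the test, not confirmed against the source):

  import java.util.ArrayList;
  import java.util.List;

  // Hypothetical reconstruction: reflection discovers the single field
  // and maps it to an ORC struct with one list<int> column.
  static class MyList {
    List<Integer> list = new ArrayList<>();
  }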
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache: class TestOrcFile, method testMemoryManagementV12.
@Test
public void testMemoryManagementV12() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .compress(CompressionKind.NONE)
          .stripeSize(50000)
          .bufferSize(100)
          .rowIndexStride(0)
          .memory(memory)
          .batchSize(100)
          .version(OrcFile.Version.V_0_12));
  assertEquals(testFilePath, memory.path);
  for (int i = 0; i < 2500; ++i) {
    writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
  }
  writer.close();
  assertEquals(null, memory.path);
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  int i = 0;
  for (StripeInformation stripe : reader.getStripes()) {
    i += 1;
    assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
        stripe.getDataLength() < 5000);
  }
  // With HIVE-7832, dictionary encoding is disabled after the first stripe
  // is written because there are too many distinct values, so only 3 stripes
  // are produced, compared to 25 stripes with version 0.11 (see
  // testMemoryManagementV11 below).
  assertEquals(3, i);
  assertEquals(2500, reader.getNumberOfRows());
}
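MyMemoryManager is a test double defined in TestOrcFile; the assertions above rely only on it recording the path of the writer registered with it and clearing that path when the writer closes. A rough sketch under that assumption; the base-class constructor and the addWriter/removeWriter signatures shown here are illustrative and not verified against this Hive version:

  // Illustrative test double for the ORC MemoryManager.
  private static class MyMemoryManager extends MemoryManager {
    final long totalSpace;
    double rate;
    Path path = null;

    MyMemoryManager(Configuration conf, long totalSpace, double rate) {
      super(conf);
      this.totalSpace = totalSpace;
      this.rate = rate;
    }

    @Override
    public void addWriter(Path path, long requestedAllocation, Callback callback) {
      this.path = path; // the test asserts this equals testFilePath
    }

    @Override
    public void removeWriter(Path path) {
      this.path = null; // cleared on writer.close()
    }
  }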
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache: class TestOrcFile, method testMemoryManagementV11.
@Test
public void testMemoryManagementV11() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .compress(CompressionKind.NONE)
          .stripeSize(50000)
          .bufferSize(100)
          .rowIndexStride(0)
          .memory(memory)
          .batchSize(100)
          .version(OrcFile.Version.V_0_11));
  assertEquals(testFilePath, memory.path);
  for (int i = 0; i < 2500; ++i) {
    writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
  }
  writer.close();
  assertEquals(null, memory.path);
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  int i = 0;
  for (StripeInformation stripe : reader.getStripes()) {
    i += 1;
    assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(),
        stripe.getDataLength() < 5000);
  }
  assertEquals(25, i);
  assertEquals(2500, reader.getNumberOfRows());
}
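Both memory-management tests and testStripeLevelStats below write rows of the InnerStruct helper defined in TestOrcFile. A minimal sketch of its likely shape, assuming the field names int1 and string1 that the includeColumns("int1", schema) call in the next test depends on:

  import org.apache.hadoop.io.Text;

  // Hypothetical reconstruction: one int column (int1) and one string
  // column (string1), giving the schema struct<int1:int,string1:string>.
  public static class InnerStruct {
    int int1;
    Text string1 = new Text();

    InnerStruct(int int1, String string1) {
      this.int1 = int1;
      this.string1.set(string1);
    }
  }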
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache: class TestOrcFile, method emptyFile.
@Test
public void emptyFile() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(1000)
          .compress(CompressionKind.NONE)
          .bufferSize(100));
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  assertEquals(false, reader.rows().hasNext());
  assertEquals(CompressionKind.NONE, reader.getCompression());
  assertEquals(0, reader.getNumberOfRows());
  assertEquals(0, reader.getCompressionSize());
  assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
  // an empty file's content is just the 3-byte "ORC" header magic
  assertEquals(3, reader.getContentLength());
  assertEquals(false, reader.getStripes().iterator().hasNext());
}
Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache: class TestOrcFile, method testStripeLevelStats.
@Test
public void testStripeLevelStats() throws Exception {
  ObjectInspector inspector;
  synchronized (TestOrcFile.class) {
    inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
        ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
  }
  Writer writer = OrcFile.createWriter(testFilePath,
      OrcFile.writerOptions(conf)
          .inspector(inspector)
          .stripeSize(100000)
          .bufferSize(10000)
          .batchSize(1000));
  // 5000 rows of (1, "one"), 5000 of (2, "two"), 1000 of (3, "three")
  for (int i = 0; i < 11000; i++) {
    if (i >= 5000) {
      if (i >= 10000) {
        writer.addRow(new InnerStruct(3, "three"));
      } else {
        writer.addRow(new InnerStruct(2, "two"));
      }
    } else {
      writer.addRow(new InnerStruct(1, "one"));
    }
  }
  writer.close();
  Reader reader = OrcFile.createReader(testFilePath,
      OrcFile.readerOptions(conf).filesystem(fs));
  TypeDescription schema = writer.getSchema();
  assertEquals(2, schema.getMaximumId());
  boolean[] expected = new boolean[] { false, true, false };
  boolean[] included = OrcUtils.includeColumns("int1", schema);
  assertEquals(true, Arrays.equals(expected, included));
  List<StripeStatistics> stats = reader.getStripeStatistics();
  int numStripes = stats.size();
  assertEquals(3, numStripes);
  StripeStatistics ss1 = stats.get(0);
  StripeStatistics ss2 = stats.get(1);
  StripeStatistics ss3 = stats.get(2);
  // column 0 is the root struct; columns 1 and 2 are int1 and string1
  assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues());
  assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
  assertEquals(1000, ss3.getColumnStatistics()[0].getNumberOfValues());
  assertEquals(5000, ss1.getColumnStatistics()[1].getNumberOfValues());
  assertEquals(5000, ss2.getColumnStatistics()[1].getNumberOfValues());
  assertEquals(1000, ss3.getColumnStatistics()[1].getNumberOfValues());
  assertEquals(1, ((IntegerColumnStatistics) ss1.getColumnStatistics()[1]).getMinimum());
  assertEquals(2, ((IntegerColumnStatistics) ss2.getColumnStatistics()[1]).getMinimum());
  assertEquals(3, ((IntegerColumnStatistics) ss3.getColumnStatistics()[1]).getMinimum());
  assertEquals(1, ((IntegerColumnStatistics) ss1.getColumnStatistics()[1]).getMaximum());
  assertEquals(2, ((IntegerColumnStatistics) ss2.getColumnStatistics()[1]).getMaximum());
  assertEquals(3, ((IntegerColumnStatistics) ss3.getColumnStatistics()[1]).getMaximum());
  assertEquals(5000, ((IntegerColumnStatistics) ss1.getColumnStatistics()[1]).getSum());
  assertEquals(10000, ((IntegerColumnStatistics) ss2.getColumnStatistics()[1]).getSum());
  assertEquals(3000, ((IntegerColumnStatistics) ss3.getColumnStatistics()[1]).getSum());
  assertEquals(5000, ss1.getColumnStatistics()[2].getNumberOfValues());
  assertEquals(5000, ss2.getColumnStatistics()[2].getNumberOfValues());
  assertEquals(1000, ss3.getColumnStatistics()[2].getNumberOfValues());
  assertEquals("one", ((StringColumnStatistics) ss1.getColumnStatistics()[2]).getMinimum());
  assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMinimum());
  assertEquals("three", ((StringColumnStatistics) ss3.getColumnStatistics()[2]).getMinimum());
  assertEquals("one", ((StringColumnStatistics) ss1.getColumnStatistics()[2]).getMaximum());
  assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMaximum());
  assertEquals("three", ((StringColumnStatistics) ss3.getColumnStatistics()[2]).getMaximum());
  // for string columns, getSum() is the total length in characters
  assertEquals(15000, ((StringColumnStatistics) ss1.getColumnStatistics()[2]).getSum());
  assertEquals(15000, ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getSum());
  assertEquals(5000, ((StringColumnStatistics) ss3.getColumnStatistics()[2]).getSum());
  RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows();
  OrcProto.RowIndex[] index = recordReader.readRowIndex(0, null, null).getRowGroupIndex();
  assertEquals(3, index.length);
  List<OrcProto.RowIndexEntry> items = index[1].getEntryList();
  assertEquals(1, items.size());
  assertEquals(3, items.get(0).getPositionsCount());
  assertEquals(0, items.get(0).getPositions(0));
  assertEquals(0, items.get(0).getPositions(1));
  assertEquals(0, items.get(0).getPositions(2));
  assertEquals(1, items.get(0).getStatistics().getIntStatistics().getMinimum());
  index = recordReader.readRowIndex(1, null, null).getRowGroupIndex();
  assertEquals(3, index.length);
  items = index[1].getEntryList();
  assertEquals(2, items.get(0).getStatistics().getIntStatistics().getMaximum());
}
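The per-stripe statistics pattern the test exercises also works outside a test harness. A minimal standalone sketch using only calls that appear above, with testFilePath and conf as in these tests:

  // Walk the per-stripe column statistics of an ORC file.
  Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf));
  for (StripeStatistics stripe : reader.getStripeStatistics()) {
    ColumnStatistics[] cols = stripe.getColumnStatistics();
    // index 0 is the root struct; user columns start at 1
    for (int c = 0; c < cols.length; c++) {
      System.out.println("column " + c + ": " + cols[c].getNumberOfValues() + " values");
    }
  }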