
Example 6 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in the Apache Hive project.

From class TestOrcFile, method testListExpansion.

@Test
public void testListExpansion() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyList.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector));
    MyList row = new MyList();
    row.list.add(1);
    row.list.add(2);
    row.list.add(3);
    writer.addRow(row);
    row.list.clear();
    writer.addRow(row);
    row.list.add(11);
    row.list.add(12);
    writer.addRow(row);
    row.list = null;
    writer.addRow(row);
    row.list = new ArrayList<>();
    row.list.add(21);
    row.list.add(22);
    row.list.add(23);
    row.list.add(24);
    writer.addRow(row);
    writer.close();
    RecordReader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf)).rows();
    assertEquals(true, reader.hasNext());
    OrcStruct orcrow = (OrcStruct) reader.next(null);
    assertEquals(3, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
    orcrow = (OrcStruct) reader.next(row);
    assertEquals(0, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
    orcrow = (OrcStruct) reader.next(row);
    assertEquals(2, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
    assertEquals(null, ((OrcStruct) reader.next(row)).getFieldValue(0));
    orcrow = (OrcStruct) reader.next(row);
    assertEquals(4, ((List<IntWritable>) orcrow.getFieldValue(0)).size());
    assertEquals(false, reader.hasNext());
    reader.close();
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
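
The snippet above references a MyList row type that is not shown on this page. A minimal sketch of what getReflectionObjectInspector needs is given below; the field name and element type are assumptions inferred from how the test reads the data back as List<IntWritable>, so treat it as a reconstruction rather than the project's actual class.

import java.util.ArrayList;
import java.util.List;

// Hypothetical reconstruction of the row class used by testListExpansion.
// The reflection object inspector walks the fields of this class, so a single
// List<Integer> field is exposed to the ORC writer as struct<list:array<int>>.
public class MyList {
    List<Integer> list = new ArrayList<>();
}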

Example 7 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in the Apache Hive project.

From class TestOrcFile, method testMemoryManagementV12.

@Test
public void testMemoryManagementV12() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE).stripeSize(50000).bufferSize(100).rowIndexStride(0).memory(memory).batchSize(100).version(OrcFile.Version.V_0_12));
    assertEquals(testFilePath, memory.path);
    for (int i = 0; i < 2500; ++i) {
        writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
    }
    writer.close();
    assertEquals(null, memory.path);
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    int i = 0;
    for (StripeInformation stripe : reader.getStripes()) {
        i += 1;
        assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(), stripe.getDataLength() < 5000);
    }
    // With HIVE-7832, the dictionaries are disabled after writing the first
    // stripe because there are too many distinct values. Hence only 3 stripes,
    // compared to 25 stripes with version 0.11 (see testMemoryManagementV11 below).
    assertEquals(3, i);
    assertEquals(2500, reader.getNumberOfRows());
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)
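
The InnerStruct row type used here (and in the following examples) is likewise not shown. A plausible minimal version is sketched below, assuming one int field and one Hadoop Text field so that the reflected schema becomes struct<int1:int,string1:string>; the exact field layout is an assumption based on the constructor call new InnerStruct(int, String) and the statistics checked in the stripe-statistics example, not the project's actual class.

import org.apache.hadoop.io.Text;

// Hypothetical reconstruction of the InnerStruct row class: one int column
// and one string column. getReflectionObjectInspector turns these two fields
// into the ORC schema struct<int1:int,string1:string>.
public class InnerStruct {
    int int1;
    Text string1 = new Text();

    InnerStruct(int int1, String value) {
        this.int1 = int1;
        this.string1.set(value);
    }
}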

Example 8 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in the Apache Hive project.

From class TestOrcFile, method testMemoryManagementV11.

@Test
public void testMemoryManagementV11() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE).stripeSize(50000).bufferSize(100).rowIndexStride(0).memory(memory).batchSize(100).version(OrcFile.Version.V_0_11));
    assertEquals(testFilePath, memory.path);
    for (int i = 0; i < 2500; ++i) {
        writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
    }
    writer.close();
    assertEquals(null, memory.path);
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    int i = 0;
    for (StripeInformation stripe : reader.getStripes()) {
        i += 1;
        assertTrue("stripe " + i + " is too long at " + stripe.getDataLength(), stripe.getDataLength() < 5000);
    }
    assertEquals(25, i);
    assertEquals(2500, reader.getNumberOfRows());
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)
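
Both memory-management tests depend on a MyMemoryManager helper that records which path is currently registered with the writer (memory.path), so the assertions can verify registration on createWriter and de-registration on close. The sketch below is one way such a helper could look; it assumes the memory(...) writer option accepts any org.apache.orc.MemoryManager implementation and that the interface exposes addWriter, removeWriter, and addedRow as in ORC 1.x. The signatures are assumptions, not the test's actual code.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.orc.MemoryManager;

// Hypothetical memory manager that only records the registered path and the
// requested allocation, so a test can assert that the writer registers itself
// when created and unregisters itself when closed.
class MyMemoryManager implements MemoryManager {
    final long totalSpace;
    final double rate;
    Path path = null;              // non-null while a writer is registered
    long lastAllocation = 0;
    Callback callback;

    MyMemoryManager(Configuration conf, long totalSpace, double rate) {
        this.totalSpace = totalSpace;
        this.rate = rate;
    }

    @Override
    public void addWriter(Path path, long requestedAllocation, Callback callback)
            throws IOException {
        this.path = path;
        this.lastAllocation = requestedAllocation;
        this.callback = callback;
    }

    @Override
    public void removeWriter(Path path) throws IOException {
        this.path = null;
        this.lastAllocation = 0;
    }

    @Override
    public void addedRow(int rows) throws IOException {
        // no-op: these tests only assert on registration, not on memory checks
    }
}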

Example 9 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in the Apache Hive project.

From class TestOrcFile, method emptyFile.

@Test
public void emptyFile() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(1000).compress(CompressionKind.NONE).bufferSize(100));
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    assertEquals(false, reader.rows().hasNext());
    assertEquals(CompressionKind.NONE, reader.getCompression());
    assertEquals(0, reader.getNumberOfRows());
    assertEquals(0, reader.getCompressionSize());
    assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
    assertEquals(3, reader.getContentLength());
    assertEquals(false, reader.getStripes().iterator().hasNext());
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) Test(org.junit.Test)

Example 10 with ObjectInspectorFactory.getReflectionObjectInspector

Use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in the Apache Hive project.

From class TestOrcFile, method testStripeLevelStats.

@Test
public void testStripeLevelStats() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000).batchSize(1000));
    for (int i = 0; i < 11000; i++) {
        if (i >= 5000) {
            if (i >= 10000) {
                writer.addRow(new InnerStruct(3, "three"));
            } else {
                writer.addRow(new InnerStruct(2, "two"));
            }
        } else {
            writer.addRow(new InnerStruct(1, "one"));
        }
    }
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    TypeDescription schema = writer.getSchema();
    assertEquals(2, schema.getMaximumId());
    boolean[] expected = new boolean[] { false, true, false };
    boolean[] included = OrcUtils.includeColumns("int1", schema);
    assertEquals(true, Arrays.equals(expected, included));
    List<StripeStatistics> stats = reader.getStripeStatistics();
    int numStripes = stats.size();
    assertEquals(3, numStripes);
    StripeStatistics ss1 = stats.get(0);
    StripeStatistics ss2 = stats.get(1);
    StripeStatistics ss3 = stats.get(2);
    assertEquals(5000, ss1.getColumnStatistics()[0].getNumberOfValues());
    assertEquals(5000, ss2.getColumnStatistics()[0].getNumberOfValues());
    assertEquals(1000, ss3.getColumnStatistics()[0].getNumberOfValues());
    assertEquals(5000, (ss1.getColumnStatistics()[1]).getNumberOfValues());
    assertEquals(5000, (ss2.getColumnStatistics()[1]).getNumberOfValues());
    assertEquals(1000, (ss3.getColumnStatistics()[1]).getNumberOfValues());
    assertEquals(1, ((IntegerColumnStatistics) ss1.getColumnStatistics()[1]).getMinimum());
    assertEquals(2, ((IntegerColumnStatistics) ss2.getColumnStatistics()[1]).getMinimum());
    assertEquals(3, ((IntegerColumnStatistics) ss3.getColumnStatistics()[1]).getMinimum());
    assertEquals(1, ((IntegerColumnStatistics) ss1.getColumnStatistics()[1]).getMaximum());
    assertEquals(2, ((IntegerColumnStatistics) ss2.getColumnStatistics()[1]).getMaximum());
    assertEquals(3, ((IntegerColumnStatistics) ss3.getColumnStatistics()[1]).getMaximum());
    assertEquals(5000, ((IntegerColumnStatistics) ss1.getColumnStatistics()[1]).getSum());
    assertEquals(10000, ((IntegerColumnStatistics) ss2.getColumnStatistics()[1]).getSum());
    assertEquals(3000, ((IntegerColumnStatistics) ss3.getColumnStatistics()[1]).getSum());
    assertEquals(5000, (ss1.getColumnStatistics()[2]).getNumberOfValues());
    assertEquals(5000, (ss2.getColumnStatistics()[2]).getNumberOfValues());
    assertEquals(1000, (ss3.getColumnStatistics()[2]).getNumberOfValues());
    assertEquals("one", ((StringColumnStatistics) ss1.getColumnStatistics()[2]).getMinimum());
    assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMinimum());
    assertEquals("three", ((StringColumnStatistics) ss3.getColumnStatistics()[2]).getMinimum());
    assertEquals("one", ((StringColumnStatistics) ss1.getColumnStatistics()[2]).getMaximum());
    assertEquals("two", ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getMaximum());
    assertEquals("three", ((StringColumnStatistics) ss3.getColumnStatistics()[2]).getMaximum());
    assertEquals(15000, ((StringColumnStatistics) ss1.getColumnStatistics()[2]).getSum());
    assertEquals(15000, ((StringColumnStatistics) ss2.getColumnStatistics()[2]).getSum());
    assertEquals(5000, ((StringColumnStatistics) ss3.getColumnStatistics()[2]).getSum());
    RecordReaderImpl recordReader = (RecordReaderImpl) reader.rows();
    OrcProto.RowIndex[] index = recordReader.readRowIndex(0, null, null).getRowGroupIndex();
    assertEquals(3, index.length);
    List<OrcProto.RowIndexEntry> items = index[1].getEntryList();
    assertEquals(1, items.size());
    assertEquals(3, items.get(0).getPositionsCount());
    assertEquals(0, items.get(0).getPositions(0));
    assertEquals(0, items.get(0).getPositions(1));
    assertEquals(0, items.get(0).getPositions(2));
    assertEquals(1, items.get(0).getStatistics().getIntStatistics().getMinimum());
    index = recordReader.readRowIndex(1, null, null).getRowGroupIndex();
    assertEquals(3, index.length);
    items = index[1].getEntryList();
    assertEquals(2, items.get(0).getStatistics().getIntStatistics().getMaximum());
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) StripeStatistics(org.apache.orc.StripeStatistics) TypeDescription(org.apache.orc.TypeDescription) Test(org.junit.Test)
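
The stripe-level assertions index getColumnStatistics() by ORC column id: 0 is the root struct, 1 is int1, and 2 is string1. The short, self-contained sketch below (using the public org.apache.orc.TypeDescription API, independent of the test) prints that mapping for the assumed struct<int1:int,string1:string> schema.

import java.util.List;

import org.apache.orc.TypeDescription;

// Prints the column ids ORC assigns to the schema reflected from InnerStruct,
// which is why statistics index 1 carries the integer stats and index 2 the
// string stats in testStripeLevelStats above.
public class ColumnIdDemo {
    public static void main(String[] args) {
        TypeDescription schema =
            TypeDescription.fromString("struct<int1:int,string1:string>");
        System.out.println("root id = " + schema.getId());           // 0
        System.out.println("maximum id = " + schema.getMaximumId()); // 2
        List<String> fields = schema.getFieldNames();
        List<TypeDescription> children = schema.getChildren();
        for (int i = 0; i < fields.size(); i++) {
            System.out.println(fields.get(i) + " -> column id " + children.get(i).getId());
        }
    }
}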

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 73
Test (org.junit.Test): 64
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 60
Configuration (org.apache.hadoop.conf.Configuration): 25
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector): 25
InputSplit (org.apache.hadoop.mapred.InputSplit): 25
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector): 24
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector): 23
Properties (java.util.Properties): 20
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector): 20
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector): 18
BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector): 18
ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector): 18
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector): 18
FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector): 18
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector): 18
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector): 18
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector): 18
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector): 18
RecordWriter (org.apache.hadoop.mapred.RecordWriter): 18