Search in sources :

Example 1 with MemoryManager

use of org.apache.orc.MemoryManager in project hive by apache.

Class TestOrcFile, method testMemoryManagementV12.

/**
 * Writes 2500 rows with a version 0.12 writer whose memory manager pool is
 * already claimed by nine other registered writers, then reads the file back
 * and checks the resulting stripe layout.
 *
 * <p>The test asserts that the allocation scale is 0.1, that every stripe's
 * data length stays below 5000 bytes, that exactly 3 stripes were written and
 * that all 2500 rows are reported by the reader.
 *
 * @throws Exception if writing or reading the test file fails
 */
@Test
public void testMemoryManagementV12() throws Exception {
    OrcConf.ROWS_BETWEEN_CHECKS.setLong(conf, 100);
    final long poolSize = 50_000;
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManager memoryManager = new MemoryManagerImpl(poolSize);
    // Register 9 placeholder writers that each request the full pool. The real
    // writer created below is presumably the 10th registrant, which is what
    // the 0.1 allocation scale asserted afterwards corresponds to.
    MemoryManager.Callback ignore = newScale -> false;
    for (int f = 0; f < 9; ++f) {
        memoryManager.addWriter(new Path("file-" + f), poolSize, ignore);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE).stripeSize(50000).bufferSize(100).rowIndexStride(0).memory(memoryManager).batchSize(100).version(OrcFile.Version.V_0_12));
    // Use the delta overload: the two-argument form boxes both doubles and
    // compares them with exact Object equality, which is fragile for a ratio.
    assertEquals(0.1, ((MemoryManagerImpl) memoryManager).getAllocationScale(), 0.00001);
    for (int i = 0; i < 2500; ++i) {
        writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
    }
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    // Close the reader even when one of the assertions below fails.
    try {
        int stripeCount = 0;
        for (StripeInformation stripe : reader.getStripes()) {
            stripeCount += 1;
            assertTrue("stripe " + stripeCount + " is too long at " + stripe.getDataLength(), stripe.getDataLength() < 5000);
        }
        // with HIVE-7832, the dictionaries will be disabled after writing the first
        // stripe as there are too many distinct values. Hence only 3 stripes as
        // compared to 25 stripes in version 0.11 (see testMemoryManagementV11)
        assertEquals(3, stripeCount);
        assertEquals(2500, reader.getNumberOfRows());
    } finally {
        reader.close();
    }
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) Text(org.apache.hadoop.io.Text) Random(java.util.Random) Date(org.apache.hadoop.hive.common.type.Date) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) LongWritable(org.apache.hadoop.io.LongWritable) ByteBuffer(java.nio.ByteBuffer) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) OrcConf(org.apache.orc.OrcConf) Assert.assertNotNull(junit.framework.Assert.assertNotNull) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) Path(org.apache.hadoop.fs.Path) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) Parameterized(org.junit.runners.Parameterized) LlapDaemonInfo(org.apache.hadoop.hive.llap.LlapDaemonInfo) StripeStatistics(org.apache.orc.StripeStatistics) Longs(com.google.common.primitives.Longs) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) Collection(java.util.Collection) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StringColumnStatistics(org.apache.orc.StringColumnStatistics) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) List(java.util.List) 
MemoryManagerImpl(org.apache.orc.impl.MemoryManagerImpl) BooleanWritable(org.apache.hadoop.io.BooleanWritable) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Assert.assertEquals(junit.framework.Assert.assertEquals) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) OrcProto(org.apache.orc.OrcProto) HashMap(java.util.HashMap) LlapProxy(org.apache.hadoop.hive.llap.io.api.LlapProxy) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) StripeInformation(org.apache.orc.StripeInformation) ArrayList(java.util.ArrayList) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) SearchArgumentFactory(org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory) Assert.assertTrue(junit.framework.Assert.assertTrue) Lists(com.google.common.collect.Lists) DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) TestName(org.junit.rules.TestName) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) OrcUtils(org.apache.orc.OrcUtils) BytesWritable(org.apache.hadoop.io.BytesWritable) ManagementFactory(java.lang.management.ManagementFactory) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Assert.assertNull(junit.framework.Assert.assertNull) Before(org.junit.Before) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) Iterator(java.util.Iterator) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ColumnStatistics(org.apache.orc.ColumnStatistics) TypeDescription(org.apache.orc.TypeDescription) IOException(java.io.IOException) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) Test(org.junit.Test) 
File(java.io.File) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) Rule(org.junit.Rule) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) HiveTestUtils(org.apache.hive.common.util.HiveTestUtils) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Assert(org.junit.Assert) FloatWritable(org.apache.hadoop.io.FloatWritable) Assert.assertFalse(junit.framework.Assert.assertFalse) MemoryManager(org.apache.orc.MemoryManager) Path(org.apache.hadoop.fs.Path) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 
BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) MemoryManagerImpl(org.apache.orc.impl.MemoryManagerImpl) MemoryManager(org.apache.orc.MemoryManager) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)

Example 2 with MemoryManager

use of org.apache.orc.MemoryManager in project hive by apache.

Class TestOrcFile, method testMemoryManagementV11.

/**
 * Writes 2500 rows with a version 0.11 writer whose memory manager pool is
 * already claimed by nine other registered writers, then reads the file back
 * and checks the resulting stripe layout.
 *
 * <p>The test asserts that the allocation scale is 0.1, that every stripe's
 * data length stays below 5000 bytes, that exactly 25 stripes were written
 * and that all 2500 rows are reported by the reader.
 *
 * @throws Exception if writing or reading the test file fails
 */
@Test
public void testMemoryManagementV11() throws Exception {
    OrcConf.ROWS_BETWEEN_CHECKS.setLong(conf, 100);
    final long poolSize = 50_000;
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    MemoryManager memoryManager = new MemoryManagerImpl(poolSize);
    // Register 9 placeholder writers that each request the full pool. The real
    // writer created below is presumably the 10th registrant, which is what
    // the 0.1 allocation scale asserted afterwards corresponds to.
    MemoryManager.Callback ignore = newScale -> false;
    for (int f = 0; f < 9; ++f) {
        memoryManager.addWriter(new Path("file-" + f), poolSize, ignore);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).compress(CompressionKind.NONE).stripeSize(50000).bufferSize(100).rowIndexStride(0).memory(memoryManager).batchSize(100).version(OrcFile.Version.V_0_11));
    // Use the delta overload: the two-argument form boxes both doubles and
    // compares them with exact Object equality, which is fragile for a ratio.
    assertEquals(0.1, ((MemoryManagerImpl) memoryManager).getAllocationScale(), 0.00001);
    for (int i = 0; i < 2500; ++i) {
        writer.addRow(new InnerStruct(i * 300, Integer.toHexString(10 * i)));
    }
    writer.close();
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    // Close the reader even when one of the assertions below fails.
    try {
        int stripeCount = 0;
        for (StripeInformation stripe : reader.getStripes()) {
            stripeCount += 1;
            assertTrue("stripe " + stripeCount + " is too long at " + stripe.getDataLength(), stripe.getDataLength() < 5000);
        }
        assertEquals(25, stripeCount);
        assertEquals(2500, reader.getNumberOfRows());
    } finally {
        reader.close();
    }
}
Also used : Arrays(java.util.Arrays) FileSystem(org.apache.hadoop.fs.FileSystem) Text(org.apache.hadoop.io.Text) Random(java.util.Random) Date(org.apache.hadoop.hive.common.type.Date) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) LongWritable(org.apache.hadoop.io.LongWritable) ByteBuffer(java.nio.ByteBuffer) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) TimestampWritableV2(org.apache.hadoop.hive.serde2.io.TimestampWritableV2) DecimalColumnStatistics(org.apache.orc.DecimalColumnStatistics) OrcConf(org.apache.orc.OrcConf) Assert.assertNotNull(junit.framework.Assert.assertNotNull) Map(java.util.Map) Configuration(org.apache.hadoop.conf.Configuration) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) Path(org.apache.hadoop.fs.Path) BigInteger(java.math.BigInteger) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) ShortWritable(org.apache.hadoop.hive.serde2.io.ShortWritable) IntWritable(org.apache.hadoop.io.IntWritable) Parameterized(org.junit.runners.Parameterized) LlapDaemonInfo(org.apache.hadoop.hive.llap.LlapDaemonInfo) StripeStatistics(org.apache.orc.StripeStatistics) Longs(com.google.common.primitives.Longs) DateWritableV2(org.apache.hadoop.hive.serde2.io.DateWritableV2) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) Collection(java.util.Collection) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StringColumnStatistics(org.apache.orc.StringColumnStatistics) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) List(java.util.List) 
MemoryManagerImpl(org.apache.orc.impl.MemoryManagerImpl) BooleanWritable(org.apache.hadoop.io.BooleanWritable) StructField(org.apache.hadoop.hive.serde2.objectinspector.StructField) Assert.assertEquals(junit.framework.Assert.assertEquals) ByteWritable(org.apache.hadoop.hive.serde2.io.ByteWritable) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) RunWith(org.junit.runner.RunWith) Parameters(org.junit.runners.Parameterized.Parameters) OrcProto(org.apache.orc.OrcProto) HashMap(java.util.HashMap) LlapProxy(org.apache.hadoop.hive.llap.io.api.LlapProxy) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) StripeInformation(org.apache.orc.StripeInformation) ArrayList(java.util.ArrayList) SearchArgument(org.apache.hadoop.hive.ql.io.sarg.SearchArgument) SearchArgumentFactory(org.apache.hadoop.hive.ql.io.sarg.SearchArgumentFactory) Assert.assertTrue(junit.framework.Assert.assertTrue) Lists(com.google.common.collect.Lists) DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) TestName(org.junit.rules.TestName) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) OrcUtils(org.apache.orc.OrcUtils) BytesWritable(org.apache.hadoop.io.BytesWritable) ManagementFactory(java.lang.management.ManagementFactory) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) DoubleWritable(org.apache.hadoop.hive.serde2.io.DoubleWritable) Assert.assertNull(junit.framework.Assert.assertNull) Before(org.junit.Before) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) Iterator(java.util.Iterator) Timestamp(org.apache.hadoop.hive.common.type.Timestamp) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ColumnStatistics(org.apache.orc.ColumnStatistics) TypeDescription(org.apache.orc.TypeDescription) IOException(java.io.IOException) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) Test(org.junit.Test) 
File(java.io.File) PredicateLeaf(org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf) Rule(org.junit.Rule) ObjectInspectorFactory(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory) HiveTestUtils(org.apache.hive.common.util.HiveTestUtils) HiveDecimal(org.apache.hadoop.hive.common.type.HiveDecimal) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) HiveDecimalWritable(org.apache.hadoop.hive.serde2.io.HiveDecimalWritable) Assert(org.junit.Assert) FloatWritable(org.apache.hadoop.io.FloatWritable) Assert.assertFalse(junit.framework.Assert.assertFalse) MemoryManager(org.apache.orc.MemoryManager) Path(org.apache.hadoop.fs.Path) HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 
BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) MemoryManagerImpl(org.apache.orc.impl.MemoryManagerImpl) MemoryManager(org.apache.orc.MemoryManager) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)

Aggregations

Lists (com.google.common.collect.Lists)2 Longs (com.google.common.primitives.Longs)2 File (java.io.File)2 IOException (java.io.IOException)2 ManagementFactory (java.lang.management.ManagementFactory)2 BigInteger (java.math.BigInteger)2 ByteBuffer (java.nio.ByteBuffer)2 ArrayList (java.util.ArrayList)2 Arrays (java.util.Arrays)2 Collection (java.util.Collection)2 HashMap (java.util.HashMap)2 Iterator (java.util.Iterator)2 List (java.util.List)2 Map (java.util.Map)2 Random (java.util.Random)2 Assert.assertEquals (junit.framework.Assert.assertEquals)2 Assert.assertFalse (junit.framework.Assert.assertFalse)2 Assert.assertNotNull (junit.framework.Assert.assertNotNull)2 Assert.assertNull (junit.framework.Assert.assertNull)2 Assert.assertTrue (junit.framework.Assert.assertTrue)2