Search in sources :

Example 81 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcRawRecordMerger method testEmpty.

@Test
public void testEmpty() throws Exception {
    final int BUCKET = 0;
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testEmpty").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the empty base
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).inspector(inspector).bucket(BUCKET).writingBase(true).maximumTransactionId(100).finalDestination(root);
    of.getRecordUpdater(root, options).close(false);
    ValidTxnList txnList = new ValidReadTxnList("200:" + Long.MAX_VALUE);
    AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, txnList);
    Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
    Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN, true);
    OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()));
    RecordIdentifier key = merger.createKey();
    OrcStruct value = merger.createValue();
    assertEquals(false, merger.next(key, value));
}
Also used : Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ValidReadTxnList(org.apache.hadoop.hive.common.ValidReadTxnList) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) FileSystem(org.apache.hadoop.fs.FileSystem) ValidTxnList(org.apache.hadoop.hive.common.ValidTxnList) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils) Test(org.junit.Test)

Example 82 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcRecordUpdater method testUpdates.

@Test
public void testUpdates() throws Exception {
    Path root = new Path(workDir, "testUpdates");
    Configuration conf = new Configuration();
    FileSystem fs = root.getFileSystem(conf);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    int bucket = 20;
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).filesystem(fs).bucket(bucket).writingBase(false).minimumTransactionId(100).maximumTransactionId(100).inspector(inspector).reporter(Reporter.NULL).recordIdColumn(1).finalDestination(root);
    RecordUpdater updater = new OrcRecordUpdater(root, options);
    updater.update(100, new MyRow("update", 30, 10, bucket));
    updater.delete(100, new MyRow("", 60, 40, bucket));
    assertEquals(-1L, updater.getStats().getRowCount());
    updater.close(false);
    Path bucketPath = AcidUtils.createFilename(root, options);
    Reader reader = OrcFile.createReader(bucketPath, new OrcFile.ReaderOptions(conf).filesystem(fs));
    assertEquals(2, reader.getNumberOfRows());
    RecordReader rows = reader.rows();
    // check the contents of the file
    assertEquals(true, rows.hasNext());
    OrcStruct row = (OrcStruct) rows.next(null);
    assertEquals(OrcRecordUpdater.UPDATE_OPERATION, OrcRecordUpdater.getOperation(row));
    assertEquals(100, OrcRecordUpdater.getCurrentTransaction(row));
    assertEquals(10, OrcRecordUpdater.getOriginalTransaction(row));
    assertEquals(20, OrcRecordUpdater.getBucket(row));
    assertEquals(30, OrcRecordUpdater.getRowId(row));
    assertEquals("update", OrcRecordUpdater.getRow(row).getFieldValue(0).toString());
    assertEquals(true, rows.hasNext());
    row = (OrcStruct) rows.next(null);
    assertEquals(100, OrcRecordUpdater.getCurrentTransaction(row));
    assertEquals(40, OrcRecordUpdater.getOriginalTransaction(row));
    assertEquals(20, OrcRecordUpdater.getBucket(row));
    assertEquals(60, OrcRecordUpdater.getRowId(row));
    assertNull(OrcRecordUpdater.getRow(row));
    assertEquals(false, rows.hasNext());
}
Also used : Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater) Test(org.junit.Test)

Example 83 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcRecordUpdater method testWriterTblProperties.

@Test
public void testWriterTblProperties() throws Exception {
    Path root = new Path(workDir, "testWriterTblProperties");
    Configuration conf = new Configuration();
    // Must use raw local because the checksummer doesn't honor flushes.
    FileSystem fs = FileSystem.getLocal(conf).getRaw();
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Properties tblProps = new Properties();
    tblProps.setProperty("orc.compress", "SNAPPY");
    tblProps.setProperty("orc.compress.size", "8192");
    HiveConf.setIntVar(conf, HiveConf.ConfVars.HIVE_ORC_BASE_DELTA_RATIO, 4);
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).filesystem(fs).bucket(10).writingBase(false).minimumTransactionId(10).maximumTransactionId(19).inspector(inspector).reporter(Reporter.NULL).finalDestination(root).tableProperties(tblProps);
    RecordUpdater updater = new OrcRecordUpdater(root, options);
    updater.insert(11, new MyRow("first"));
    updater.insert(11, new MyRow("second"));
    updater.insert(11, new MyRow("third"));
    updater.flush();
    updater.insert(12, new MyRow("fourth"));
    updater.insert(12, new MyRow("fifth"));
    updater.flush();
    PrintStream origOut = System.out;
    ByteArrayOutputStream myOut = new ByteArrayOutputStream();
    System.setOut(new PrintStream(myOut));
    FileDump.main(new String[] { root.toUri().toString() });
    System.out.flush();
    String outDump = new String(myOut.toByteArray());
    assertEquals(true, outDump.contains("Compression: SNAPPY"));
    assertEquals(true, outDump.contains("Compression size: 2048"));
    System.setOut(origOut);
    updater.close(false);
}
Also used : Path(org.apache.hadoop.fs.Path) PrintStream(java.io.PrintStream) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) ByteArrayOutputStream(java.io.ByteArrayOutputStream) Properties(java.util.Properties) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater) Test(org.junit.Test)

Example 84 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcSerDeStats method testStringAndBinaryStatistics.

@Test
public void testStringAndBinaryStatistics() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcSerDeStats.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(SimpleStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(100000).bufferSize(10000));
    writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4), "foo"));
    writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3), "bar"));
    writer.addRow(new SimpleStruct(bytes(0, 1, 2, 3, 4, 5), null));
    writer.addRow(new SimpleStruct(null, "hi"));
    writer.close();
    assertEquals(4, writer.getNumberOfRows());
    assertEquals(273, writer.getRawDataSize());
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    assertEquals(4, reader.getNumberOfRows());
    assertEquals(273, reader.getRawDataSize());
    assertEquals(15, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1")));
    assertEquals(258, reader.getRawDataSizeOfColumns(Lists.newArrayList("string1")));
    assertEquals(273, reader.getRawDataSizeOfColumns(Lists.newArrayList("bytes1", "string1")));
    // check the stats
    ColumnStatistics[] stats = reader.getStatistics();
    assertEquals(4, stats[0].getNumberOfValues());
    assertEquals("count: 4 hasNull: false", stats[0].toString());
    assertEquals(3, stats[1].getNumberOfValues());
    assertEquals(15, ((BinaryColumnStatistics) stats[1]).getSum());
    assertEquals("count: 3 hasNull: true sum: 15", stats[1].toString());
    assertEquals(3, stats[2].getNumberOfValues());
    assertEquals("bar", ((StringColumnStatistics) stats[2]).getMinimum());
    assertEquals("hi", ((StringColumnStatistics) stats[2]).getMaximum());
    assertEquals(8, ((StringColumnStatistics) stats[2]).getSum());
    assertEquals("count: 3 hasNull: true min: bar max: hi sum: 8", stats[2].toString());
    // check the inspectors
    StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
    assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
    assertEquals("struct<bytes1:binary,string1:string>", readerInspector.getTypeName());
    List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
    BinaryObjectInspector bi = (BinaryObjectInspector) readerInspector.getStructFieldRef("bytes1").getFieldObjectInspector();
    StringObjectInspector st = (StringObjectInspector) readerInspector.getStructFieldRef("string1").getFieldObjectInspector();
    RecordReader rows = reader.rows();
    Object row = rows.next(null);
    assertNotNull(row);
    // check the contents of the first row
    assertEquals(bytes(0, 1, 2, 3, 4), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals("foo", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // check the contents of second row
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertEquals(bytes(0, 1, 2, 3), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals("bar", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // check the contents of second row
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertEquals(bytes(0, 1, 2, 3, 4, 5), bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertNull(st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // check the contents of second row
    assertEquals(true, rows.hasNext());
    row = rows.next(row);
    assertNull(bi.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(0))));
    assertEquals("hi", st.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(1))));
    // handle the close up
    assertEquals(false, rows.hasNext());
    rows.close();
}
Also used : DoubleColumnStatistics(org.apache.orc.DoubleColumnStatistics) IntegerColumnStatistics(org.apache.orc.IntegerColumnStatistics) BooleanColumnStatistics(org.apache.orc.BooleanColumnStatistics) StringColumnStatistics(org.apache.orc.StringColumnStatistics) ColumnStatistics(org.apache.orc.ColumnStatistics) BinaryColumnStatistics(org.apache.orc.BinaryColumnStatistics) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 85 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcSerDeStats method testOrcSerDeStatsMap.

@Test
public void testOrcSerDeStatsMap() throws Exception {
    ObjectInspector inspector;
    synchronized (TestOrcSerDeStats.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MapStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer writer = OrcFile.createWriter(testFilePath, OrcFile.writerOptions(conf).inspector(inspector).stripeSize(10000).bufferSize(10000));
    for (int row = 0; row < 1000; row++) {
        Map<String, Double> test = new HashMap<String, Double>();
        for (int i = 0; i < 10; i++) {
            test.put("hi" + i, 2.0);
        }
        writer.addRow(new MapStruct(test));
    }
    writer.close();
    // stats from writer
    assertEquals(1000, writer.getNumberOfRows());
    assertEquals(950000, writer.getRawDataSize());
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    // stats from reader
    assertEquals(1000, reader.getNumberOfRows());
    assertEquals(950000, reader.getRawDataSize());
    assertEquals(950000, reader.getRawDataSizeOfColumns(Lists.newArrayList("map1")));
}
Also used : ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) HashMap(java.util.HashMap) Test(org.junit.Test)

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector)72 Test (org.junit.Test)63 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)59 Path (org.apache.hadoop.fs.Path)26 StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector)25 InputSplit (org.apache.hadoop.mapred.InputSplit)25 Configuration (org.apache.hadoop.conf.Configuration)24 BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector)24 MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector)23 FileSystem (org.apache.hadoop.fs.FileSystem)21 Properties (java.util.Properties)20 IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector)20 ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector)18 BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector)18 ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector)18 DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector)18 FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector)18 HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector)18 LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector)18 ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector)18