Search in sources :

Example 76 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestInputOutputFormat method testInOutFormat.

/**
 * Round-trips three {@code MyRow} records through the ORC output and input formats.
 *
 * <p>The rows are written through {@code OrcOutputFormat}, the serde schema and
 * {@code validateInput} are checked, and the file is then read back three times:
 * with the schema-evolution columns set, with only column 0 appended to the read
 * columns, and after resetting the projection to read all columns.
 *
 * @throws Exception if any write, split-generation, or read step fails
 */
@Test
public void testInOutFormat() throws Exception {
    Properties tableProps = new Properties();
    tableProps.setProperty("columns", "x,y");
    tableProps.setProperty("columns.types", "int:int");
    StructObjectInspector rowInspector;
    synchronized (TestOrcFile.class) {
        rowInspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(
            MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    AbstractSerDe orcSerde = new OrcSerde();
    HiveOutputFormat<?, ?> orcOut = new OrcOutputFormat();
    org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter hiveWriter =
        orcOut.getHiveRecordWriter(conf, testFilePath, MyRow.class, true, tableProps, Reporter.NULL);
    // rows (1, 2), (2, 2) and (3, 2)
    for (int x = 1; x <= 3; ++x) {
        hiveWriter.write(orcSerde.serialize(new MyRow(x, 2), rowInspector));
    }
    hiveWriter.close(true);

    // a fresh serde, initialized from the table properties, supplies the inspector used for reading
    orcSerde = new OrcSerde();
    SerDeUtils.initializeSerDe(orcSerde, conf, tableProps, null);
    assertEquals(OrcSerde.OrcSerdeRow.class, orcSerde.getSerializedClass());
    rowInspector = (StructObjectInspector) orcSerde.getObjectInspector();
    assertEquals("struct<x:int,y:int>", rowInspector.getTypeName());

    InputFormat<?, ?> orcIn = new OrcInputFormat();
    FileInputFormat.setInputPaths(conf, testFilePath.toString());
    InputSplit[] splits = orcIn.getSplits(conf, 1);
    assertEquals(1, splits.length);

    // test the validate input method: empty list, the ORC file alone, then the ORC file plus workDir
    ArrayList<FileStatus> candidates = new ArrayList<FileStatus>();
    InputFormatChecker checker = (InputFormatChecker) orcIn;
    assertEquals(false, checker.validateInput(fs, new HiveConf(), candidates));
    candidates.add(fs.getFileStatus(testFilePath));
    assertEquals(true, checker.validateInput(fs, new HiveConf(), candidates));
    candidates.add(fs.getFileStatus(workDir));
    assertEquals(false, checker.validateInput(fs, new HiveConf(), candidates));

    // read the whole file
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    org.apache.hadoop.mapred.RecordReader rowReader = orcIn.getRecordReader(splits[0], conf, Reporter.NULL);
    Object key = rowReader.createKey();
    Writable value = (Writable) rowReader.createValue();
    int seen = 0;
    List<? extends StructField> columns = rowInspector.getAllStructFieldRefs();
    IntObjectInspector intInspector = (IntObjectInspector) columns.get(0).getFieldObjectInspector();
    while (rowReader.next(key, value)) {
        seen++;
        assertEquals(seen, intInspector.get(rowInspector.getStructFieldData(orcSerde.deserialize(value), columns.get(0))));
        assertEquals(2, intInspector.get(rowInspector.getStructFieldData(orcSerde.deserialize(value), columns.get(1))));
    }
    assertEquals(3, seen);
    assertEquals(1.0, rowReader.getProgress(), 0.00001);
    rowReader.close();

    // read just the first column; the second one is expected to come back as null
    ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(0));
    rowReader = orcIn.getRecordReader(splits[0], conf, Reporter.NULL);
    key = rowReader.createKey();
    value = (Writable) rowReader.createValue();
    seen = 0;
    columns = rowInspector.getAllStructFieldRefs();
    while (rowReader.next(key, value)) {
        seen++;
        assertEquals(seen, intInspector.get(rowInspector.getStructFieldData(value, columns.get(0))));
        assertEquals(null, rowInspector.getStructFieldData(value, columns.get(1)));
    }
    assertEquals(3, seen);
    rowReader.close();

    // test the mapping of empty string to all columns
    ColumnProjectionUtils.setReadAllColumns(conf);
    rowReader = orcIn.getRecordReader(splits[0], conf, Reporter.NULL);
    key = rowReader.createKey();
    value = (Writable) rowReader.createValue();
    seen = 0;
    columns = rowInspector.getAllStructFieldRefs();
    while (rowReader.next(key, value)) {
        seen++;
        assertEquals(seen, intInspector.get(rowInspector.getStructFieldData(value, columns.get(0))));
        assertEquals(2, intInspector.get(rowInspector.getStructFieldData(orcSerde.deserialize(value), columns.get(1))));
    }
    assertEquals(3, seen);
    rowReader.close();
}
Also used : IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) ArrayList(java.util.ArrayList) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) IntWritable(org.apache.hadoop.io.IntWritable) Properties(java.util.Properties) AbstractSerDe(org.apache.hadoop.hive.serde2.AbstractSerDe) HiveConf(org.apache.hadoop.hive.conf.HiveConf) InputSplit(org.apache.hadoop.mapred.InputSplit) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 77 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestInputOutputFormat method testSplitGenReadOpsLocalCache.

/**
 * Counts the read operations that ORC split generation issues against the
 * "mock" file system for a table directory holding two ten-row files, under
 * four successive configurations: stripe-details cache size "0", the "BI"
 * split strategy, cache size "10Mb" with the "HYBRID" strategy, and a repeat
 * of that last configuration.
 *
 * <p>{@code fs.defaultFS} is pointed at {@code mock:///} for the duration of
 * the test and is restored to {@code file:///} in a {@code finally} block, so
 * a failing assertion does not leave the shared configuration on the mock
 * file system.
 *
 * @throws Exception if writing the files or generating splits fails
 */
@Test
public void testSplitGenReadOpsLocalCache() throws Exception {
    MockFileSystem fs = new MockFileSystem(conf);
    // creates the static cache
    MockPath mockPath = new MockPath(fs, "mock:///mocktbl");
    conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "0");
    conf.set("mapred.input.dir", mockPath.toString());
    conf.set("fs.mock.impl", MockFileSystem.class.getName());
    conf.set("fs.defaultFS", "mock:///");
    try {
        StructObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        // two identical ten-row files: mock:/mocktbl/0_0 and mock:/mocktbl/0_1
        for (String fileName : new String[] { "/0_0", "/0_1" }) {
            Writer writer = OrcFile.createWriter(new Path(mockPath + fileName), OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
            for (int i = 0; i < 10; ++i) {
                writer.addRow(new MyRow(i, 2 * i));
            }
            writer.close();
        }

        // call-1: listLocatedStatus - mock:/mocktbl
        // call-2: check existence of side file for mock:/mocktbl/0_0
        // call-3: open - mock:/mocktbl/0_0
        // call-4: check existence of side file for  mock:/mocktbl/0_1
        // call-5: open - mock:/mocktbl/0_1
        assertEquals(5, mockReadOpsDuringSplitGeneration());

        // force BI to avoid reading footers
        conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "BI");
        // call-1: listLocatedStatus - mock:/mocktbl
        // call-2: check existence of side file for mock:/mocktbl/0_0
        // call-3: check existence of side file for  mock:/mocktbl/0_1
        assertEquals(3, mockReadOpsDuringSplitGeneration());

        // enable cache and use default strategy
        conf.set(ConfVars.HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE.varname, "10Mb");
        conf.set(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.varname, "HYBRID");
        // call-1: listLocatedStatus - mock:/mocktbl
        // call-2: check existence of side file for mock:/mocktbl/0_0
        // call-3: open - mock:/mocktbl/0_0
        // call-4: check existence of side file for mock:/mocktbl/0_1
        // call-5: open - mock:/mocktbl/0_1
        assertEquals(5, mockReadOpsDuringSplitGeneration());

        // same configuration again
        // call-1: listLocatedStatus - mock:/mocktbl
        assertEquals(1, mockReadOpsDuringSplitGeneration());
    } finally {
        // revert back to local fs, even when an assertion above fails
        conf.set("fs.defaultFS", "file:///");
    }
}

/**
 * Generates splits with a fresh {@code OrcInputFormat} (asking for 2 splits,
 * and asserting that exactly 2 come back) and returns how many read
 * operations the "mock" file system recorded while doing so.
 */
private int mockReadOpsDuringSplitGeneration() throws Exception {
    int readOpsBefore = currentMockReadOps();
    assertTrue("MockFS has stats. Read ops not expected to be -1", readOpsBefore != -1);
    OrcInputFormat orcInputFormat = new OrcInputFormat();
    InputSplit[] splits = orcInputFormat.getSplits(conf, 2);
    assertEquals(2, splits.length);
    return currentMockReadOps() - readOpsBefore;
}

/**
 * Returns the read-op counter of the statistics entry whose scheme is "mock"
 * (case-insensitive; the last matching entry wins), or -1 if there is none.
 */
private static int currentMockReadOps() {
    int readOps = -1;
    for (FileSystem.Statistics statistics : FileSystem.getAllStatistics()) {
        if (statistics.getScheme().equalsIgnoreCase("mock")) {
            readOps = statistics.getReadOps();
        }
    }
    return readOps;
}
Also used : InputSplit(org.apache.hadoop.mapred.InputSplit) RecordWriter(org.apache.hadoop.mapred.RecordWriter) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)

Example 78 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcFile method testWithoutIndex.

/**
 * Writes a snappy-compressed ORC file with a row index stride of 0, then reads
 * it back and checks the row count, the zero index length of the first stripe,
 * and every row value against a regenerated pseudo-random sequence (seed 24).
 * @throws Exception
 */
@Test
public void testWithoutIndex() throws Exception {
    ObjectInspector structInspector;
    synchronized (TestOrcFile.class) {
        structInspector = ObjectInspectorFactory.getReflectionObjectInspector(
            InnerStruct.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    Writer orcWriter = OrcFile.createWriter(testFilePath,
        OrcFile.writerOptions(conf)
            .inspector(structInspector)
            .stripeSize(5000)
            .compress(CompressionKind.SNAPPY)
            .bufferSize(1000)
            .rowIndexStride(0));

    // 10000 distinct rows, each written 5 times in a row
    Random source = new Random(24);
    for (int n = 0; n < 10000; ++n) {
        int intField = source.nextInt();
        String strField = Integer.toBinaryString(source.nextInt());
        InnerStruct generated = new InnerStruct(intField, strField);
        for (int copy = 0; copy < 5; ++copy) {
            orcWriter.addRow(generated);
        }
    }
    orcWriter.close();

    Reader orcReader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    assertEquals(50000, orcReader.getNumberOfRows());
    assertEquals(0, orcReader.getRowIndexStride());
    StripeInformation firstStripe = orcReader.getStripes().iterator().next();
    assertEquals(true, firstStripe.getDataLength() != 0);
    assertEquals(0, firstStripe.getIndexLength());

    // replay the same seed to recompute the expected values while scanning
    RecordReader rowIterator = orcReader.rows();
    Random replay = new Random(24);
    OrcStruct current = null;
    for (int n = 0; n < 10000; ++n) {
        int expectedInt = replay.nextInt();
        String expectedStr = Integer.toBinaryString(replay.nextInt());
        for (int copy = 0; copy < 5; ++copy) {
            assertEquals(true, rowIterator.hasNext());
            current = (OrcStruct) rowIterator.next(current);
            assertEquals(expectedInt, ((IntWritable) current.getFieldValue(0)).get());
            assertEquals(expectedStr, current.getFieldValue(1).toString());
        }
    }
    assertEquals(false, rowIterator.hasNext());
    rowIterator.close();
}
Also used : HiveDecimalObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) BooleanObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) ShortObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) MapObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) FloatObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) StringObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) ListObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) IntObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) LongObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) BinaryObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) ByteObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) DoubleObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) TimestampObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) Random(java.util.Random) StripeInformation(org.apache.orc.StripeInformation) Test(org.junit.Test)

Example 79 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcRawRecordMerger method testNewBaseAndDelta.

/**
 * Writes a base (maximum write id 100) and a delta (write id 200) for a single
 * bucket, then checks the exact event sequence that {@code OrcRawRecordMerger}
 * produces for several reads: the base split, the delta split, a compacting
 * merge over the current directories, a major-compaction merge, and finally
 * both splits again with a write-id list built from
 * {@code "testNewBaseAndDelta:2000:200:200"}.
 *
 * @param use130Format when false, the statement id on the writer options is set
 *                     to -1 and the expected (delete-)delta directory names are
 *                     built without a statement id
 * @throws Exception if writing or reading the files fails
 */
private void testNewBaseAndDelta(boolean use130Format) throws Exception {
    final int BUCKET = 10;
    String[] values = new String[] { "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth" };
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testNewBaseAndDelta").makeQualified(fs);
    // start from a clean directory
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the base
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).inspector(inspector).bucket(BUCKET).finalDestination(root);
    // encoded from the options before the statement id is (optionally) overridden below
    final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
    if (!use130Format) {
        options.statementId(-1);
    }
    RecordUpdater ru = of.getRecordUpdater(root, options.writingBase(true).maximumWriteId(100));
    for (String v : values) {
        ru.insert(0, new MyRow(v));
    }
    ru.close(false);
    // write a delta
    // NOTE(review): judging by the keys asserted below, the second MyRow argument is the
    // row id of the base row being updated/deleted - confirm against MyRow
    ru = of.getRecordUpdater(root, options.writingBase(false).minimumWriteId(200).maximumWriteId(200).recordIdColumn(1));
    ru.update(200, new MyRow("update 1", 0, 0, BUCKET_PROPERTY));
    ru.update(200, new MyRow("update 2", 2, 0, BUCKET_PROPERTY));
    ru.update(200, new MyRow("update 3", 3, 0, BUCKET_PROPERTY));
    ru.delete(200, new MyRow("", 7, 0, BUCKET_PROPERTY));
    ru.delete(200, new MyRow("", 8, 0, BUCKET_PROPERTY));
    ru.close(false);
    ValidWriteIdList writeIdList = new ValidReaderWriteIdList("testNewBaseAndDelta:200:" + Long.MAX_VALUE);
    AcidUtils.Directory directory = AcidUtils.getAcidState(root, conf, writeIdList);
    // expected layout: base_0000100, then the delete delta at index 0 and the insert delta at index 1
    assertEquals(new Path(root, "base_0000100"), directory.getBaseDirectory());
    assertEquals(new Path(root, use130Format ? AcidUtils.deleteDeltaSubdir(200, 200, 0) : AcidUtils.deleteDeltaSubdir(200, 200)), directory.getCurrentDirectories().get(0).getPath());
    assertEquals(new Path(root, use130Format ? AcidUtils.deltaSubdir(200, 200, 0) : AcidUtils.deltaSubdir(200, 200)), directory.getCurrentDirectories().get(1).getPath());
    Path basePath = AcidUtils.createBucketFile(directory.getBaseDirectory(), BUCKET);
    Path deltaPath = AcidUtils.createBucketFile(directory.getCurrentDirectories().get(1).getPath(), BUCKET);
    Path deleteDeltaDir = directory.getCurrentDirectories().get(0).getPath();
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    conf.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(conf, true, null);
    conf.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    // the first "split" is for base/
    Reader baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    OrcRawRecordMerger merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    // id and event are reused as the output holders for every merger.next() call below
    RecordIdentifier id = merger.createKey();
    OrcStruct event = merger.createValue();
    // expected: rows 0, 2, 3, 7, 8 surface as deletes from write id 200; the rest as base inserts
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
    assertEquals("second", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
    assertEquals("fifth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
    assertEquals("sixth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
    assertEquals("seventh", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
    assertEquals("tenth", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // second "split" is delta_200_200
    baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, true, baseReader, false, BUCKET, createMaximalTxnList(), new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    // expected: the five delete events first, then the three inserts written by the updates
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // now run as if it's a minor Compaction so we don't collapse events
    // here there is only 1 "split" since we only have data for 1 bucket
    merger = new OrcRawRecordMerger(conf, false, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(true));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(true, merger.next(id, event));
    // minor comp, so we ignore 'base_0000100' files so all Deletes end up first since
    // they all modify primordial rows
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    // data from delta_200_200
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // now run as if it's a major Compaction so we collapse events
    // here there is only 1 "split" since we only have data for 1 bucket
    // NOTE(review): baseReader is reassigned here, but the merger below is given null as its
    // reader argument and the base is supplied via baseDir(...) - confirm this reader is needed
    baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, true, null, false, BUCKET, createMaximalTxnList(), new Reader.Options(), AcidUtils.getPaths(directory.getCurrentDirectories()), new OrcRawRecordMerger.Options().isCompacting(true).isMajorCompaction(true).baseDir(new Path(root, "base_0000100")));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    // expected: same sequence as the base split above, followed by the delta inserts
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 0, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 1, 0), id);
    assertEquals("second", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 2, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 3, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 4, 0), id);
    assertEquals("fifth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 5, 0), id);
    assertEquals("sixth", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 6, 0), id);
    assertEquals("seventh", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 7, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.DELETE_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 8, 200), id);
    assertNull(OrcRecordUpdater.getRow(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(0, BUCKET_PROPERTY, 9, 0), id);
    assertEquals("tenth", getValue(event));
    // data from delta_200_200
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 0, 200), id);
    assertEquals("update 1", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 1, 200), id);
    assertEquals("update 2", getValue(event));
    assertEquals(true, merger.next(id, event));
    assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
    assertEquals(new ReaderKey(200, BUCKET_PROPERTY, 2, 200), id);
    assertEquals("update 3", getValue(event));
    assertEquals(false, merger.next(id, event));
    merger.close();
    // try ignoring the 200 transaction and make sure it works still
    ValidWriteIdList writeIds = new ValidReaderWriteIdList("testNewBaseAndDelta:2000:200:200");
    // again 1st split is for base/
    baseReader = OrcFile.createReader(basePath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds, new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    // with write id 200 ignored, all ten base rows are expected back as plain inserts, in order
    for (int i = 0; i < values.length; ++i) {
        assertEquals(true, merger.next(id, event));
        // NOTE(review): the logged message has no separator between the id and "event = "
        LOG.info("id = " + id + "event = " + event);
        assertEquals(OrcRecordUpdater.INSERT_OPERATION, OrcRecordUpdater.getOperation(event));
        assertEquals(new ReaderKey(0, BUCKET_PROPERTY, i, 0), id);
        assertEquals(values[i], getValue(event));
    }
    assertEquals(false, merger.next(id, event));
    merger.close();
    // 2nd split is for delta_200_200 which is filtered out entirely by "txns"
    baseReader = OrcFile.createReader(deltaPath, OrcFile.readerOptions(conf));
    merger = new OrcRawRecordMerger(conf, false, baseReader, false, BUCKET, writeIds, new Reader.Options(), new Path[] { deleteDeltaDir }, new OrcRawRecordMerger.Options().isCompacting(false));
    assertEquals(null, merger.getMinKey());
    assertEquals(null, merger.getMaxKey());
    assertEquals(false, merger.next(id, event));
    merger.close();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) FileSystem(org.apache.hadoop.fs.FileSystem) ReaderKey(org.apache.hadoop.hive.ql.io.orc.OrcRawRecordMerger.ReaderKey) RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater) Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) ValidWriteIdList(org.apache.hadoop.hive.common.ValidWriteIdList) ValidReaderWriteIdList(org.apache.hadoop.hive.common.ValidReaderWriteIdList) OrcAcidUtils(org.apache.orc.impl.OrcAcidUtils) AcidUtils(org.apache.hadoop.hive.ql.io.AcidUtils)

Example 80 with ObjectInspectorFactory.getReflectionObjectInspector

use of org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.getReflectionObjectInspector in project hive by apache.

the class TestOrcRawRecordMerger method testRecordReaderDelta.

/**
 * Test the RecordReader when the table directory holds two insert deltas and no base.
 *
 * <p>Five rows are inserted under write id 1 and five more under write id 2 (both with
 * {@code writingBase(false)}). The directory is then read back through {@link OrcInputFormat}
 * and every split is expected to return the rows of the matching delta, in insertion order.
 *
 * @throws Exception if writing or reading the delta files fails
 */
@Test
public void testRecordReaderDelta() throws Exception {
    final int BUCKET = 0;
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testRecordReaderDelta").makeQualified(fs);
    // start from an empty directory so files from earlier runs cannot leak into the splits
    fs.delete(root, true);
    ObjectInspector inspector;
    // NOTE(review): inspector creation is serialized on TestOrcFile.class; presumably the
    // reflection inspector factory is shared between test classes - confirm.
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the first delta (write id 1)
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).bucket(BUCKET).inspector(inspector).filesystem(fs).writingBase(false).minimumWriteId(1).maximumWriteId(1).finalDestination(root);
    RecordUpdater ru = of.getRecordUpdater(root, options);
    String[][] values = { new String[] { "a", "b", "c", "d", "e" }, new String[] { "f", "g", "h", "i", "j" } };
    for (int i = 0; i < values[0].length; ++i) {
        ru.insert(1, new MyRow(values[0][i]));
    }
    ru.close(false);
    // write the second delta (write id 2), reusing the same options object
    options.minimumWriteId(2).maximumWriteId(2);
    ru = of.getRecordUpdater(root, options);
    for (int i = 0; i < values[1].length; ++i) {
        ru.insert(2, new MyRow(values[1][i]));
    }
    ru.close(false);
    InputFormat inf = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.min.split.size", "1");
    job.set("mapred.max.split.size", "2");
    job.set("mapred.input.dir", root.toString());
    job.set("bucket_count", "1");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    InputSplit[] splits = inf.getSplits(job, 5);
    // the checks below rely on split j carrying exactly the rows of values[j]
    assertEquals(2, splits.length);
    for (int j = 0; j < splits.length; j++) {
        InputSplit split = splits[j];
        org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr = inf.getRecordReader(split, job, Reporter.NULL);
        try {
            OrcStruct row = rr.createValue();
            for (int i = 0; i < values[j].length; ++i) {
                System.out.println("Checking " + i);
                String msg = "split[" + j + "] at i=" + i;
                assertEquals(msg, true, rr.next(NullWritable.get(), row));
                assertEquals(msg, values[j][i], row.getFieldValue(0).toString());
            }
            // the split must be exhausted after the expected rows
            assertEquals(false, rr.next(NullWritable.get(), row));
        } finally {
            // release the underlying ORC readers even when an assertion fails
            rr.close();
        }
    }
}
Also used : Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) NullWritable(org.apache.hadoop.io.NullWritable) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) InputFormat(org.apache.hadoop.mapred.InputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Test(org.junit.Test)

Aggregations

StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) 73
Test (org.junit.Test) 64
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) 60
Configuration (org.apache.hadoop.conf.Configuration) 25
StringObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector) 25
InputSplit (org.apache.hadoop.mapred.InputSplit) 25
BinaryObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector) 24
MapObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector) 23
Properties (java.util.Properties) 20
IntObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector) 20
ListObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector) 18
BooleanObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector) 18
ByteObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector) 18
DoubleObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector) 18
FloatObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector) 18
HiveDecimalObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector) 18
LongObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector) 18
ShortObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector) 18
TimestampObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector) 18
RecordWriter (org.apache.hadoop.mapred.RecordWriter) 18