
Example 36 with NullWritable

Use of org.apache.hadoop.io.NullWritable in the apache/camel project.

From the class HdfsConsumerTest, method testReadBoolean.

@Test
public void testReadBoolean() throws Exception {
    if (!canTest()) {
        return;
    }
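    // write a single NullWritable/BooleanWritable pair to a local SequenceFile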
    final Path file = new Path(new File("target/test/test-camel-boolean").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, BooleanWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BooleanWritable valueWritable = new BooleanWritable();
    valueWritable.set(true);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
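    // consume the file with the hdfs2 endpoint (fileSystemType=LOCAL) and send it to a mock endpoint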
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from("hdfs2:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.assertIsSatisfied();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) RouteBuilder(org.apache.camel.builder.RouteBuilder) BooleanWritable(org.apache.hadoop.io.BooleanWritable) MockEndpoint(org.apache.camel.component.mock.MockEndpoint) ArrayFile(org.apache.hadoop.io.ArrayFile) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) Writer(org.apache.hadoop.io.SequenceFile.Writer) Test(org.junit.Test)
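
The test above relies on a createWriter(conf, file, keyClass, valueClass) helper defined elsewhere in HdfsConsumerTest and not shown on this page. A minimal sketch of what such a helper could look like with the stock Hadoop 2.x SequenceFile API follows; the class and method names are assumptions, not the project's actual code. Note that NullWritable has no public constructor: NullWritable.get() returns the shared singleton, exactly as the test does above.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;

public final class SequenceFileWriters {

    // Hypothetical helper: builds a SequenceFile.Writer for the given key/value types.
    public static SequenceFile.Writer createWriter(Configuration conf, Path file,
            Class<? extends Writable> keyClass, Class<? extends Writable> valueClass) throws IOException {
        return SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(file),
                SequenceFile.Writer.keyClass(keyClass),
                SequenceFile.Writer.valueClass(valueClass));
    }

    private SequenceFileWriters() {
    }
}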

Example 37 with NullWritable

Use of org.apache.hadoop.io.NullWritable in the apache/hive project.

From the class TestInputOutputFormat, method testVectorizationWithBuckets.

/**
 * Test vectorization, non-acid, non-combine.
 * @throws Exception
 */
@Test
public void testVectorizationWithBuckets() throws Exception {
    // get the object inspector for MyRow
    StructObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = (StructObjectInspector) ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"), "vectorBuckets", inspector, true, 1);
    // write the orc file to the mock file system
    Path path = new Path(conf.get("mapred.input.dir") + "/0_0");
    Writer writer = OrcFile.createWriter(path, OrcFile.writerOptions(conf).blockPadding(false).bufferSize(1024).inspector(inspector));
    for (int i = 0; i < 10; ++i) {
        writer.addRow(new MyRow(i, 2 * i));
    }
    writer.close();
    setBlocks(path, conf, new MockBlock("host0", "host1"));
    // call getSplits
    conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3);
    HiveInputFormat<?, ?> inputFormat = new HiveInputFormat<WritableComparable, Writable>();
    InputSplit[] splits = inputFormat.getSplits(conf, 10);
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch> reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
    NullWritable key = reader.createKey();
    VectorizedRowBatch value = reader.createValue();
    assertEquals(true, reader.next(key, value));
    assertEquals(10, value.count());
    LongColumnVector col0 = (LongColumnVector) value.cols[0];
    for (int i = 0; i < 10; i++) {
        assertEquals("checking " + i, i, col0.vector[i]);
    }
    assertEquals(false, reader.next(key, value));
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) CombineHiveInputFormat(org.apache.hadoop.hive.ql.io.CombineHiveInputFormat) HiveInputFormat(org.apache.hadoop.hive.ql.io.HiveInputFormat) VectorizedRowBatch(org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) RecordWriter(org.apache.hadoop.mapred.RecordWriter) LongColumnVector(org.apache.hadoop.hive.ql.exec.vector.LongColumnVector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Test(org.junit.Test)
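
Beyond the ORC specifics, the tail of this test follows the usual pattern for an old-API (org.apache.hadoop.mapred) RecordReader keyed by NullWritable: the key carries no data, and the value object is reused on every next(key, value) call. A generic, hedged sketch of that pattern is shown below; the drain helper and its Consumer callback are illustrative, not part of the Hive code.

import java.io.IOException;
import java.util.function.Consumer;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.RecordReader;

public final class RecordReaderDrain {

    // Reads every value from an old-API RecordReader whose key type is NullWritable.
    // The value instance is reused by the reader, so copy it if it must outlive the loop.
    public static <V> void drain(RecordReader<NullWritable, V> reader, Consumer<V> process)
            throws IOException {
        NullWritable key = reader.createKey();   // the NullWritable singleton
        V value = reader.createValue();
        while (reader.next(key, value)) {
            process.accept(value);
        }
        reader.close();
    }

    private RecordReaderDrain() {
    }
}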

Example 38 with NullWritable

Use of org.apache.hadoop.io.NullWritable in the apache/hive project.

From the class TestOrcRawRecordMerger, method testRecordReaderNewBaseAndDelta.

/**
 * Test the RecordReader when there is a new base and a delta.
 * This test creates multiple stripes in both base and delta files which affects how many splits
 * are created on read.  With ORC-228 this could be done in E2E fashion with a query or
 * streaming ingest writing data.
 * @see #testRecordReaderOldBaseAndDelta()
 * @throws Exception
 */
@Test
public void testRecordReaderNewBaseAndDelta() throws Exception {
    final int BUCKET = 11;
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf);
    Path root = new Path(tmpDir, "testRecordReaderNewBaseAndDelta").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write the base
    MemoryManager mgr = new MemoryManagerImpl(conf) {

        int rowsAddedSinceCheck = 0;

        @Override
        public synchronized void addedRow(int rows) throws IOException {
            rowsAddedSinceCheck += rows;
            if (rowsAddedSinceCheck >= 2) {
                notifyWriters();
                rowsAddedSinceCheck = 0;
            }
        }
    };
    // make 5 stripes with 2 rows each
    OrcRecordUpdater.OrcOptions options = (OrcRecordUpdater.OrcOptions) new OrcRecordUpdater.OrcOptions(conf).writingBase(true).minimumWriteId(0).maximumWriteId(0).bucket(BUCKET).inspector(inspector).filesystem(fs);
    final int BUCKET_PROPERTY = BucketCodec.V1.encode(options);
    options.orcOptions(OrcFile.writerOptions(conf).stripeSize(1).blockPadding(false).compress(CompressionKind.NONE).memory(mgr).batchSize(2));
    options.finalDestination(root);
    RecordUpdater ru = of.getRecordUpdater(root, options);
    String[] values = new String[] { "ignore.1", "0.1", "ignore.2", "ignore.3", "2.0", "2.1", "3.0", "ignore.4", "ignore.5", "ignore.6" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(0, new BigRow(i, i, values[i], i, i));
    }
    ru.close(false);
    // write a delta
    options.writingBase(false).minimumWriteId(1).maximumWriteId(1).recordIdColumn(5);
    ru = of.getRecordUpdater(root, options);
    values = new String[] { "0.0", null, null, "1.1", null, null, null, "ignore.7" };
    for (int i = 0; i < values.length; ++i) {
        if (values[i] != null) {
            ru.update(1, new BigRow(i, i, values[i], i, i, i, 0, BUCKET_PROPERTY));
        }
    }
    ru.delete(1, new BigRow(9, 0, BUCKET_PROPERTY));
    ru.close(false);
    // write a delta
    options.minimumWriteId(100).maximumWriteId(100);
    ru = of.getRecordUpdater(root, options);
    values = new String[] { null, null, "1.0", null, null, null, null, "3.1" };
    for (int i = 0; i < values.length - 1; ++i) {
        if (values[i] != null) {
            ru.update(100, new BigRow(i, i, values[i], i, i, i, 0, BUCKET_PROPERTY));
        }
    }
    // do this before next update so that delete_delta is properly sorted
    ru.delete(100, new BigRow(8, 0, BUCKET_PROPERTY));
    // because row 8 was updated and thus has a different RecordIdentifier now
    ru.update(100, new BigRow(7, 7, values[values.length - 1], 7, 7, 2, 1, BUCKET_PROPERTY));
    ru.close(false);
    MyResult[] expected = new MyResult[10];
    int k = 0;
    expected[k++] = new MyResult(0, "0.0");
    expected[k++] = new MyResult(1, "0.1");
    expected[k++] = new MyResult(2, "1.0");
    expected[k++] = new MyResult(3, "1.1");
    expected[k++] = new MyResult(4, "2.0");
    expected[k++] = new MyResult(5, "2.1");
    expected[k++] = new MyResult(6, "3.0");
    expected[k] = new MyResult(7, "3.1");
    InputFormat inf = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.min.split.size", "1");
    job.set("mapred.max.split.size", "2");
    job.set("mapred.input.dir", root.toString());
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, BigRow.getColumnNamesProperty());
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, BigRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    InputSplit[] splits = inf.getSplits(job, 5);
    // base has 10 rows, so 5 splits, 1 delta has 2 rows so 1 split, and 1 delta has 3 so 2 splits
    assertEquals(8, splits.length);
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr;
    for (InputSplit split : splits) {
        rr = inf.getRecordReader(split, job, Reporter.NULL);
        NullWritable key = rr.createKey();
        OrcStruct value = rr.createValue();
        while (rr.next(key, value)) {
            MyResult mr = new MyResult(Integer.parseInt(value.getFieldValue(0).toString()), value.getFieldValue(2).toString());
            int i = 0;
            for (; i < expected.length; i++) {
                if (mr.equals(expected[i])) {
                    expected[i] = null;
                    break;
                }
            }
            if (i >= expected.length) {
                // not found
                assertTrue("Found unexpected row: " + mr, false);
            }
        }
    }
    for (MyResult mr : expected) {
        assertTrue("Expected " + mr + " not found in any InputSplit", mr == null);
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) MemoryManagerImpl(org.apache.orc.impl.MemoryManagerImpl) FileSystem(org.apache.hadoop.fs.FileSystem) RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) MemoryManager(org.apache.orc.MemoryManager) NullWritable(org.apache.hadoop.io.NullWritable) InputFormat(org.apache.hadoop.mapred.InputFormat) Test(org.junit.Test)
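
The assertions above depend on a small MyResult helper whose definition is not included on this page. The test only needs value equality over an (int, String) pair plus a readable toString(); a hypothetical stand-in with that shape could look like the following (field names are guesses, not the Hive test's actual code).

// Hypothetical stand-in for the MyResult value holder used in the test above
// (in the real test it would be a static nested class).
final class MyResult {
    final int rowId;
    final String value;

    MyResult(int rowId, String value) {
        this.rowId = rowId;
        this.value = value;
    }

    @Override
    public boolean equals(Object other) {
        if (!(other instanceof MyResult)) {
            return false;
        }
        MyResult that = (MyResult) other;
        return rowId == that.rowId && value.equals(that.value);
    }

    @Override
    public int hashCode() {
        return 31 * rowId + value.hashCode();
    }

    @Override
    public String toString() {
        return "MyResult(" + rowId + ", " + value + ")";
    }
}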

Example 39 with NullWritable

Use of org.apache.hadoop.io.NullWritable in the apache/hive project.

From the class TestOrcRawRecordMerger, method testRecordReaderIncompleteDelta.

/**
 * @param use130Format true means use delta_0001_0001_0000 format, else delta_0001_00001
 */
private void testRecordReaderIncompleteDelta(boolean use130Format) throws Exception {
    final int BUCKET = 1;
    Configuration conf = new Configuration();
    OrcOutputFormat of = new OrcOutputFormat();
    FileSystem fs = FileSystem.getLocal(conf).getRaw();
    Path root = new Path(tmpDir, "testRecordReaderIncompleteDelta").makeQualified(fs);
    fs.delete(root, true);
    ObjectInspector inspector;
    synchronized (TestOrcFile.class) {
        inspector = ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class, ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
    }
    // write a base
    AcidOutputFormat.Options options = new AcidOutputFormat.Options(conf).writingBase(true).minimumWriteId(0).maximumWriteId(0).bucket(BUCKET).inspector(inspector).filesystem(fs).finalDestination(root);
    if (!use130Format) {
        options.statementId(-1);
    }
    RecordUpdater ru = of.getRecordUpdater(root, options);
    String[] values = new String[] { "1", "2", "3", "4", "5" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(0, new MyRow(values[i]));
    }
    ru.close(false);
    // write a delta
    options.writingBase(false).minimumWriteId(10).maximumWriteId(19);
    ru = of.getRecordUpdater(root, options);
    values = new String[] { "6", "7", "8" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(1, new MyRow(values[i]));
    }
    InputFormat inf = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.input.dir", root.toString());
    job.set("bucket_count", "2");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, MyRow.getColumnNamesProperty());
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, MyRow.getColumnTypesProperty());
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    // read the keys before the delta is flushed
    InputSplit[] splits = inf.getSplits(job, 1);
    // 1 split since we only have 1 bucket file in base/.  delta is not flushed (committed) yet, i.e. empty
    assertEquals(1, splits.length);
    org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    NullWritable key = rr.createKey();
    OrcStruct value = rr.createValue();
    System.out.println("Looking at split " + splits[0]);
    for (int i = 1; i < 6; ++i) {
        System.out.println("Checking row " + i);
        assertEquals(true, rr.next(key, value));
        assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    }
    assertEquals(false, rr.next(key, value));
    ru.flush();
    ru.flush();
    values = new String[] { "9", "10" };
    for (int i = 0; i < values.length; ++i) {
        ru.insert(3, new MyRow(values[i]));
    }
    ru.flush();
    splits = inf.getSplits(job, 1);
    assertEquals(2, splits.length);
    Path sideFile = new Path(root + "/" + (use130Format ? AcidUtils.deltaSubdir(10, 19, 0) : AcidUtils.deltaSubdir(10, 19)) + "/bucket_00001_flush_length");
    assertEquals(true, fs.exists(sideFile));
    assertEquals(32, fs.getFileStatus(sideFile).getLen());
    rr = inf.getRecordReader(splits[0], job, Reporter.NULL);
    for (int i = 1; i <= 5; ++i) {
        assertEquals(true, rr.next(key, value));
        assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    }
    assertEquals(false, rr.next(key, value));
    rr = inf.getRecordReader(splits[1], job, Reporter.NULL);
    for (int i = 6; i < 11; ++i) {
        assertEquals("i=" + i, true, rr.next(key, value));
        assertEquals(Integer.toString(i), value.getFieldValue(0).toString());
    }
    assertEquals(false, rr.next(key, value));
}
Also used : Path(org.apache.hadoop.fs.Path) ObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector) StructObjectInspector(org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector) Configuration(org.apache.hadoop.conf.Configuration) NullWritable(org.apache.hadoop.io.NullWritable) AcidOutputFormat(org.apache.hadoop.hive.ql.io.AcidOutputFormat) InputFormat(org.apache.hadoop.mapred.InputFormat) FileSystem(org.apache.hadoop.fs.FileSystem) RecordUpdater(org.apache.hadoop.hive.ql.io.RecordUpdater) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)
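
The sideFile path in this test is built with AcidUtils.deltaSubdir(...), whose two overloads correspond to the two delta directory naming schemes that the use130Format flag toggles. The short sketch below only prints the two directory names; the wrapper class is illustrative, and the exact zero-padded strings are produced by AcidUtils itself.

import org.apache.hadoop.hive.ql.io.AcidUtils;

public class DeltaSubdirDemo {
    public static void main(String[] args) {
        // pre-1.3.0 layout: delta_<minWriteId>_<maxWriteId>
        System.out.println(AcidUtils.deltaSubdir(10, 19));
        // 1.3.0+ layout adds a statement id: delta_<minWriteId>_<maxWriteId>_<statementId>
        System.out.println(AcidUtils.deltaSubdir(10, 19, 0));
    }
}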

Example 40 with NullWritable

Use of org.apache.hadoop.io.NullWritable in the apache/hive project.

From the class AbstractTestParquetDirect, method read.

public static List<ArrayWritable> read(Path parquetFile) throws IOException {
    List<ArrayWritable> records = new ArrayList<ArrayWritable>();
    RecordReader<NullWritable, ArrayWritable> reader = new MapredParquetInputFormat().getRecordReader(new FileSplit(parquetFile, 0, fileLength(parquetFile), (String[]) null), new JobConf(), null);
    NullWritable alwaysNull = reader.createKey();
    ArrayWritable record = reader.createValue();
    while (reader.next(alwaysNull, record)) {
        records.add(record);
        // a new value so the last isn't clobbered
        record = reader.createValue();
    }
    return records;
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) ArrayList(java.util.ArrayList) FileSplit(org.apache.hadoop.mapred.FileSplit) NullWritable(org.apache.hadoop.io.NullWritable) JobConf(org.apache.hadoop.mapred.JobConf)
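
A hypothetical caller of the read(...) helper above; the class name and file path are examples only, and it assumes the helper's class is accessible from the caller's package. Each returned ArrayWritable holds one Writable per column, and because the helper allocates a fresh value per iteration (as its comment notes), the list entries are independent records.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;

public class ReadParquetExample {
    public static void main(String[] args) throws IOException {
        // Materialize every row of a Parquet file via the helper, then inspect each record.
        List<ArrayWritable> rows = AbstractTestParquetDirect.read(new Path("/tmp/example.parquet"));
        for (ArrayWritable row : rows) {
            Writable[] fields = row.get();   // one Writable per column
            System.out.println("row with " + fields.length + " columns");
        }
    }
}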

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable)113 Test (org.junit.Test)68 Path (org.apache.hadoop.fs.Path)47 Configuration (org.apache.hadoop.conf.Configuration)44 File (java.io.File)33 FileSystem (org.apache.hadoop.fs.FileSystem)28 SequenceFile (org.apache.hadoop.io.SequenceFile)24 JobConf (org.apache.hadoop.mapred.JobConf)24 RouteBuilder (org.apache.camel.builder.RouteBuilder)18 MockEndpoint (org.apache.camel.component.mock.MockEndpoint)18 ArrayFile (org.apache.hadoop.io.ArrayFile)18 Text (org.apache.hadoop.io.Text)17 InputSplit (org.apache.hadoop.mapred.InputSplit)17 LongWritable (org.apache.hadoop.io.LongWritable)16 IntWritable (org.apache.hadoop.io.IntWritable)11 IOException (java.io.IOException)10 Writer (org.apache.hadoop.io.SequenceFile.Writer)9 TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl)8 Pair (org.apache.hadoop.mrunit.types.Pair)8 CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable)8