
Example 41 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hive by apache.

The class StreamingAssert, method readRecords.

/**
 * TODO: this would be more flexible doing a SQL select statement rather than using InputFormat directly
 * see {@link org.apache.hive.hcatalog.streaming.TestStreaming#checkDataWritten2(Path, long, long, int, String, String...)}
 * @param numSplitsExpected the number of input splits the read is expected to produce
 * @return the records read from the current delta directories
 * @throws Exception if the splits cannot be computed or read
 */
List<Record> readRecords(int numSplitsExpected) throws Exception {
    if (currentDeltas.isEmpty()) {
        throw new AssertionError("No data");
    }
    InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
    JobConf job = new JobConf();
    job.set("mapred.input.dir", partitionLocation.toString());
    job.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(table.getSd().getNumBuckets()));
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
    job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
    AcidUtils.setAcidOperationalProperties(job, true, null);
    job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
    job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
    InputSplit[] splits = inputFormat.getSplits(job, 1);
    assertEquals(numSplitsExpected, splits.length);
    List<Record> records = new ArrayList<>();
    for (InputSplit is : splits) {
        final AcidRecordReader<NullWritable, OrcStruct> recordReader = (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(is, job, Reporter.NULL);
        NullWritable key = recordReader.createKey();
        OrcStruct value = recordReader.createValue();
        while (recordReader.next(key, value)) {
            RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
            Record record = new Record(new RecordIdentifier(recordIdentifier.getWriteId(), recordIdentifier.getBucketProperty(), recordIdentifier.getRowId()), value.toString());
            System.out.println(record);
            records.add(record);
        }
        recordReader.close();
    }
    return records;
}
Also used : ArrayList(java.util.ArrayList) AcidRecordReader(org.apache.hadoop.hive.ql.io.AcidInputFormat.AcidRecordReader) NullWritable(org.apache.hadoop.io.NullWritable) RecordIdentifier(org.apache.hadoop.hive.ql.io.RecordIdentifier) OrcStruct(org.apache.hadoop.hive.ql.io.orc.OrcStruct) OrcInputFormat(org.apache.hadoop.hive.ql.io.orc.OrcInputFormat) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)
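
Note that this snippet drives OrcInputFormat through the legacy mapred API and sets the input directory via the raw "mapred.input.dir" property. A minimal alternative sketch, assuming the same job and partitionLocation objects as above, is to let the mapred FileInputFormat helper set that property:

// Equivalent to job.set("mapred.input.dir", partitionLocation.toString()) above;
// assumes the same 'job' (JobConf) and 'partitionLocation' as in the snippet.
org.apache.hadoop.mapred.FileInputFormat.setInputPaths(job, new org.apache.hadoop.fs.Path(partitionLocation.toString()));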

Example 42 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project incubator-rya by apache.

The class ForwardChainTest, method testTransitiveChain.

/**
 * MultipleOutputs support is minimal, so we have to check each map/reduce
 * step explicitly
 */
@Test
public void testTransitiveChain() throws Exception {
    int max = 8;
    int n = 4;
    URI prop = TestUtils.uri("subOrganizationOf");
    Map<Integer, Map<Integer, Pair<Fact, NullWritable>>> connections = new HashMap<>();
    for (int i = 0; i <= max; i++) {
        connections.put(i, new HashMap<Integer, Pair<Fact, NullWritable>>());
    }
    // Initial input: make a chain from org0 to org8
    for (int i = 0; i < max; i++) {
        URI orgI = TestUtils.uri("org" + i);
        URI orgJ = TestUtils.uri("org" + (i + 1));
        Fact triple = new Fact(orgI, prop, orgJ);
        connections.get(i).put(i + 1, new Pair<>(triple, NullWritable.get()));
    }
    for (int i = 1; i <= n; i++) {
        // Map:
        MapDriver<Fact, NullWritable, ResourceWritable, Fact> mDriver = new MapDriver<>();
        mDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
        mDriver.setMapper(new ForwardChain.FileMapper(schema));
        for (int j : connections.keySet()) {
            for (int k : connections.get(j).keySet()) {
                mDriver.addInput(connections.get(j).get(k));
            }
        }
        List<Pair<ResourceWritable, Fact>> mapped = mDriver.run();
        // Convert data for reduce phase:
        ReduceFeeder<ResourceWritable, Fact> feeder = new ReduceFeeder<>(mDriver.getConfiguration());
        List<KeyValueReuseList<ResourceWritable, Fact>> intermediate = feeder.sortAndGroup(mapped, new ResourceWritable.SecondaryComparator(), new ResourceWritable.PrimaryComparator());
        // Reduce, and compare to expected output:
        ReduceDriver<ResourceWritable, Fact, Fact, NullWritable> rDriver = new ReduceDriver<>();
        rDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
        rDriver.setReducer(new ForwardChain.ReasoningReducer(schema));
        rDriver.addAllElements(intermediate);
        int maxSpan = (int) Math.pow(2, i);
        int minSpan = (maxSpan / 2) + 1;
        // For each j, build all paths starting with j:
        for (int j = 0; j < max; j++) {
            // This includes any path of length k for appropriate k:
            for (int k = minSpan; k <= maxSpan && j + k <= max; k++) {
                int middle = j + minSpan - 1;
                URI left = TestUtils.uri("org" + j);
                URI right = TestUtils.uri("org" + (j + k));
                Fact triple = new Fact(left, prop, right, i, OwlRule.PRP_TRP, TestUtils.uri("org" + middle));
                triple.addSource(connections.get(j).get(middle).getFirst());
                triple.addSource(connections.get(middle).get(j + k).getFirst());
                Pair<Fact, NullWritable> expected = new Pair<>(triple, NullWritable.get());
                connections.get(j).put(j + k, expected);
                rDriver.addMultiOutput("intermediate", expected);
            }
        }
        rDriver.runTest();
    }
}
Also used : HashMap(java.util.HashMap) ReduceFeeder(org.apache.hadoop.mrunit.mapreduce.ReduceFeeder) URI(org.openrdf.model.URI) ReduceDriver(org.apache.hadoop.mrunit.mapreduce.ReduceDriver) Pair(org.apache.hadoop.mrunit.types.Pair) MapDriver(org.apache.hadoop.mrunit.mapreduce.MapDriver) KeyValueReuseList(org.apache.hadoop.mrunit.types.KeyValueReuseList) Fact(org.apache.rya.reasoning.Fact) NullWritable(org.apache.hadoop.io.NullWritable) HashMap(java.util.HashMap) Map(java.util.Map) PrepareForTest(org.powermock.core.classloader.annotations.PrepareForTest) Test(org.junit.Test)
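
Reading the loop bounds: at step i the reducer joins two facts that are already known, the left one spanning exactly minSpan - 1 = 2^(i-1) links and the right one spanning between 1 and 2^(i-1) links, so the newly derived facts span between minSpan = 2^(i-1) + 1 and maxSpan = 2^i links. For example, at i = 2 the test expects exactly the paths of length 3 and 4, each assembled from a span-2 fact (org j to the middle org) and a span-1 or span-2 fact (the middle org to org j + k).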

Example 43 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project druid by druid-io.

The class OrcHadoopInputRowParserTest, method getAllRows.

private static List<InputRow> getAllRows(HadoopDruidIndexerConfig config) throws IOException {
    Job job = Job.getInstance(new Configuration());
    config.intoConfiguration(job);
    File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), new String[] { "host" });
    InputFormat<NullWritable, OrcStruct> inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
    RecordReader<NullWritable, OrcStruct> reader = inputFormat.getRecordReader(split, new JobConf(job.getConfiguration()), null);
    try {
        List<InputRow> records = new ArrayList<>();
        InputRowParser parser = config.getParser();
        final NullWritable key = reader.createKey();
        OrcStruct value = reader.createValue();
        while (reader.next(key, value)) {
            records.add(((List<InputRow>) parser.parseBatch(value)).get(0));
            value = reader.createValue();
        }
        return records;
    } finally {
        reader.close();
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) FileSplit(org.apache.hadoop.mapred.FileSplit) NullWritable(org.apache.hadoop.io.NullWritable) OrcStruct(org.apache.orc.mapred.OrcStruct) InputRow(org.apache.druid.data.input.InputRow) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) Job(org.apache.hadoop.mapreduce.Job) File(java.io.File) JobConf(org.apache.hadoop.mapred.JobConf)
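
Note that reader.createValue() is called again on every iteration instead of reusing a single OrcStruct, presumably so that each parsed row stays independent of later reads. The conventional mapred pattern reuses one value object, which is only safe if parseBatch copies everything it needs before the next call to next(); for contrast, a sketch of that reuse variant using the same reader, parser and records as above:

// Object-reuse variant (sketch): valid only if parser.parseBatch(value) keeps no
// reference to 'value' after returning, since next() refreshes it in place.
final NullWritable key = reader.createKey();
final OrcStruct value = reader.createValue();
while (reader.next(key, value)) {
    records.add(((List<InputRow>) parser.parseBatch(value)).get(0));
}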

Example 44 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project beam by apache.

The class WritableCoderTest, method testNullWritableEncoding.

@Test
public void testNullWritableEncoding() throws Exception {
    NullWritable value = NullWritable.get();
    WritableCoder<NullWritable> coder = WritableCoder.of(NullWritable.class);
    CoderProperties.coderDecodeEncodeEqual(coder, value);
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)
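
The round trip above is trivial because NullWritable carries no state: it serializes to zero bytes and decoding always yields the same singleton. A standalone sketch of that behaviour (the class name NullWritableSketch is ours, not part of the Beam test):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import org.apache.hadoop.io.NullWritable;

public class NullWritableSketch {
    public static void main(String[] args) throws Exception {
        NullWritable nw = NullWritable.get();             // the one and only instance
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        nw.write(new DataOutputStream(bytes));            // write() is a no-op
        System.out.println(bytes.size());                 // prints 0
        System.out.println(nw == NullWritable.get());     // prints true: same singleton
    }
}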

Example 45 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hive by apache.

The class ParquetRecordReaderWrapper, method next.

@Override
public boolean next(final NullWritable key, final ArrayWritable value) throws IOException {
    if (eof) {
        return false;
    }
    try {
        if (firstRecord) {
            // key & value are already read.
            firstRecord = false;
        } else if (!realReader.nextKeyValue()) {
            // strictly not required, just for consistency
            eof = true;
            return false;
        }
        final ArrayWritable tmpCurValue = realReader.getCurrentValue();
        if (value != tmpCurValue) {
            final Writable[] arrValue = value.get();
            final Writable[] arrCurrent = tmpCurValue.get();
            if (value != null && arrValue.length == arrCurrent.length) {
                System.arraycopy(arrCurrent, 0, arrValue, 0, arrCurrent.length);
            } else {
                if (arrValue.length != arrCurrent.length) {
                    throw new IOException("DeprecatedParquetHiveInput : size of object differs. Value" + " size :  " + arrValue.length + ", Current Object size : " + arrCurrent.length);
                } else {
                    throw new IOException("DeprecatedParquetHiveInput can not support RecordReaders that" + " don't return same key & value & value is null");
                }
            }
        }
        return true;
    } catch (final InterruptedException e) {
        throw new IOException(e);
    }
}
Also used : ArrayWritable(org.apache.hadoop.io.ArrayWritable) NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) ArrayWritable(org.apache.hadoop.io.ArrayWritable) IOException(java.io.IOException)
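
Because next() copies the current row's columns into the caller-supplied ArrayWritable rather than handing back a new object, a caller can reuse one key/value pair for the whole scan. A sketch of such a driver loop, where reader stands for any RecordReader<NullWritable, ArrayWritable> backed by this wrapper and process(...) is a placeholder for the caller's own handling:

NullWritable key = reader.createKey();
ArrayWritable value = reader.createValue();
while (reader.next(key, value)) {
    // 'value' has been refreshed in place with the current row's columns
    process(value);
}
reader.close();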

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable): 113 usages
Test (org.junit.Test): 68 usages
Path (org.apache.hadoop.fs.Path): 47 usages
Configuration (org.apache.hadoop.conf.Configuration): 44 usages
File (java.io.File): 33 usages
FileSystem (org.apache.hadoop.fs.FileSystem): 28 usages
SequenceFile (org.apache.hadoop.io.SequenceFile): 24 usages
JobConf (org.apache.hadoop.mapred.JobConf): 24 usages
RouteBuilder (org.apache.camel.builder.RouteBuilder): 18 usages
MockEndpoint (org.apache.camel.component.mock.MockEndpoint): 18 usages
ArrayFile (org.apache.hadoop.io.ArrayFile): 18 usages
Text (org.apache.hadoop.io.Text): 17 usages
InputSplit (org.apache.hadoop.mapred.InputSplit): 17 usages
LongWritable (org.apache.hadoop.io.LongWritable): 16 usages
IntWritable (org.apache.hadoop.io.IntWritable): 11 usages
IOException (java.io.IOException): 10 usages
Writer (org.apache.hadoop.io.SequenceFile.Writer): 9 usages
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 8 usages
Pair (org.apache.hadoop.mrunit.types.Pair): 8 usages
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 8 usages