Use of org.apache.hadoop.io.NullWritable in project hive by apache.
The class StreamingAssert, method readRecords:
/**
* TODO: this would be more flexible doing a SQL select statement rather than using InputFormat directly
* see {@link org.apache.hive.hcatalog.streaming.TestStreaming#checkDataWritten2(Path, long, long, int, String, String...)}
* @param numSplitsExpected the number of input splits the delta files are expected to produce
* @return the records read from the partition
* @throws Exception
*/
List<Record> readRecords(int numSplitsExpected) throws Exception {
  if (currentDeltas.isEmpty()) {
    throw new AssertionError("No data");
  }
  InputFormat<NullWritable, OrcStruct> inputFormat = new OrcInputFormat();
  JobConf job = new JobConf();
  job.set("mapred.input.dir", partitionLocation.toString());
  job.set(hive_metastoreConstants.BUCKET_COUNT, Integer.toString(table.getSd().getNumBuckets()));
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg");
  job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string");
  AcidUtils.setAcidOperationalProperties(job, true, null);
  job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true);
  job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.toString());
  InputSplit[] splits = inputFormat.getSplits(job, 1);
  assertEquals(numSplitsExpected, splits.length);
  List<Record> records = new ArrayList<>();
  for (InputSplit is : splits) {
    final AcidRecordReader<NullWritable, OrcStruct> recordReader =
        (AcidRecordReader<NullWritable, OrcStruct>) inputFormat.getRecordReader(is, job, Reporter.NULL);
    NullWritable key = recordReader.createKey();
    OrcStruct value = recordReader.createValue();
    while (recordReader.next(key, value)) {
      RecordIdentifier recordIdentifier = recordReader.getRecordIdentifier();
      Record record = new Record(
          new RecordIdentifier(recordIdentifier.getWriteId(), recordIdentifier.getBucketProperty(), recordIdentifier.getRowId()),
          value.toString());
      System.out.println(record);
      records.add(record);
    }
    recordReader.close();
  }
  return records;
}
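readRecords drives the old mapred API directly: the caller allocates the key and value once via createKey()/createValue(), next() refills them in place, and with OrcInputFormat the key slot is just the NullWritable placeholder. The following is a minimal, generic sketch of that contract; it is not code from the Hive project, and the class and method names are invented for illustration.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;

public final class MapredDrainExample {
  // Drains any mapred InputFormat whose key type is the NullWritable placeholder.
  public static <V> List<String> drain(InputFormat<NullWritable, V> format, JobConf job) throws IOException {
    List<String> rows = new ArrayList<>();
    for (InputSplit split : format.getSplits(job, 1)) {
      RecordReader<NullWritable, V> reader = format.getRecordReader(split, job, Reporter.NULL);
      NullWritable key = reader.createKey();   // the shared singleton; carries no data
      V value = reader.createValue();
      try {
        while (reader.next(key, value)) {      // key and value are refilled in place
          rows.add(String.valueOf(value));
        }
      } finally {
        reader.close();
      }
    }
    return rows;
  }
}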
Use of org.apache.hadoop.io.NullWritable in project incubator-rya by apache.
The class ForwardChainTest, method testTransitiveChain:
/**
* MultipleOutputs support is minimal, so we have to check each map/reduce
* step explicitly
*/
@Test
public void testTransitiveChain() throws Exception {
  int max = 8;
  int n = 4;
  URI prop = TestUtils.uri("subOrganizationOf");
  Map<Integer, Map<Integer, Pair<Fact, NullWritable>>> connections = new HashMap<>();
  for (int i = 0; i <= max; i++) {
    connections.put(i, new HashMap<Integer, Pair<Fact, NullWritable>>());
  }
  // Initial input: make a chain from org0 to org8
  for (int i = 0; i < max; i++) {
    URI orgI = TestUtils.uri("org" + i);
    URI orgJ = TestUtils.uri("org" + (i + 1));
    Fact triple = new Fact(orgI, prop, orgJ);
    connections.get(i).put(i + 1, new Pair<>(triple, NullWritable.get()));
  }
  for (int i = 1; i <= n; i++) {
    // Map:
    MapDriver<Fact, NullWritable, ResourceWritable, Fact> mDriver = new MapDriver<>();
    mDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
    mDriver.setMapper(new ForwardChain.FileMapper(schema));
    for (int j : connections.keySet()) {
      for (int k : connections.get(j).keySet()) {
        mDriver.addInput(connections.get(j).get(k));
      }
    }
    List<Pair<ResourceWritable, Fact>> mapped = mDriver.run();
    // Convert data for reduce phase:
    ReduceFeeder<ResourceWritable, Fact> feeder = new ReduceFeeder<>(mDriver.getConfiguration());
    List<KeyValueReuseList<ResourceWritable, Fact>> intermediate = feeder.sortAndGroup(mapped,
        new ResourceWritable.SecondaryComparator(), new ResourceWritable.PrimaryComparator());
    // Reduce, and compare to expected output:
    ReduceDriver<ResourceWritable, Fact, Fact, NullWritable> rDriver = new ReduceDriver<>();
    rDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
    rDriver.setReducer(new ForwardChain.ReasoningReducer(schema));
    rDriver.addAllElements(intermediate);
    int maxSpan = (int) Math.pow(2, i);
    int minSpan = (maxSpan / 2) + 1;
    // For each j, build all paths starting with j:
    for (int j = 0; j < max; j++) {
      // This includes any path of length k for appropriate k:
      for (int k = minSpan; k <= maxSpan && j + k <= max; k++) {
        int middle = j + minSpan - 1;
        URI left = TestUtils.uri("org" + j);
        URI right = TestUtils.uri("org" + (j + k));
        Fact triple = new Fact(left, prop, right, i, OwlRule.PRP_TRP, TestUtils.uri("org" + middle));
        triple.addSource(connections.get(j).get(middle).getFirst());
        triple.addSource(connections.get(middle).get(j + k).getFirst());
        Pair<Fact, NullWritable> expected = new Pair<>(triple, NullWritable.get());
        connections.get(j).put(j + k, expected);
        rDriver.addMultiOutput("intermediate", expected);
      }
    }
    rDriver.runTest();
  }
}
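The MRUnit drivers above exercise Rya's own mapper and reducer, but the NullWritable.get() pattern they rely on is generic: the singleton is used wherever a key or value slot is required but carries no data, so nothing extra is allocated or serialized per record. Below is a small, self-contained sketch of that pattern with MRUnit's new-API MapDriver; the mapper, class, and test names are invented for illustration and are not part of the Rya project.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.junit.Test;

public class DedupMapperTest {

  // Emits each input line as the key with a NullWritable value, the usual shape
  // when a downstream reducer only needs the distinct keys.
  static class DedupMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable offset, Text line, Context context) throws IOException, InterruptedException {
      context.write(line, NullWritable.get());   // singleton: no per-record allocation
    }
  }

  @Test
  public void emitsLineWithNullValue() throws Exception {
    MapDriver.<LongWritable, Text, Text, NullWritable>newMapDriver(new DedupMapper())
        .withInput(new LongWritable(0), new Text("org0 subOrganizationOf org1"))
        .withOutput(new Text("org0 subOrganizationOf org1"), NullWritable.get())
        .runTest();
  }
}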
Use of org.apache.hadoop.io.NullWritable in project druid by druid-io.
The class OrcHadoopInputRowParserTest, method getAllRows:
private static List<InputRow> getAllRows(HadoopDruidIndexerConfig config) throws IOException {
  Job job = Job.getInstance(new Configuration());
  config.intoConfiguration(job);
  File testFile = new File(((StaticPathSpec) config.getPathSpec()).getPaths());
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), new String[] { "host" });
  InputFormat<NullWritable, OrcStruct> inputFormat = ReflectionUtils.newInstance(OrcInputFormat.class, job.getConfiguration());
  RecordReader<NullWritable, OrcStruct> reader = inputFormat.getRecordReader(split, new JobConf(job.getConfiguration()), null);
  try {
    List<InputRow> records = new ArrayList<>();
    InputRowParser parser = config.getParser();
    final NullWritable key = reader.createKey();
    OrcStruct value = reader.createValue();
    while (reader.next(key, value)) {
      records.add(((List<InputRow>) parser.parseBatch(value)).get(0));
      // Allocate a fresh value per row; the parser may hold a reference to the one just parsed.
      value = reader.createValue();
    }
    return records;
  } finally {
    reader.close();
  }
}
Use of org.apache.hadoop.io.NullWritable in project beam by apache.
The class WritableCoderTest, method testNullWritableEncoding:
@Test
public void testNullWritableEncoding() throws Exception {
  NullWritable value = NullWritable.get();
  WritableCoder<NullWritable> coder = WritableCoder.of(NullWritable.class);
  CoderProperties.coderDecodeEncodeEqual(coder, value);
}
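CoderProperties.coderDecodeEncodeEqual already round-trips the value; the sketch below spells out the same round trip more explicitly using Beam's CoderUtils helper. The import locations (WritableCoder has lived in different packages across Beam releases) and the byte-level comments are assumptions, so treat this as illustration rather than the project's test code.

import static org.junit.Assert.assertEquals;

import org.apache.beam.sdk.io.hadoop.WritableCoder;   // package varies by Beam release
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.junit.Test;

public class WritableCoderRoundTripSketch {

  @Test
  public void nullWritableRoundTrips() throws Exception {
    WritableCoder<NullWritable> coder = WritableCoder.of(NullWritable.class);
    // NullWritable serializes no fields of its own, so the payload should be empty.
    byte[] bytes = CoderUtils.encodeToByteArray(coder, NullWritable.get());
    assertEquals(NullWritable.get(), CoderUtils.decodeFromByteArray(coder, bytes));
  }

  @Test
  public void longWritableRoundTripsByValue() throws Exception {
    WritableCoder<LongWritable> coder = WritableCoder.of(LongWritable.class);
    byte[] bytes = CoderUtils.encodeToByteArray(coder, new LongWritable(42L));
    assertEquals(new LongWritable(42L), CoderUtils.decodeFromByteArray(coder, bytes));
  }
}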
Use of org.apache.hadoop.io.NullWritable in project hive by apache.
The class ParquetRecordReaderWrapper, method next:
@Override
public boolean next(final NullWritable key, final ArrayWritable value) throws IOException {
  if (eof) {
    return false;
  }
  try {
    if (firstRecord) {
      // key & value are already read.
      firstRecord = false;
    } else if (!realReader.nextKeyValue()) {
      // strictly not required, just for consistency
      eof = true;
      return false;
    }
    final ArrayWritable tmpCurValue = realReader.getCurrentValue();
    if (value != tmpCurValue) {
      final Writable[] arrValue = value.get();
      final Writable[] arrCurrent = tmpCurValue.get();
      if (value != null && arrValue.length == arrCurrent.length) {
        System.arraycopy(arrCurrent, 0, arrValue, 0, arrCurrent.length);
      } else {
        if (arrValue.length != arrCurrent.length) {
          throw new IOException("DeprecatedParquetHiveInput : size of object differs. Value"
              + " size : " + arrValue.length + ", Current Object size : " + arrCurrent.length);
        } else {
          throw new IOException("DeprecatedParquetHiveInput can not support RecordReaders that"
              + " don't return same key & value & value is null");
        }
      }
    }
    return true;
  } catch (final InterruptedException e) {
    throw new IOException(e);
  }
}
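ParquetRecordReaderWrapper's next() is essentially an adapter: it pulls from the new mapreduce-API reader (nextKeyValue()/getCurrentValue()) and copies the row into the caller's reusable ArrayWritable, as the old mapred contract requires. Below is a stripped-down sketch of that adapter shape with all Parquet-specific handling (first-record priming, schema checks) removed; the class name, constructor, and column-count parameter are invented for illustration and are not Hive's implementation.

import java.io.IOException;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;

// Illustration only: adapts a new-API (mapreduce) reader to the old mapred contract.
class NewToOldReaderAdapter implements org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> {

  private final org.apache.hadoop.mapreduce.RecordReader<Void, ArrayWritable> realReader;
  private final int numColumns;

  NewToOldReaderAdapter(org.apache.hadoop.mapreduce.RecordReader<Void, ArrayWritable> realReader, int numColumns) {
    this.realReader = realReader;
    this.numColumns = numColumns;
  }

  @Override
  public boolean next(NullWritable key, ArrayWritable value) throws IOException {
    try {
      if (!realReader.nextKeyValue()) {
        return false;
      }
      // Copy the freshly read row into the caller's reusable value object
      // (assumes the caller obtained it from createValue(), so the lengths match).
      Writable[] current = realReader.getCurrentValue().get();
      System.arraycopy(current, 0, value.get(), 0, current.length);
      return true;
    } catch (InterruptedException e) {
      throw new IOException(e);
    }
  }

  @Override
  public NullWritable createKey() {
    return NullWritable.get();
  }

  @Override
  public ArrayWritable createValue() {
    return new ArrayWritable(Writable.class, new Writable[numColumns]);
  }

  @Override
  public long getPos() {
    return 0;
  }

  @Override
  public float getProgress() {
    return 0f;
  }

  @Override
  public void close() throws IOException {
    realReader.close();
  }
}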