Use of org.apache.hadoop.io.BytesWritable in project druid by druid-io: class IndexGeneratorCombinerTest, method testSingleRowNoMergePassThrough.
@Test
public void testSingleRowNoMergePassThrough() throws Exception {
  Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
  Capture<BytesWritable> captureKey = Capture.newInstance();
  Capture<BytesWritable> captureVal = Capture.newInstance();
  context.write(EasyMock.capture(captureKey), EasyMock.capture(captureVal));
  EasyMock.replay(context);
  BytesWritable key = new BytesWritable("dummy_key".getBytes());
  BytesWritable val = new BytesWritable("dummy_row".getBytes());
  combiner.reduce(key, Lists.newArrayList(val), context);
  Assert.assertTrue(captureKey.getValue() == key);
  Assert.assertTrue(captureVal.getValue() == val);
}
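For context, here is a hedged sketch of what such a pass-through path can look like in a generic Hadoop combiner. This is an illustrative stand-in, not druid's actual IndexGeneratorCombiner: when a key has a single value there is nothing to merge, so the key and value objects are forwarded to the context unchanged, which is exactly what the identity assertions above check.

// Hypothetical pass-through combiner sketch (not the druid implementation).
import java.io.IOException;
import java.util.Iterator;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.Reducer;

public class PassThroughCombiner
    extends Reducer<BytesWritable, BytesWritable, BytesWritable, BytesWritable> {
  @Override
  protected void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
      throws IOException, InterruptedException {
    Iterator<BytesWritable> it = values.iterator();
    BytesWritable first = it.next();
    if (!it.hasNext()) {
      // Single row for this key: emit the original objects untouched.
      context.write(key, first);
      return;
    }
    // A real combiner would deserialize and merge the remaining rows here.
  }
}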
Use of org.apache.hadoop.io.BytesWritable in project druid by druid-io: class IndexGeneratorCombinerTest, method testMultipleRowsMerged.
@Test
public void testMultipleRowsMerged() throws Exception {
  long timestamp = System.currentTimeMillis();
  Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
  SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
  BytesWritable key = keySortableBytes.toBytesWritable();
  InputRow row1 = new MapBasedInputRow(timestamp, ImmutableList.<String>of("keywords"), ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10));
  InputRow row2 = new MapBasedInputRow(timestamp, ImmutableList.<String>of("keywords"), ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5));
  List<BytesWritable> rows = Lists.newArrayList(new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)), new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true)));
  Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
  Capture<BytesWritable> captureKey = Capture.newInstance();
  Capture<BytesWritable> captureVal = Capture.newInstance();
  context.write(EasyMock.capture(captureKey), EasyMock.capture(captureVal));
  EasyMock.replay(context);
  combiner.reduce(key, rows, context);
  EasyMock.verify(context);
  Assert.assertTrue(captureKey.getValue() == key);
  InputRow capturedRow = InputRowSerde.fromBytes(captureVal.getValue().getBytes(), aggregators);
  Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow.getDimensions());
  Assert.assertEquals(ImmutableList.of(), capturedRow.getDimension("host"));
  Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow.getDimension("keywords"));
  Assert.assertEquals(15, capturedRow.getLongMetric("visited_sum"));
  Assert.assertEquals(2.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow.getRaw("unique_hosts")), 0.001);
}
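One detail worth keeping in mind when consuming the captured value, as fromBytes does above: BytesWritable.getBytes() returns the whole backing array, which can be longer than the valid payload reported by getLength(). A minimal standalone sketch of that distinction (copyBytes() is available in Hadoop 2.x):

// Demonstrates the buffer-vs-length distinction of BytesWritable.
import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableLengthDemo {
  public static void main(String[] args) {
    BytesWritable writable = new BytesWritable("dummy_row".getBytes());
    writable.setSize(4); // keep only the first four bytes as valid data

    byte[] backing = writable.getBytes();  // still the full 9-byte backing buffer
    byte[] valid = writable.copyBytes();   // exactly getLength() == 4 bytes
    byte[] manual = Arrays.copyOf(backing, writable.getLength()); // equivalent to copyBytes()

    System.out.println(backing.length + " vs " + valid.length + " vs " + manual.length);
  }
}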
Use of org.apache.hadoop.io.BytesWritable in project alluxio by Alluxio: class KeyValueRecordReader, method nextKeyValue.
@Override
public synchronized boolean nextKeyValue() throws IOException {
  if (!mKeyValuePairIterator.hasNext()) {
    return false;
  }
  KeyValuePair pair;
  try {
    pair = mKeyValuePairIterator.next();
  } catch (AlluxioException e) {
    throw new IOException(e);
  }
  // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could
  // benefit from zero-copy.
  mCurrentKey.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getKey())));
  mCurrentValue.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getValue())));
  mNumVisitedKeyValuePairs++;
  return true;
}
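The TODO above asks for an InputStream view over a ByteBuffer so readers could consume the pair without first copying it into a byte[]. A minimal sketch of that idea, purely illustrative and not part of the Alluxio code base:

// Illustrative ByteBuffer-backed InputStream (hypothetical, not Alluxio API).
import java.io.InputStream;
import java.nio.ByteBuffer;

public class ByteBufferInputStream extends InputStream {
  private final ByteBuffer mBuffer;

  public ByteBufferInputStream(ByteBuffer buffer) {
    // Work on a duplicate so the caller's position/limit stay untouched.
    mBuffer = buffer.duplicate();
  }

  @Override
  public int read() {
    return mBuffer.hasRemaining() ? (mBuffer.get() & 0xFF) : -1;
  }

  @Override
  public int read(byte[] dst, int off, int len) {
    if (!mBuffer.hasRemaining()) {
      return -1;
    }
    int toRead = Math.min(len, mBuffer.remaining());
    mBuffer.get(dst, off, toRead);
    return toRead;
  }

  @Override
  public int available() {
    return mBuffer.remaining();
  }
}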
Use of org.apache.hadoop.io.BytesWritable in project camel by apache: class HdfsConsumerTest, method testReadBytes.
@Test
public void testReadBytes() throws Exception {
  if (!canTest()) {
    return;
  }
  final Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
  Configuration conf = new Configuration();
  FileSystem fs1 = FileSystem.get(file.toUri(), conf);
  SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, BytesWritable.class);
  NullWritable keyWritable = NullWritable.get();
  BytesWritable valueWritable = new BytesWritable();
  String value = "CIAO!";
  valueWritable.set(value.getBytes(), 0, value.getBytes().length);
  writer.append(keyWritable, valueWritable);
  writer.sync();
  writer.close();
  MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
  resultEndpoint.expectedMessageCount(1);
  context.addRoutes(new RouteBuilder() {
    public void configure() {
      from("hdfs:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
    }
  });
  context.start();
  resultEndpoint.assertIsSatisfied();
}
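If you want to sanity-check what the writer produced before the Camel route consumes it, the sequence file can be read back with SequenceFile.Reader. A small standalone sketch under the same assumptions as the test (local file system, NullWritable keys, BytesWritable values; the path is the test's own target directory):

// Reads the sequence file written above and prints each BytesWritable payload.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadBytesBack {
  public static void main(String[] args) throws Exception {
    Path file = new Path("target/test/test-camel-bytes");
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(file.toUri(), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, file, conf);
    try {
      NullWritable key = NullWritable.get();
      BytesWritable value = new BytesWritable();
      while (reader.next(key, value)) {
        // Only the first getLength() bytes of the backing array are valid.
        System.out.println(new String(value.getBytes(), 0, value.getLength()));
      }
    } finally {
      reader.close();
    }
  }
}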
Use of org.apache.hadoop.io.BytesWritable in project hive by apache: class LocalHiveSparkClient, method execute.
@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
  Context ctx = driverContext.getCtx();
  HiveConf hiveConf = (HiveConf) ctx.getConf();
  refreshLocalResources(sparkWork, hiveConf);
  JobConf jobConf = new JobConf(hiveConf);
  // Create temporary scratch dir
  Path emptyScratchDir;
  emptyScratchDir = ctx.getMRTmpPath();
  FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
  fs.mkdirs(emptyScratchDir);
  // Update credential provider location
  // the password to the credential provider is already set in the sparkConf
  // in HiveSparkClientFactory
  HiveConfUtil.updateJobCredentialProviders(jobConf);
  SparkCounters sparkCounters = new SparkCounters(sc);
  Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
  if (prefixes != null) {
    for (String group : prefixes.keySet()) {
      for (String counterName : prefixes.get(group)) {
        sparkCounters.createCounter(group, counterName);
      }
    }
  }
  SparkReporter sparkReporter = new SparkReporter(sparkCounters);
  // Generate Spark plan
  SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
  SparkPlan plan = gen.generate(sparkWork);
  if (driverContext.isShutdown()) {
    throw new HiveException("Operation is cancelled.");
  }
  // Execute generated plan.
  JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
  sc.setJobGroup("queryId = " + sparkWork.getQueryId(), DagUtils.getQueryName(jobConf));
  // We use Spark RDD async action to submit job as it's the only way to get jobId now.
  JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
  // As we always use foreach action to submit RDD graph, it would only trigger one job.
  int jobId = future.jobIds().get(0);
  LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future);
  return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}
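The core trick in the method above is submitting the RDD graph with an async foreach action so the Spark job id is available at submission time rather than after completion. A minimal, self-contained sketch of that pattern (class and app names here are placeholders, not Hive code):

// Illustrates getting a Spark job id via an async action, as LocalHiveSparkClient does.
import java.util.Arrays;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaFutureAction;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

public class AsyncJobIdDemo {
  public static void main(String[] args) throws Exception {
    SparkConf conf = new SparkConf().setAppName("async-job-id-demo").setMaster("local[*]");
    JavaSparkContext sc = new JavaSparkContext(conf);

    JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4));
    // A foreach action triggers exactly one job, mirroring the comment above
    // about always using a foreach action to submit the RDD graph.
    JavaFutureAction<Void> future = rdd.foreachAsync(x -> { /* no-op, just materialize */ });

    int jobId = future.jobIds().get(0); // known as soon as the job is submitted
    System.out.println("Submitted Spark job " + jobId);

    // Block until the job finishes (the Hive client instead polls its status asynchronously).
    future.get();
    sc.close();
  }
}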