
Example 41 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project druid by druid-io.

From the class IndexGeneratorCombinerTest, method testSingleRowNoMergePassThrough.

@Test
public void testSingleRowNoMergePassThrough() throws Exception {
    Reducer.Context context = EasyMock.createMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey = Capture.newInstance();
    Capture<BytesWritable> captureVal = Capture.newInstance();
    context.write(EasyMock.capture(captureKey), EasyMock.capture(captureVal));
    EasyMock.replay(context);
    BytesWritable key = new BytesWritable("dummy_key".getBytes());
    BytesWritable val = new BytesWritable("dummy_row".getBytes());
    combiner.reduce(key, Lists.newArrayList(val), context);
    // A single row has nothing to merge with, so the combiner should pass key and value through unchanged
    Assert.assertSame(key, captureKey.getValue());
    Assert.assertSame(val, captureVal.getValue());
}
Also used : BytesWritable(org.apache.hadoop.io.BytesWritable) Reducer(org.apache.hadoop.mapreduce.Reducer) Test(org.junit.Test)

Example 42 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project druid by druid-io.

From the class IndexGeneratorCombinerTest, method testMultipleRowsMerged.

@Test
public void testMultipleRowsMerged() throws Exception {
    long timestamp = System.currentTimeMillis();
    Bucket bucket = new Bucket(0, new DateTime(timestamp), 0);
    SortableBytes keySortableBytes = new SortableBytes(bucket.toGroupKey(), new byte[0]);
    BytesWritable key = keySortableBytes.toBytesWritable();
    InputRow row1 = new MapBasedInputRow(timestamp, ImmutableList.<String>of("keywords"), ImmutableMap.<String, Object>of("host", "host1", "keywords", Arrays.asList("foo", "bar"), "visited", 10));
    InputRow row2 = new MapBasedInputRow(timestamp, ImmutableList.<String>of("keywords"), ImmutableMap.<String, Object>of("host", "host2", "keywords", Arrays.asList("foo", "bar"), "visited", 5));
    List<BytesWritable> rows = Lists.newArrayList(new BytesWritable(InputRowSerde.toBytes(row1, aggregators, true)), new BytesWritable(InputRowSerde.toBytes(row2, aggregators, true)));
    Reducer.Context context = EasyMock.createNiceMock(Reducer.Context.class);
    Capture<BytesWritable> captureKey = Capture.newInstance();
    Capture<BytesWritable> captureVal = Capture.newInstance();
    context.write(EasyMock.capture(captureKey), EasyMock.capture(captureVal));
    EasyMock.replay(context);
    combiner.reduce(key, rows, context);
    EasyMock.verify(context);
    Assert.assertSame(key, captureKey.getValue());
    InputRow capturedRow = InputRowSerde.fromBytes(captureVal.getValue().getBytes(), aggregators);
    Assert.assertEquals(Arrays.asList("host", "keywords"), capturedRow.getDimensions());
    Assert.assertEquals(ImmutableList.of(), capturedRow.getDimension("host"));
    Assert.assertEquals(Arrays.asList("bar", "foo"), capturedRow.getDimension("keywords"));
    Assert.assertEquals(15, capturedRow.getLongMetric("visited_sum"));
    Assert.assertEquals(2.0, (Double) HyperUniquesAggregatorFactory.estimateCardinality(capturedRow.getRaw("unique_hosts")), 0.001);
}
Also used : MapBasedInputRow(io.druid.data.input.MapBasedInputRow) InputRow(io.druid.data.input.InputRow) BytesWritable(org.apache.hadoop.io.BytesWritable) Reducer(org.apache.hadoop.mapreduce.Reducer) DateTime(org.joda.time.DateTime) Test(org.junit.Test)
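
Note that captureVal.getValue().getBytes() above returns the BytesWritable's backing array, which can be longer than the actual payload; consumers either have to honor getLength() or use copyBytes(). A minimal, self-contained sketch of that behavior (the class and method names below are hypothetical, not part of the Druid test):

import org.apache.hadoop.io.BytesWritable;
import org.junit.Assert;
import org.junit.Test;

public class BytesWritablePaddingTest {

    @Test
    public void testGetBytesMayReturnPaddedArray() {
        BytesWritable bw = new BytesWritable();
        // set() grows the backing array with head-room, so its capacity can exceed the payload size
        bw.set("dummy_row".getBytes(), 0, "dummy_row".getBytes().length);
        // shrinking the payload keeps the larger backing array around
        bw.set("key".getBytes(), 0, 3);
        // getBytes() exposes the whole (possibly padded) backing array ...
        Assert.assertTrue(bw.getBytes().length >= bw.getLength());
        // ... while copyBytes() returns exactly getLength() bytes
        Assert.assertEquals(3, bw.getLength());
        Assert.assertArrayEquals("key".getBytes(), bw.copyBytes());
    }
}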

Example 43 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project alluxio by Alluxio.

From the class KeyValueRecordReader, method nextKeyValue.

@Override
public synchronized boolean nextKeyValue() throws IOException {
    if (!mKeyValuePairIterator.hasNext()) {
        return false;
    }
    KeyValuePair pair;
    try {
        pair = mKeyValuePairIterator.next();
    } catch (AlluxioException e) {
        throw new IOException(e);
    }
    // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could
    // benefit from zero-copy.
    mCurrentKey.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getKey())));
    mCurrentValue.set(new BytesWritable(BufferUtils.newByteArrayFromByteBuffer(pair.getValue())));
    mNumVisitedKeyValuePairs++;
    return true;
}
Also used : KeyValuePair(alluxio.client.keyvalue.KeyValuePair) BytesWritable(org.apache.hadoop.io.BytesWritable) IOException(java.io.IOException) AlluxioException(alluxio.exception.AlluxioException)
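
The TODO above is about avoiding the byte[] copies; a smaller, independent tweak is to reuse the existing writables instead of allocating a new BytesWritable per pair. A hedged sketch, assuming mCurrentKey and mCurrentValue are plain BytesWritable fields and that pair.getKey()/getValue() return ByteBuffers (as the BufferUtils call suggests); the helper name is made up:

import java.nio.ByteBuffer;

import org.apache.hadoop.io.BytesWritable;

final class WritableBuffers {

    // Copy a ByteBuffer's remaining bytes straight into an existing BytesWritable,
    // so no intermediate BytesWritable is allocated per key/value pair.
    static void copyInto(BytesWritable target, ByteBuffer source) {
        byte[] bytes = new byte[source.remaining()];
        // duplicate() reads the bytes without disturbing the source buffer's position
        source.duplicate().get(bytes);
        // set() reuses target's backing array whenever it is already large enough
        target.set(bytes, 0, bytes.length);
    }
}

With such a helper, the two assignments in nextKeyValue() would become WritableBuffers.copyInto(mCurrentKey, pair.getKey()) and WritableBuffers.copyInto(mCurrentValue, pair.getValue()).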

Example 44 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project camel by apache.

From the class HdfsConsumerTest, method testReadBytes.

@Test
public void testReadBytes() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, BytesWritable.class);
    NullWritable keyWritable = NullWritable.get();
    BytesWritable valueWritable = new BytesWritable();
    String value = "CIAO!";
    valueWritable.set(value.getBytes(), 0, value.getBytes().length);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    context.addRoutes(new RouteBuilder() {

        public void configure() {
            from("hdfs:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    resultEndpoint.assertIsSatisfied();
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) RouteBuilder(org.apache.camel.builder.RouteBuilder) MockEndpoint(org.apache.camel.component.mock.MockEndpoint) FileSystem(org.apache.hadoop.fs.FileSystem) BytesWritable(org.apache.hadoop.io.BytesWritable) ArrayFile(org.apache.hadoop.io.ArrayFile) File(java.io.File) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)
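
For reference, the SequenceFile written above can also be read back directly with Hadoop's SequenceFile.Reader, outside of Camel. A minimal sketch, assuming a default local Configuration and the Hadoop 2.x reader API; it should print the single "CIAO!" payload:

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

public class ReadBytesSequenceFile {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path(new File("target/test/test-camel-bytes").getAbsolutePath());
        try (SequenceFile.Reader reader =
                new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
            NullWritable key = NullWritable.get();
            BytesWritable value = new BytesWritable();
            while (reader.next(key, value)) {
                // copyBytes() trims the padded backing array down to getLength() bytes
                System.out.println(new String(value.copyBytes()));
            }
        }
    }
}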

Example 45 with BytesWritable

Use of org.apache.hadoop.io.BytesWritable in project hive by apache.

From the class LocalHiveSparkClient, method execute.

@Override
public SparkJobRef execute(DriverContext driverContext, SparkWork sparkWork) throws Exception {
    Context ctx = driverContext.getCtx();
    HiveConf hiveConf = (HiveConf) ctx.getConf();
    refreshLocalResources(sparkWork, hiveConf);
    JobConf jobConf = new JobConf(hiveConf);
    // Create temporary scratch dir
    Path emptyScratchDir = ctx.getMRTmpPath();
    FileSystem fs = emptyScratchDir.getFileSystem(jobConf);
    fs.mkdirs(emptyScratchDir);
    // Update credential provider location
    // the password to the credential provider is already set in the sparkConf
    // in HiveSparkClientFactory
    HiveConfUtil.updateJobCredentialProviders(jobConf);
    SparkCounters sparkCounters = new SparkCounters(sc);
    Map<String, List<String>> prefixes = sparkWork.getRequiredCounterPrefix();
    if (prefixes != null) {
        for (String group : prefixes.keySet()) {
            for (String counterName : prefixes.get(group)) {
                sparkCounters.createCounter(group, counterName);
            }
        }
    }
    SparkReporter sparkReporter = new SparkReporter(sparkCounters);
    // Generate Spark plan
    SparkPlanGenerator gen = new SparkPlanGenerator(sc, ctx, jobConf, emptyScratchDir, sparkReporter);
    SparkPlan plan = gen.generate(sparkWork);
    if (driverContext.isShutdown()) {
        throw new HiveException("Operation is cancelled.");
    }
    // Execute generated plan.
    JavaPairRDD<HiveKey, BytesWritable> finalRDD = plan.generateGraph();
    sc.setJobGroup("queryId = " + sparkWork.getQueryId(), DagUtils.getQueryName(jobConf));
    // We use Spark RDD async action to submit job as it's the only way to get jobId now.
    JavaFutureAction<Void> future = finalRDD.foreachAsync(HiveVoidFunction.getInstance());
    // As we always use foreach action to submit RDD graph, it would only trigger one job.
    int jobId = future.jobIds().get(0);
    LocalSparkJobStatus sparkJobStatus = new LocalSparkJobStatus(sc, jobId, jobMetricsListener, sparkCounters, plan.getCachedRDDIds(), future);
    return new LocalSparkJobRef(Integer.toString(jobId), hiveConf, sparkJobStatus, sc);
}
Also used : JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) Context(org.apache.hadoop.hive.ql.Context) DriverContext(org.apache.hadoop.hive.ql.DriverContext) Path(org.apache.hadoop.fs.Path) SparkCounters(org.apache.hive.spark.counter.SparkCounters) HiveException(org.apache.hadoop.hive.ql.metadata.HiveException) BytesWritable(org.apache.hadoop.io.BytesWritable) LocalSparkJobStatus(org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobStatus) HiveKey(org.apache.hadoop.hive.ql.io.HiveKey) FileSystem(org.apache.hadoop.fs.FileSystem) HiveConf(org.apache.hadoop.hive.conf.HiveConf) ArrayList(java.util.ArrayList) List(java.util.List) LocalSparkJobRef(org.apache.hadoop.hive.ql.exec.spark.status.impl.LocalSparkJobRef) JobConf(org.apache.hadoop.mapred.JobConf)

Aggregations

BytesWritable (org.apache.hadoop.io.BytesWritable): 339
Test (org.junit.Test): 92
Text (org.apache.hadoop.io.Text): 81
LongWritable (org.apache.hadoop.io.LongWritable): 66
IntWritable (org.apache.hadoop.io.IntWritable): 54
ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector): 51
ArrayList (java.util.ArrayList): 48
List (java.util.List): 48
Path (org.apache.hadoop.fs.Path): 47
IOException (java.io.IOException): 42
Configuration (org.apache.hadoop.conf.Configuration): 41
FloatWritable (org.apache.hadoop.io.FloatWritable): 37
Writable (org.apache.hadoop.io.Writable): 36
BooleanWritable (org.apache.hadoop.io.BooleanWritable): 35
FileSystem (org.apache.hadoop.fs.FileSystem): 28
SequenceFile (org.apache.hadoop.io.SequenceFile): 27
DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable): 26
ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable): 26
HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable): 25
Random (java.util.Random): 24