Example 91 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project jena by apache.

The class CharacteristicSetReducerTest, method characteristic_set_reducer_06.

/**
 * Test characteristic set reduction
 *
 * @throws IOException
 */
@Test
public void characteristic_set_reducer_06() throws IOException {
    MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this.getMapReduceDriver();
    this.createSet(driver, 2, 1, "http://predicate", "http://other");
    this.createSet(driver, 1, 1, "http://other");
    // First pass: check the reduced characteristic sets with runTest, ignoring output order
    driver.runTest(false);
    // Second pass with a fresh driver so the individual outputs can be inspected below
    driver = getMapReduceDriver();
    createSet(driver, 2, 1, "http://predicate", "http://other");
    createSet(driver, 1, 1, "http://other");
    List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
    for (Pair<CharacteristicSetWritable, NullWritable> pair : results) {
        CharacteristicSetWritable cw = pair.getFirst();
        boolean expectTwo = cw.hasCharacteristic("http://predicate");
        Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get());
    }
}
Also used : CharacteristicSetWritable(org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable) NullWritable(org.apache.hadoop.io.NullWritable) Pair(org.apache.hadoop.mrunit.types.Pair) Test(org.junit.Test)
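
The getMapReduceDriver() and createSet() helpers are defined elsewhere in the test class and are not shown here. As a rough, hedged sketch (assuming the new org.apache.hadoop.mapreduce API), a reducer with the output shape this driver expects, CharacteristicSetWritable keys paired with NullWritable placeholder values, could look like the following; it is not the actual Jena reducer, which also merges duplicate sets and accumulates the counts that the assertions above check.

// Hedged sketch only: NOT the actual Jena CharacteristicSetReducer.
public static class SetKeyOnlyReducer extends
        Reducer<CharacteristicSetWritable, CharacteristicSetWritable,
                CharacteristicSetWritable, NullWritable> {

    @Override
    protected void reduce(CharacteristicSetWritable key,
            Iterable<CharacteristicSetWritable> values, Context context)
            throws IOException, InterruptedException {
        // Only the key carries data; NullWritable.get() is a zero-byte placeholder value.
        context.write(key, NullWritable.get());
    }
}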

Example 92 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project spark-dataflow by cloudera.

The class TransformTranslator, method writeAvro.

private static <T> TransformEvaluator<AvroIO.Write.Bound<T>> writeAvro() {
    return new TransformEvaluator<AvroIO.Write.Bound<T>>() {

        @Override
        public void evaluate(AvroIO.Write.Bound<T> transform, EvaluationContext context) {
            Job job;
            try {
                job = Job.getInstance();
            } catch (IOException e) {
                throw new IllegalStateException(e);
            }
            AvroJob.setOutputKeySchema(job, transform.getSchema());
            @SuppressWarnings("unchecked") JavaPairRDD<AvroKey<T>, NullWritable> last = ((JavaRDDLike<WindowedValue<T>, ?>) context.getInputRDD(transform)).map(WindowingHelpers.<T>unwindowFunction()).mapToPair(new PairFunction<T, AvroKey<T>, NullWritable>() {

                @Override
                public Tuple2<AvroKey<T>, NullWritable> call(T t) throws Exception {
                    return new Tuple2<>(new AvroKey<>(t), NullWritable.get());
                }
            });
            ShardTemplateInformation shardTemplateInfo = new ShardTemplateInformation(transform.getNumShards(), transform.getShardTemplate(), transform.getFilenamePrefix(), transform.getFilenameSuffix());
            writeHadoopFile(last, job.getConfiguration(), shardTemplateInfo, AvroKey.class, NullWritable.class, TemplatedAvroKeyOutputFormat.class);
        }
    };
}
Also used : AvroIO(com.google.cloud.dataflow.sdk.io.AvroIO) AvroKey(org.apache.avro.mapred.AvroKey) IOException(java.io.IOException) NullWritable(org.apache.hadoop.io.NullWritable) CannotProvideCoderException(com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException) WindowedValue(com.google.cloud.dataflow.sdk.util.WindowedValue) Tuple2(scala.Tuple2) Job(org.apache.hadoop.mapreduce.Job) AvroJob(org.apache.avro.mapreduce.AvroJob)
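
The NullWritable half of each pair carries no data: NullWritable serializes to zero bytes and get() always returns the same shared instance, so it serves purely as a placeholder value for an Avro key output format, which writes only the key side of each record. A minimal illustration:

// NullWritable is a stateless singleton; get() returns the same zero-byte instance every time.
NullWritable a = NullWritable.get();
NullWritable b = NullWritable.get();
assert a == b;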

Example 93 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hive by apache.

The class TestJdbcWithMiniLlap, method processQuery.

private int processQuery(String currentDatabase, String query, int numSplits, RowProcessor rowProcessor) throws Exception {
    String url = miniHS2.getJdbcURL();
    String user = System.getProperty("user.name");
    String pwd = user;
    String handleId = UUID.randomUUID().toString();
    LlapRowInputFormat inputFormat = new LlapRowInputFormat();
    // Get splits
    JobConf job = new JobConf(conf);
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);
    job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
    if (currentDatabase != null) {
        job.set(LlapBaseInputFormat.DB_KEY, currentDatabase);
    }
    InputSplit[] splits = inputFormat.getSplits(job, numSplits);
    assertTrue(splits.length > 0);
    // Fetch rows from splits
    boolean first = true;
    int rowCount = 0;
    for (InputSplit split : splits) {
        System.out.println("Processing split " + split.getLocations());
        int numColumns = 2;
        RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
        Row row = reader.createValue();
        while (reader.next(NullWritable.get(), row)) {
            rowProcessor.process(row);
            ++rowCount;
        }
        reader.close();
    }
    LlapBaseInputFormat.close(handleId);
    return rowCount;
}
Also used : LlapRowInputFormat(org.apache.hadoop.hive.llap.LlapRowInputFormat) Row(org.apache.hadoop.hive.llap.Row) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit) NullWritable(org.apache.hadoop.io.NullWritable)
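
RowProcessor is a callback defined elsewhere in this Hive test class; judging from the call above, it exposes a single process(Row) method. A hedged sketch of what an implementation passed into processQuery() might look like (the name printingProcessor is an arbitrary choice):

// Hedged sketch of the callback; the real interface definition lives in the Hive test code.
RowProcessor printingProcessor = new RowProcessor() {
    @Override
    public void process(Row row) {
        // Inspect or assert on each fetched row here.
        System.out.println(row);
    }
};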

Example 94 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hive by apache.

The class TempletonControllerJob, method run.

/**
 * Enqueue the job and print out the job id for later collection.
 * @see org.apache.hive.hcatalog.templeton.CompleteDelegator
 */
@Override
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, TException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Preparing to submit job: " + Arrays.toString(args));
    }
    Configuration conf = getConf();
    conf.set(JAR_ARGS_NAME, TempletonUtils.encodeArray(args));
    String memoryMb = appConf.mapperMemoryMb();
    if (memoryMb != null && memoryMb.length() != 0) {
        conf.set(AppConfig.HADOOP_MAP_MEMORY_MB, memoryMb);
    }
    String amMemoryMB = appConf.amMemoryMb();
    if (amMemoryMB != null && !amMemoryMB.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_MEMORY_MB, amMemoryMB);
    }
    String amJavaOpts = appConf.controllerAMChildOpts();
    if (amJavaOpts != null && !amJavaOpts.isEmpty()) {
        conf.set(AppConfig.HADOOP_MR_AM_JAVA_OPTS, amJavaOpts);
    }
    String user = UserGroupInformation.getCurrentUser().getShortUserName();
    conf.set("user.name", user);
    job = new Job(conf);
    job.setJarByClass(LaunchMapper.class);
    job.setJobName(TempletonControllerJob.class.getSimpleName());
    job.setMapperClass(LaunchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(SingleInputFormat.class);
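    // NullOutputFormat<NullWritable, NullWritable> discards all job output; the controller's
    // real work happens inside LaunchMapper, so nothing needs to be written out.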
    NullOutputFormat<NullWritable, NullWritable> of = new NullOutputFormat<NullWritable, NullWritable>();
    job.setOutputFormatClass(of.getClass());
    job.setNumReduceTasks(0);
    JobClient jc = new JobClient(new JobConf(job.getConfiguration()));
    if (UserGroupInformation.isSecurityEnabled()) {
        Token<DelegationTokenIdentifier> mrdt = jc.getDelegationToken(new Text("mr token"));
        job.getCredentials().addToken(new Text("mr token"), mrdt);
    }
    String metastoreTokenStrForm = addHMSToken(job, user);
    job.submit();
    JobID submittedJobId = job.getJobID();
    if (metastoreTokenStrForm != null) {
        // so that it can be cancelled later from CompleteDelegator
        DelegationTokenCache.getStringFormTokenCache().storeDelegationToken(submittedJobId.toString(), metastoreTokenStrForm);
        LOG.debug("Added metastore delegation token for jobId=" + submittedJobId.toString() + " user=" + user);
    }
    return 0;
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) DelegationTokenIdentifier(org.apache.hadoop.mapreduce.security.token.delegation.DelegationTokenIdentifier) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) JobClient(org.apache.hadoop.mapred.JobClient) Job(org.apache.hadoop.mapreduce.Job) JobConf(org.apache.hadoop.mapred.JobConf) NullOutputFormat(org.apache.hadoop.mapreduce.lib.output.NullOutputFormat) JobID(org.apache.hadoop.mapreduce.JobID)

Example 95 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hive by apache.

The class JdbcSerDe, method deserialize.

@Override
public Object deserialize(Writable blob) throws SerDeException {
    LOGGER.debug("Deserializing from SerDe");
    if (!(blob instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable. Got " + blob.getClass().getName());
    }
    if ((row == null) || (columnNames == null)) {
        throw new SerDeException("JDBC SerDe hasn't been initialized properly");
    }
    row.clear();
    MapWritable input = (MapWritable) blob;
    Text columnKey = new Text();
    for (int i = 0; i < numColumns; i++) {
        columnKey.set(columnNames.get(i));
        Writable value = input.get(columnKey);
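        // A NullWritable value marks a SQL NULL for this column; otherwise unwrap the ObjectWritable.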
        row.add(value instanceof NullWritable ? null : ((ObjectWritable) value).get());
    }
    return row;
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) Writable(org.apache.hadoop.io.Writable) MapWritable(org.apache.hadoop.io.MapWritable) ObjectWritable(org.apache.hadoop.io.ObjectWritable) Text(org.apache.hadoop.io.Text) SerDeException(org.apache.hadoop.hive.serde2.SerDeException)

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable): 101
Test (org.junit.Test): 65
Configuration (org.apache.hadoop.conf.Configuration): 41
Path (org.apache.hadoop.fs.Path): 41
File (java.io.File): 29
FileSystem (org.apache.hadoop.fs.FileSystem): 26
SequenceFile (org.apache.hadoop.io.SequenceFile): 22
JobConf (org.apache.hadoop.mapred.JobConf): 22
RouteBuilder (org.apache.camel.builder.RouteBuilder): 18
MockEndpoint (org.apache.camel.component.mock.MockEndpoint): 18
ArrayFile (org.apache.hadoop.io.ArrayFile): 18
Text (org.apache.hadoop.io.Text): 16
InputSplit (org.apache.hadoop.mapred.InputSplit): 16
LongWritable (org.apache.hadoop.io.LongWritable): 15
IntWritable (org.apache.hadoop.io.IntWritable): 10
Writer (org.apache.hadoop.io.SequenceFile.Writer): 9
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 8
IOException (java.io.IOException): 7
StructObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector): 7
FloatWritable (org.apache.hadoop.io.FloatWritable): 7
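
The aggregation above shows NullWritable most often paired with SequenceFile, Configuration, and Path. A minimal, self-contained sketch of that common pattern, writing and reading a SequenceFile whose values are NullWritable placeholders (the local path below is an arbitrary choice, not taken from any example above):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class NullWritableSequenceFileExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Arbitrary local path, for illustration only.
        Path path = new Path("/tmp/nullwritable-example.seq");

        // Write Text keys with NullWritable values: only the keys carry data,
        // so the file is effectively a compact list of keys.
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(NullWritable.class))) {
            writer.append(new Text("http://predicate"), NullWritable.get());
            writer.append(new Text("http://other"), NullWritable.get());
        }

        // Read the keys back; NullWritable.get() is passed as the value holder
        // because NullWritable carries no state.
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(path))) {
            Text key = new Text();
            while (reader.next(key, NullWritable.get())) {
                System.out.println(key);
            }
        }
    }
}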