
Example 1 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hive by apache.

From the class LlapDump, method main:

public static void main(String[] args) throws Exception {
    Options opts = createOptions();
    CommandLine cli = new GnuParser().parse(opts, args);
    if (cli.hasOption('h')) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("llapdump", opts);
        return;
    }
    if (cli.hasOption('l')) {
        url = cli.getOptionValue("l");
    }
    if (cli.hasOption('u')) {
        user = cli.getOptionValue("u");
    }
    if (cli.hasOption('p')) {
        pwd = cli.getOptionValue("p");
    }
    if (cli.hasOption('n')) {
        numSplits = cli.getOptionValue("n");
    }
    Properties configProps = cli.getOptionProperties("hiveconf");
    if (cli.getArgs().length > 0) {
        query = cli.getArgs()[0];
    }
    if (query == null) {
        throw new IllegalArgumentException("No query string specified");
    }
    System.out.println("url: " + url);
    System.out.println("user: " + user);
    System.out.println("query: " + query);
    LlapRowInputFormat format = new LlapRowInputFormat();
    JobConf job = new JobConf();
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);
    // Additional conf settings specified on the command line
    for (String key : configProps.stringPropertyNames()) {
        job.set(key, configProps.getProperty(key));
    }
    InputSplit[] splits = format.getSplits(job, Integer.parseInt(numSplits));
    if (splits.length == 0) {
        System.out.println("No splits returned - empty scan");
        System.out.println("Results: ");
    } else {
        boolean first = true;
        for (InputSplit s : splits) {
            LOG.info("Processing input split s from " + Arrays.toString(s.getLocations()));
            RecordReader<NullWritable, Row> reader = format.getRecordReader(s, job, null);
            if (reader instanceof LlapRowRecordReader && first) {
                Schema schema = ((LlapRowRecordReader) reader).getSchema();
                System.out.println("" + schema);
            }
            if (first) {
                System.out.println("Results: ");
                System.out.println("");
                first = false;
            }
            Row value = reader.createValue();
            while (reader.next(NullWritable.get(), value)) {
                printRow(value);
            }
        }
        System.exit(0);
    }
}
Also used : Options(org.apache.commons.cli.Options) GnuParser(org.apache.commons.cli.GnuParser) Properties(java.util.Properties) NullWritable(org.apache.hadoop.io.NullWritable) HelpFormatter(org.apache.commons.cli.HelpFormatter) CommandLine(org.apache.commons.cli.CommandLine) JobConf(org.apache.hadoop.mapred.JobConf) InputSplit(org.apache.hadoop.mapred.InputSplit)

Example 2 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestWrappedRRClassloader, method testClassLoader:

/**
   * Tests that the class loader set by
   * {@link Configuration#setClassLoader(ClassLoader)}
   * is inherited by any {@link WrappedRecordReader}s created by
   * {@link CompositeRecordReader}.
   */
@Test
public void testClassLoader() throws Exception {
    Configuration conf = new Configuration();
    Fake_ClassLoader classLoader = new Fake_ClassLoader();
    conf.setClassLoader(classLoader);
    assertTrue(conf.getClassLoader() instanceof Fake_ClassLoader);
    FileSystem fs = FileSystem.get(conf);
    Path testdir = new Path(System.getProperty("test.build.data", "/tmp")).makeQualified(fs);
    Path base = new Path(testdir, "/empty");
    Path[] src = { new Path(base, "i0"), new Path("i1"), new Path("i2") };
    conf.set(CompositeInputFormat.JOIN_EXPR, CompositeInputFormat.compose("outer", IF_ClassLoaderChecker.class, src));
    CompositeInputFormat<NullWritable> inputFormat = new CompositeInputFormat<NullWritable>();
    // create dummy TaskAttemptID
    TaskAttemptID tid = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
    conf.set(MRJobConfig.TASK_ATTEMPT_ID, tid.toString());
    inputFormat.createRecordReader(inputFormat.getSplits(Job.getInstance(conf)).get(0), new TaskAttemptContextImpl(conf, tid));
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) TaskAttemptID(org.apache.hadoop.mapreduce.TaskAttemptID) FileSystem(org.apache.hadoop.fs.FileSystem) TaskAttemptContextImpl(org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)
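
A minimal standalone sketch of the mechanism this test exercises: a Configuration carries a user-supplied ClassLoader, and classes are resolved through it. The URLClassLoader and the class name looked up below are illustrative, not taken from the test.

import java.net.URL;
import java.net.URLClassLoader;
import org.apache.hadoop.conf.Configuration;

public class ConfClassLoaderSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Attach a custom loader; components handed this conf (input formats,
        // record readers) can resolve user classes through it.
        ClassLoader custom = new URLClassLoader(new URL[0],
                Thread.currentThread().getContextClassLoader());
        conf.setClassLoader(custom);
        // getClassByName consults the Configuration's ClassLoader.
        Class<?> clazz = conf.getClassByName("org.apache.hadoop.io.NullWritable");
        System.out.println(clazz + " loaded via " + conf.getClassLoader());
    }
}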

Example 3 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestInputSampler, method testSplitSampler:

/**
   * Verify SplitSampler contract, that an equal number of records are taken
   * from the first splits.
   */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testSplitSampler() throws Exception {
    final int TOT_SPLITS = 15;
    final int NUM_SPLITS = 5;
    final int STEP_SAMPLE = 5;
    final int NUM_SAMPLES = NUM_SPLITS * STEP_SAMPLE;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.SplitSampler<IntWritable, NullWritable>(NUM_SAMPLES, NUM_SPLITS);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i * STEP_SAMPLE;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(100000, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used : Job(org.apache.hadoop.mapreduce.Job) NullWritable(org.apache.hadoop.io.NullWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)
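
Outside of tests, a Sampler is typically handed to InputSampler.writePartitionFile to seed a TotalOrderPartitioner (see Example 5 below). A hedged sketch of that wiring, assuming the job's input format and input paths are already configured; the sample sizes and partition-file path are illustrative, not from the test.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.InputSampler;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class SamplerUsageSketch {
    public static void configureSampling(Job job) throws Exception {
        job.setMapOutputKeyClass(IntWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        // SplitSampler(numSamples, maxSplitsSampled): an equal share of records is
        // taken from each of the first maxSplitsSampled splits, which is the
        // contract the test above verifies.
        InputSampler.Sampler<IntWritable, NullWritable> sampler =
                new InputSampler.SplitSampler<>(1000, 10);
        // Hypothetical partition-file location.
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
                new Path("/tmp/partitions"));
        // Samples the job's configured input and writes the partition keys.
        InputSampler.writePartitionFile(job, sampler);
    }
}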

Example 4 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestInputSampler, method testIntervalSampler:

/**
   * Verify IntervalSampler contract, that samples are taken at regular
   * intervals from the given splits.
   */
@Test
// IntWritable comparator not typesafe
@SuppressWarnings("unchecked")
public void testIntervalSampler() throws Exception {
    final int TOT_SPLITS = 16;
    final int PER_SPLIT_SAMPLE = 4;
    final int NUM_SAMPLES = TOT_SPLITS * PER_SPLIT_SAMPLE;
    final double FREQ = 1.0 / TOT_SPLITS;
    InputSampler.Sampler<IntWritable, NullWritable> sampler = new InputSampler.IntervalSampler<IntWritable, NullWritable>(FREQ, NUM_SAMPLES);
    int[] inits = new int[TOT_SPLITS];
    for (int i = 0; i < TOT_SPLITS; ++i) {
        inits[i] = i;
    }
    Job ignored = Job.getInstance();
    Object[] samples = sampler.getSample(new TestInputSamplerIF(NUM_SAMPLES, TOT_SPLITS, inits), ignored);
    assertEquals(NUM_SAMPLES, samples.length);
    Arrays.sort(samples, new IntWritable.Comparator());
    for (int i = 0; i < NUM_SAMPLES; ++i) {
        assertEquals(i, ((IntWritable) samples[i]).get());
    }
}
Also used : NullWritable(org.apache.hadoop.io.NullWritable) Job(org.apache.hadoop.mapreduce.Job) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 5 with NullWritable

Use of org.apache.hadoop.io.NullWritable in project hadoop by apache.

From the class TestTotalOrderPartitioner, method testTotalOrderMemCmp:

@Test
public void testTotalOrderMemCmp() throws Exception {
    TotalOrderPartitioner<Text, NullWritable> partitioner = new TotalOrderPartitioner<Text, NullWritable>();
    Configuration conf = new Configuration();
    Path p = TestTotalOrderPartitioner.<Text>writePartitionFile("totalordermemcmp", conf, splitStrings);
    conf.setClass(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class, Object.class);
    try {
        partitioner.setConf(conf);
        NullWritable nw = NullWritable.get();
        for (Check<Text> chk : testStrings) {
            assertEquals(chk.data.toString(), chk.part, partitioner.getPartition(chk.data, nw, splitStrings.length + 1));
        }
    } finally {
        p.getFileSystem(conf).delete(p, true);
    }
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) Text(org.apache.hadoop.io.Text) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Aggregations

NullWritable (org.apache.hadoop.io.NullWritable): 113
Test (org.junit.Test): 68
Path (org.apache.hadoop.fs.Path): 47
Configuration (org.apache.hadoop.conf.Configuration): 44
File (java.io.File): 33
FileSystem (org.apache.hadoop.fs.FileSystem): 28
SequenceFile (org.apache.hadoop.io.SequenceFile): 24
JobConf (org.apache.hadoop.mapred.JobConf): 24
RouteBuilder (org.apache.camel.builder.RouteBuilder): 18
MockEndpoint (org.apache.camel.component.mock.MockEndpoint): 18
ArrayFile (org.apache.hadoop.io.ArrayFile): 18
Text (org.apache.hadoop.io.Text): 17
InputSplit (org.apache.hadoop.mapred.InputSplit): 17
LongWritable (org.apache.hadoop.io.LongWritable): 16
IntWritable (org.apache.hadoop.io.IntWritable): 11
IOException (java.io.IOException): 10
Writer (org.apache.hadoop.io.SequenceFile.Writer): 9
TaskAttemptContextImpl (org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl): 8
Pair (org.apache.hadoop.mrunit.types.Pair): 8
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 8
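
Across these usages NullWritable serves as a zero-byte placeholder for a key or value that carries no data; NullWritable.get() always returns the same singleton. A minimal sketch of that role with a SequenceFile, writing and reading Text values keyed by the singleton; the output path is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class NullWritableSequenceFileSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path file = new Path("/tmp/nullwritable-demo.seq"); // hypothetical output path

        // Write: NullWritable serializes to zero bytes, so the file effectively
        // stores only the Text values.
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(file),
                SequenceFile.Writer.keyClass(NullWritable.class),
                SequenceFile.Writer.valueClass(Text.class))) {
            writer.append(NullWritable.get(), new Text("a value with no meaningful key"));
        }

        // Read back: the same singleton stands in for every key.
        try (SequenceFile.Reader reader = new SequenceFile.Reader(conf,
                SequenceFile.Reader.file(file))) {
            NullWritable key = NullWritable.get();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.println(value);
            }
        }
    }
}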