Example 96 with IntWritable

use of org.apache.hadoop.io.IntWritable in project hive by apache.

the class TestMultiOutputFormat method testMultiOutputFormatWithoutReduce.

/**
 * A test job that reads an input file and outputs each word and the index of
 * the word encountered to a text file and a sequence file with different key
 * values.
 */
public void testMultiOutputFormatWithoutReduce() throws Throwable {
    Job job = new Job(mrConf, "MultiOutNoReduce");
    JobConfigurer configurer = MultiOutputFormat.createConfigurer(job);
    configurer.addOutputFormat("out1", TextOutputFormat.class, IntWritable.class, Text.class);
    configurer.addOutputFormat("out2", SequenceFileOutputFormat.class, Text.class, IntWritable.class);
    Path outDir = new Path(workDir.getPath(), job.getJobName());
    FileOutputFormat.setOutputPath(configurer.getJob("out1"), new Path(outDir, "out1"));
    FileOutputFormat.setOutputPath(configurer.getJob("out2"), new Path(outDir, "out2"));
    String fileContent = "Hello World";
    String inputFile = createInputFile(fileContent);
    FileInputFormat.setInputPaths(job, new Path(inputFile));
    //Test for merging of configs
    DistributedCache.addFileToClassPath(new Path(inputFile), job.getConfiguration(), fs);
    String dummyFile = createInputFile("dummy file");
    DistributedCache.addFileToClassPath(new Path(dummyFile), configurer.getJob("out1").getConfiguration(), fs);
    // duplicate of the value. Merging should remove duplicates
    DistributedCache.addFileToClassPath(new Path(inputFile), configurer.getJob("out2").getConfiguration(), fs);
    // Verify if the configs are merged
    Path[] fileClassPaths = DistributedCache.getFileClassPaths(job.getConfiguration());
    List<Path> fileClassPathsList = Arrays.asList(fileClassPaths);
    Assert.assertTrue("Cannot find " + (new Path(inputFile)) + " in " + fileClassPathsList, fileClassPathsList.contains(new Path(inputFile)));
    Assert.assertTrue("Cannot find " + (new Path(dummyFile)) + " in " + fileClassPathsList, fileClassPathsList.contains(new Path(dummyFile)));
    URI[] cacheFiles = DistributedCache.getCacheFiles(job.getConfiguration());
    List<URI> cacheFilesList = Arrays.asList(cacheFiles);
    URI inputFileURI = new Path(inputFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + inputFileURI + " in " + cacheFilesList, cacheFilesList.contains(inputFileURI));
    URI dummyFileURI = new Path(dummyFile).makeQualified(fs).toUri();
    Assert.assertTrue("Cannot find " + dummyFileURI + " in " + cacheFilesList, cacheFilesList.contains(dummyFileURI));
    Path textOutPath = new Path(outDir, "out1/part-m-00000");
    String[] textOutput = readFully(textOutPath).split("\n");
    Path seqOutPath = new Path(outDir, "out2/part-m-00000");
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqOutPath, mrConf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    String[] words = fileContent.split(" ");
    Assert.assertEquals(words.length, textOutput.length);"Verifying file contents");
    for (int i = 0; i < words.length; i++) {
        Assert.assertEquals((i + 1) + "\t" + words[i], textOutput[i]);, value);
        Assert.assertEquals(words[i], key.toString());
        Assert.assertEquals((i + 1), value.get());
    Assert.assertFalse(, value));
Also used : Path(org.apache.hadoop.fs.Path) JobConfigurer(org.apache.hive.hcatalog.mapreduce.MultiOutputFormat.JobConfigurer) Text(org.apache.hadoop.io.Text) URI(java.net.URI) SequenceFile(org.apache.hadoop.io.SequenceFile) Job(org.apache.hadoop.mapreduce.Job) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 97 with IntWritable

use of org.apache.hadoop.io.IntWritable in project crunch by cloudera.

the class TupleWritablePartitionerTest method testGetPartition.

public void testGetPartition() {
    IntWritable intWritable = new IntWritable(3);
    TupleWritable key = new TupleWritable(new Writable[] { intWritable });
    assertEquals(3, tupleWritableParitioner.getPartition(key, NullWritable.get(), 5));
    assertEquals(1, tupleWritableParitioner.getPartition(key, NullWritable.get(), 2));
Also used : TupleWritable(org.apache.crunch.types.writable.TupleWritable) IntWritable(org.apache.hadoop.io.IntWritable) Test(org.junit.Test)

Example 98 with IntWritable

use of org.apache.hadoop.io.IntWritable in project learning-spark by databricks.

the class BasicLoadSequenceFile method main.

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new Exception("Usage BasicLoadSequenceFile [sparkMaster] [input]");
    String master = args[0];
    String fileName = args[1];
    JavaSparkContext sc = new JavaSparkContext(master, "basicloadsequencefile", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    JavaPairRDD<Text, IntWritable> input = sc.sequenceFile(fileName, Text.class, IntWritable.class);
    JavaPairRDD<String, Integer> result = input.mapToPair(new ConvertToNativeTypes());
    List<Tuple2<String, Integer>> resultList = result.collect();
    for (Tuple2<String, Integer> record : resultList) {
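Also used : Tuple2(scala.Tuple2) Text(org.apache.hadoop.io.Text) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) IntWritable(org.apache.hadoop.io.IntWritable)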

Example 99 with IntWritable

use of org.apache.hadoop.io.IntWritable in project learning-spark by databricks.

the class BasicSaveSequenceFile method main.

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        throw new Exception("Usage BasicSaveSequenceFile [sparkMaster] [output]");
    String master = args[0];
    String fileName = args[1];
    JavaSparkContext sc = new JavaSparkContext(master, "basicloadsequencefile", System.getenv("SPARK_HOME"), System.getenv("JARS"));
    List<Tuple2<String, Integer>> input = new ArrayList();
    input.add(new Tuple2("coffee", 1));
    input.add(new Tuple2("coffee", 2));
    input.add(new Tuple2("pandas", 3));
    JavaPairRDD<String, Integer> rdd = sc.parallelizePairs(input);
    JavaPairRDD<Text, IntWritable> result = rdd.mapToPair(new ConvertToWritableTypes());
    result.saveAsHadoopFile(fileName, Text.class, IntWritable.class, SequenceFileOutputFormat.class);
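Also used : Tuple2(scala.Tuple2) ArrayList(java.util.ArrayList) Text(org.apache.hadoop.io.Text) JavaSparkContext(org.apache.spark.api.java.JavaSparkContext) IntWritable(org.apache.hadoop.io.IntWritable)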

Example 100 with IntWritable

use of org.apache.hadoop.io.IntWritable in project hadoop-book by elephantscale.

the class Reduce method reduce.

public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    int sum = 0;
    while (values.hasNext()) {
        sum +=;
    output.collect(key, new IntWritable(sum));
Also used : IntWritable(org.apache.hadoop.io.IntWritable)


