use of org.apache.crunch.Pipeline in project crunch by cloudera.
the class AggregateTest method testWritables.
/**
 * Runs the min/max check over the "shakes.txt" fixture using the Writable type family.
 */
@Test
public void testWritables() throws Exception {
  final Pipeline mrPipeline = new MRPipeline(AggregateTest.class);
  // Read from a temporary copy of the fixture rather than the fixture itself.
  final PCollection<String> shakesLines =
      mrPipeline.readTextFile(FileHelper.createTempCopyOf("shakes.txt"));
  runMinMax(shakesLines, WritableTypeFamily.getInstance());
  mrPipeline.done();
}
use of org.apache.crunch.Pipeline in project crunch by cloudera.
the class WordCount method run.
/**
 * Builds and runs a word-count pipeline: reads a text file, splits every line on
 * whitespace, counts the occurrences of each word and writes the counts as text.
 *
 * @param args command-line arguments; exactly three are required, with {@code args[1]}
 *        used as the input path and {@code args[2]} as the output path
 * @return {@code 1} if the argument count is wrong (usage is printed to stderr),
 *         {@code 0} once the pipeline has been run
 * @throws Exception declared by the signature; propagated from the pipeline calls
 */
public int run(String[] args) throws Exception {
  // NOTE(review): three arguments are required and args[0] is never read, while the usage
  // text below names only "input output" -- confirm what callers pass before changing this.
  if (args.length != 3) {
    System.err.println();
    System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
    System.err.println();
    GenericOptionsParser.printGenericCommandUsage(System.err);
    return 1;
  }
  // Create an object to coordinate pipeline creation and execution.
  Pipeline pipeline = new MRPipeline(WordCount.class, getConf());
  // Reference a given text file as a collection of Strings.
  PCollection<String> lines = pipeline.readTextFile(args[1]);
  // Define a function that splits each line in a PCollection of Strings into a
  // PCollection made up of the individual words in the file.
  PCollection<String> words = lines.parallelDo(new DoFn<String, String>() {
    @Override
    public void process(String line, Emitter<String> emitter) {
      for (String word : line.split("\\s+")) {
        // split() produces an empty token for a blank line or a line that starts
        // with whitespace; an empty string is not a word, so it is not emitted.
        if (!word.isEmpty()) {
          emitter.emit(word);
        }
      }
    }
  }, // Indicates the serialization format
  Writables.strings());
  // The count method applies a series of Crunch primitives and returns
  // a map of the unique words in the input PCollection to their counts.
  // Best of all, the count() function doesn't need to know anything about
  // the kind of data stored in the input PCollection.
  PTable<String, Long> counts = words.count();
  // Instruct the pipeline to write the resulting counts to a text file.
  pipeline.writeTextFile(counts, args[2]);
  // Execute the pipeline as a MapReduce.
  pipeline.done();
  return 0;
}
use of org.apache.crunch.Pipeline in project crunch by cloudera.
the class MemPipelineFileWritingTest method testMemPipelineFileWriter.
/**
 * Writes a two-element in-memory collection as text through the MemPipeline and checks
 * that the target directory is created and that its non-hidden files hold exactly the
 * lines "hello" and "world".
 */
@Test
public void testMemPipelineFileWriter() throws Exception {
  File tmpDir = Files.createTempDir();
  // The directory must really be gone; otherwise the exists() check after the write
  // would pass even if the pipeline never created anything.
  assertTrue("could not remove temp dir before the write", tmpDir.delete());
  Pipeline p = MemPipeline.getInstance();
  PCollection<String> lines = MemPipeline.collectionOf("hello", "world");
  p.writeTextFile(lines, tmpDir.getAbsolutePath());
  p.done();
  assertTrue(tmpDir.exists());
  File[] files = tmpDir.listFiles();
  assertTrue(files != null && files.length > 0);
  boolean checkedDataFile = false;
  for (File f : files) {
    // Files whose names start with "." are skipped, as in the original check.
    if (!f.getName().startsWith(".")) {
      List<String> txt = Files.readLines(f, Charsets.UTF_8);
      assertEquals(ImmutableList.of("hello", "world"), txt);
      checkedDataFile = true;
    }
  }
  // Guard against a vacuous pass when every listed file was skipped above.
  assertTrue("no non-hidden output file was written", checkedDataFile);
}
use of org.apache.crunch.Pipeline in project crunch by cloudera.
the class AvroFileSourceTargetTest method testGeneric.
/**
 * Reads a generic Avro record back through an MRPipeline and checks that the
 * materialized records equal the one that was saved.
 *
 * @throws IOException declared by the signature; propagated from the file and pipeline calls
 */
@Test
public void testGeneric() throws IOException {
  // Derive the schema from Person's by replacing the text "Person" with "GenericPerson"
  // in its JSON form.
  String genericSchemaJson = Person.SCHEMA$.toString().replace("Person", "GenericPerson");
  Schema genericPersonSchema = new Schema.Parser().parse(genericSchemaJson);
  GenericRecord savedRecord = new GenericData.Record(genericPersonSchema);
  savedRecord.put("name", "John Doe");
  savedRecord.put("age", 42);
  savedRecord.put("siblingnames", Lists.newArrayList("Jimmy", "Jane"));
  // populateGenericFile is defined elsewhere; presumably it writes the records to avroFile.
  populateGenericFile(Lists.newArrayList(savedRecord), genericPersonSchema);
  Pipeline pipeline = new MRPipeline(AvroFileSourceTargetTest.class);
  PCollection<Record> genericCollection = pipeline.read(At.avroFile(avroFile.getAbsolutePath(), Avros.generics(genericPersonSchema)));
  // Copy the materialized records into a list before the pipeline is finished.
  List<Record> recordList = Lists.newArrayList(genericCollection.materialize());
  // Finish the pipeline, as the other pipeline tests do after materializing.
  pipeline.done();
  assertEquals(Lists.newArrayList(savedRecord), recordList);
}
use of org.apache.crunch.Pipeline in project crunch by cloudera.
the class AvroReflectTest method testReflection.
/**
 * Maps each line of the "set1.txt" fixture to a StringWrapper using the Avro reflect
 * type and checks the materialized wrappers against the expected values, in order.
 */
@Test
public void testReflection() throws IOException {
  final Pipeline mrPipeline = new MRPipeline(AvroReflectTest.class);
  final String inputPath = FileHelper.createTempCopyOf("set1.txt");
  final PCollection<String> inputLines = mrPipeline.readTextFile(inputPath);

  // Wrap every input line in its own StringWrapper.
  final PCollection<StringWrapper> wrapped = inputLines.parallelDo(new MapFn<String, StringWrapper>() {
    @Override
    public StringWrapper map(String input) {
      final StringWrapper wrapper = new StringWrapper();
      wrapper.setValue(input);
      return wrapper;
    }
  }, Avros.reflects(StringWrapper.class));

  // Copy the results out before the pipeline is finished.
  final List<StringWrapper> actual = Lists.newArrayList(wrapped.materialize());
  mrPipeline.done();

  final List<StringWrapper> expected = Lists.newArrayList(
      new StringWrapper("b"),
      new StringWrapper("c"),
      new StringWrapper("a"),
      new StringWrapper("e"));
  assertEquals(expected, actual);
}
Aggregations