Use of org.apache.hadoop.mapreduce.JobContext in project jena by apache.
The class AbstractNodeTupleOutputFormatTests, method testOutput:
/**
 * Tests output.
 *
 * @param f
 *            File to output to
 * @param num
 *            Number of tuples to output
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testOutput(File f, int num) throws IOException, InterruptedException {
    // Prepare configuration
    Configuration config = this.prepareConfiguration();
    // Set up fake job
    OutputFormat<NullWritable, T> outputFormat = this.getOutputFormat();
    Job job = Job.getInstance(config);
    job.setOutputFormatClass(outputFormat.getClass());
    this.addOutputPath(f, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertNotNull(FileOutputFormat.getOutputPath(context));
    // Output the data
    TaskAttemptID id = new TaskAttemptID("outputTest", 1, TaskType.MAP, 1, 1);
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
    RecordWriter<NullWritable, T> writer = outputFormat.getRecordWriter(taskContext);
    Iterator<T> tuples = this.generateTuples(num);
    while (tuples.hasNext()) {
        writer.write(NullWritable.get(), tuples.next());
    }
    writer.close(taskContext);
    // Check output
    File outputFile = this.findOutputFile(this.folder.getRoot(), context);
    Assert.assertNotNull(outputFile);
    this.checkTuples(outputFile, num);
}
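Here JobContextImpl and TaskAttemptContextImpl drive a RecordWriter without running a real job. A minimal standalone sketch of the same pattern, assuming a concrete TextOutputFormat in place of the abstract getOutputFormat() hook and a placeholder output directory:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordWriterSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Placeholder output directory; any writable local path works
        FileOutputFormat.setOutputPath(job, new Path("/tmp/writer-demo"));
        // A synthetic task attempt, mirroring the test's TaskAttemptID usage
        TaskAttemptID id = new TaskAttemptID("demo", 1, TaskType.MAP, 1, 1);
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), id);
        TextOutputFormat<NullWritable, Text> format = new TextOutputFormat<>();
        RecordWriter<NullWritable, Text> writer = format.getRecordWriter(taskContext);
        writer.write(NullWritable.get(), new Text("hello"));
        // Until a committer runs, the file sits under a _temporary attempt directory
        writer.close(taskContext);
    }
}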
Use of org.apache.hadoop.mapreduce.JobContext in project jena by apache.
The class AbstractNodeTupleInputFormatTests, method testSingleInput:
/**
 * Runs a test with a single input.
 *
 * @param config
 *            Configuration
 * @param input
 *            Input file
 * @param expectedSplits
 *            Expected number of splits
 * @param expectedTuples
 *            Expected number of tuples
 * @throws IOException
 * @throws InterruptedException
 */
protected final void testSingleInput(Configuration config, File input, int expectedSplits, int expectedTuples) throws IOException, InterruptedException {
    // Set up fake job
    InputFormat<LongWritable, T> inputFormat = this.getInputFormat();
    Job job = Job.getInstance(config);
    job.setInputFormatClass(inputFormat.getClass());
    this.addInputPath(input, job.getConfiguration(), job);
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    Assert.assertEquals(1, FileInputFormat.getInputPaths(context).length);
    NLineInputFormat.setNumLinesPerSplit(job, LARGE_SIZE);
    // Check splits
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(expectedSplits, splits.size());
    // Check tuples
    for (InputSplit split : splits) {
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
        RecordReader<LongWritable, T> reader = inputFormat.createRecordReader(split, taskContext);
        reader.initialize(split, taskContext);
        this.checkTuples(reader, expectedTuples);
    }
}
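The same trick works on the input side: a JobContextImpl built from a Job's configuration is enough for getSplits(), and a TaskAttemptContextImpl is enough to initialize each reader. A minimal sketch, assuming TextInputFormat and a placeholder input path:

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class SplitReaderSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        // Placeholder input path; point this at any existing text file
        FileInputFormat.addInputPath(job, new Path("/tmp/input.txt"));
        JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
        TextInputFormat format = new TextInputFormat();
        List<InputSplit> splits = format.getSplits(context);
        for (InputSplit split : splits) {
            TaskAttemptContext taskContext =
                    new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
            RecordReader<LongWritable, Text> reader = format.createRecordReader(split, taskContext);
            reader.initialize(split, taskContext);
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
            }
            reader.close();
        }
    }
}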
Use of org.apache.hadoop.mapreduce.JobContext in project elephant-bird by twitter.
The class TestLuceneIndexInputFormat, method testGetSplits:
@Test
public void testGetSplits() throws Exception {
    DummyLuceneInputFormat lif = new DummyLuceneInputFormat();
    Configuration conf = new Configuration();
    LuceneIndexInputFormat.setInputPaths(ImmutableList.of(
        new Path("src/test/resources/com/twitter/elephantbird/mapreduce/input/sample_indexes/")), conf);
    LuceneIndexInputFormat.setMaxCombinedIndexSizePerSplitBytes(15L, conf);
    JobContext jobContext = createStrictMock(JobContext.class);
    expect(HadoopCompat.getConfiguration(jobContext)).andStubReturn(conf);
    replay(jobContext);
    List<InputSplit> splits = lif.getSplits(jobContext);
    LuceneIndexInputSplit split = (LuceneIndexInputSplit) splits.get(0);
    assertEquals(2, split.getIndexDirs().size());
    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/index-1"));
    assertTrue(split.getIndexDirs().get(1).toString().endsWith("sample_indexes/more-indexes/index-3"));
    split = (LuceneIndexInputSplit) splits.get(1);
    assertEquals(1, split.getIndexDirs().size());
    assertTrue(split.getIndexDirs().get(0).toString().endsWith("sample_indexes/index-2"));
}
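Rather than constructing a JobContextImpl, this test stubs the JobContext with EasyMock; HadoopCompat.getConfiguration() hides the Hadoop 1/2 difference, where JobContext changed from a class to an interface. A stripped-down sketch of the mocking, assuming Hadoop 2.x (where JobContext is an interface and can be stubbed directly without the compat shim):

import static org.easymock.EasyMock.*;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;

// A sketch: stub only the one method the input format consults. With a
// strict mock, any other call on jobContext will fail the test.
public JobContext mockContext(Configuration conf) {
    JobContext jobContext = createStrictMock(JobContext.class);
    expect(jobContext.getConfiguration()).andStubReturn(conf);
    replay(jobContext);
    return jobContext;
}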
Use of org.apache.hadoop.mapreduce.JobContext in project incubator-rya by apache.
The class GraphXInputFormatTest, method testInputFormat:
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder()
        .setSubject(new RyaURI("http://www.google.com"))
        .setPredicate(new RyaURI("http://some_other_uri"))
        .setObject(new RyaURI("http://www.yahoo.com"))
        .setColumnVisibility(new byte[0])
        .setValue(new byte[0])
        .build();
    apiImpl.add(input);
    Job jobConf = Job.getInstance();
    GraphXInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    GraphXInputFormat.setConnectorInfo(jobConf, username, password);
    GraphXInputFormat.setInputTableName(jobConf, table);
    GraphXInputFormat.setScanIsolation(jobConf, false);
    GraphXInputFormat.setLocalIterators(jobConf, false);
    GraphXInputFormat.setOfflineTableScan(jobConf, false);
    GraphXInputFormat inputFormat = new GraphXInputFormat();
    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(1, splits.size());
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));
    RecordReader<Object, RyaTypeWritable> reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);
    RyaStatementRecordReader ryaStatementRecordReader = (RyaStatementRecordReader) reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);
    List<RyaType> results = new ArrayList<RyaType>();
    while (ryaStatementRecordReader.nextKeyValue()) {
        RyaTypeWritable writable = ryaStatementRecordReader.getCurrentValue();
        RyaType value = writable.getRyaType();
        // Copy the fields out of the writable, since Hadoop may reuse it
        RyaType type = new RyaType();
        type.setData(value.getData());
        type.setDataType(value.getDataType());
        results.add(type);
    }
    // Assert.assertTrue(results.size() == 2);
    // Assert.assertTrue(results.contains(input));
}
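The loop copies each value field by field instead of storing getCurrentValue() directly, because Hadoop record readers may reuse the same Writable instance across nextKeyValue() calls. A hypothetical drain helper would have to make that copy explicit:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import org.apache.hadoop.mapreduce.RecordReader;

// Hypothetical helper: drain a RecordReader into a list. The copy function is
// essential because the reader may hand back the same Writable for every record.
static <K, V, R> List<R> drain(RecordReader<K, V> reader, Function<V, R> copy)
        throws IOException, InterruptedException {
    List<R> results = new ArrayList<>();
    while (reader.nextKeyValue()) {
        results.add(copy.apply(reader.getCurrentValue()));
    }
    return results;
}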
Use of org.apache.hadoop.mapreduce.JobContext in project incubator-rya by apache.
The class RyaInputFormatTest, method testInputFormat:
@Test
public void testInputFormat() throws Exception {
    RyaStatement input = RyaStatement.builder()
        .setSubject(new RyaURI("http://www.google.com"))
        .setPredicate(new RyaURI("http://some_other_uri"))
        .setObject(new RyaURI("http://www.yahoo.com"))
        .setColumnVisibility(new byte[0])
        .setValue(new byte[0])
        .build();
    apiImpl.add(input);
    Job jobConf = Job.getInstance();
    RyaInputFormat.setMockInstance(jobConf, instance.getInstanceName());
    RyaInputFormat.setConnectorInfo(jobConf, username, password);
    RyaInputFormat.setTableLayout(jobConf, TABLE_LAYOUT.SPO);
    AccumuloInputFormat.setInputTableName(jobConf, table);
    AccumuloInputFormat.setScanIsolation(jobConf, false);
    AccumuloInputFormat.setLocalIterators(jobConf, false);
    AccumuloInputFormat.setOfflineTableScan(jobConf, false);
    RyaInputFormat inputFormat = new RyaInputFormat();
    JobContext context = new JobContextImpl(jobConf.getConfiguration(), jobConf.getJobID());
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(1, splits.size());
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));
    RecordReader<Text, RyaStatementWritable> reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);
    RyaStatementRecordReader ryaStatementRecordReader = (RyaStatementRecordReader) reader;
    ryaStatementRecordReader.initialize(splits.get(0), taskAttemptContext);
    List<RyaStatement> results = new ArrayList<RyaStatement>();
    while (ryaStatementRecordReader.nextKeyValue()) {
        RyaStatementWritable writable = ryaStatementRecordReader.getCurrentValue();
        RyaStatement value = writable.getRyaStatement();
        // Copy the statement out of the writable, since Hadoop may reuse it
        RyaStatement stmt = RyaStatement.builder()
            .setSubject(value.getSubject())
            .setPredicate(value.getPredicate())
            .setObject(value.getObject())
            .setContext(value.getContext())
            .setQualifier(value.getQualifer())
            .setColumnVisibility(value.getColumnVisibility())
            .setValue(value.getValue())
            .build();
        results.add(stmt);
    }
    Assert.assertEquals(2, results.size());
    Assert.assertTrue(results.contains(input));
}
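Both Rya tests share all of their JobContext scaffolding. A hypothetical extraction (openSingleSplit is an invented name; every call inside it appears in the tests above):

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.junit.Assert;

// Hypothetical extraction of the shared scaffolding: wrap a configured Job in
// a JobContextImpl, assert exactly one split, and return an initialized reader.
static <K, V> RecordReader<K, V> openSingleSplit(Job job, InputFormat<K, V> inputFormat)
        throws IOException, InterruptedException {
    JobContext context = new JobContextImpl(job.getConfiguration(), job.getJobID());
    List<InputSplit> splits = inputFormat.getSplits(context);
    Assert.assertEquals(1, splits.size());
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(
            context.getConfiguration(), new TaskAttemptID(new TaskID(), 1));
    RecordReader<K, V> reader = inputFormat.createRecordReader(splits.get(0), taskAttemptContext);
    reader.initialize(splits.get(0), taskAttemptContext);
    return reader;
}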