Use of org.apache.hadoop.mrunit.types.Pair in project jena by apache.
The class TriplesToQuadsBySubjectMapperTest, method triples_to_quads_mapper_01.
/**
 * Tests triples to quads conversion
 *
 * @throws IOException
 */
@Test
public void triples_to_quads_mapper_01() throws IOException {
    MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver();
    Triple t = new Triple(NodeFactory.createURI("http://s"), NodeFactory.createURI("http://p"), NodeFactory.createLiteral("test"));
    // The quad is placed in a graph named after the triple's subject
    Quad q = new Quad(t.getSubject(), t);
    driver.withInput(new Pair<LongWritable, TripleWritable>(new LongWritable(1), new TripleWritable(t)))
          .withOutput(new Pair<LongWritable, QuadWritable>(new LongWritable(1), new QuadWritable(q)));
    driver.runTest();
}
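The getMapDriver() helper is not shown in this excerpt. A minimal sketch of what it presumably wires up, assuming the mapper under test is jena-elephas' TriplesToQuadsBySubjectMapper (inferred from the test class name):

protected MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> getMapDriver() {
    // Bind MRUnit's MapDriver to the mapper under test; the mapper class
    // name is an assumption based on the test class name.
    return MapDriver.newMapDriver(new TriplesToQuadsBySubjectMapper<LongWritable>());
}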
Use of org.apache.hadoop.mrunit.types.Pair in project jena by apache.
The class CharacteristicSetReducerTest, method characteristic_set_reducer_04.
/**
 * Tests characteristic set reduction
 *
 * @throws IOException
 */
@Test
public void characteristic_set_reducer_04() throws IOException {
    MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this.getMapReduceDriver();
    this.createSet(driver, 2, 1, "http://predicate");
    this.createSet(driver, 1, 1, "http://other");
    // Verify outputs without enforcing their order
    driver.runTest(false);
    // Re-run on a fresh driver to inspect the actual outputs
    driver = getMapReduceDriver();
    createSet(driver, 2, 1, "http://predicate");
    createSet(driver, 1, 1, "http://other");
    List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
    for (Pair<CharacteristicSetWritable, NullWritable> pair : results) {
        CharacteristicSetWritable cw = pair.getFirst();
        // The set for http://predicate was fed in twice, the other only once
        boolean expectTwo = cw.getCharacteristics().next().getNode().get().hasURI("http://predicate");
        Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get());
    }
}
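The createSet() helper is likewise not shown. A plausible sketch under stated assumptions: it feeds the same characteristic set to the driver a given number of times and registers the merged set, with its summed count, as expected output. Only the accessors appear in the excerpt above, so the CharacteristicSetWritable constructors used here are assumptions:

protected void createSet(MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver, int inputOccurrences, int expectedOccurrences, String... predicates) {
    CharacteristicWritable[] characteristics = new CharacteristicWritable[predicates.length];
    for (int i = 0; i < predicates.length; i++) {
        characteristics[i] = new CharacteristicWritable(NodeFactory.createURI(predicates[i]));
    }
    // Feed the same set to the map phase inputOccurrences times
    CharacteristicSetWritable set = new CharacteristicSetWritable(characteristics);
    for (int i = 0; i < inputOccurrences; i++) {
        driver.addInput(set, set);
    }
    // Expect a merged set whose count equals the number of input occurrences
    // (assumed constructor taking an initial count)
    CharacteristicSetWritable expected = new CharacteristicSetWritable(inputOccurrences, characteristics);
    for (int i = 0; i < expectedOccurrences; i++) {
        driver.addOutput(expected, NullWritable.get());
    }
}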
Use of org.apache.hadoop.mrunit.types.Pair in project jena by apache.
The class CharacteristicSetReducerTest, method characteristic_set_reducer_02.
/**
 * Tests characteristic set reduction
 *
 * @throws IOException
 */
@Test
public void characteristic_set_reducer_02() throws IOException {
    MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this.getMapReduceDriver();
    this.createSet(driver, 2, 1, "http://predicate");
    driver.runTest(false);
    // Re-run on a fresh driver to inspect the actual output
    driver = getMapReduceDriver();
    createSet(driver, 2, 1, "http://predicate");
    List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
    CharacteristicSetWritable cw = results.get(0).getFirst();
    // Two input occurrences should merge into a single set with count 2
    Assert.assertEquals(2, cw.getCount().get());
}
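Both reducer tests also rely on an unshown getMapReduceDriver() helper. A minimal sketch, assuming an identity mapper in front of the reducer under test (presumably CharacteristicSetReducer, going by the test class name):

protected MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> getMapReduceDriver() {
    MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = MapReduceDriver.newMapReduceDriver();
    // Hadoop's base Mapper passes key/value pairs through unchanged
    driver.setMapper(new Mapper<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable>());
    // Reducer class name is an assumption inferred from the test class name
    driver.setReducer(new CharacteristicSetReducer());
    return driver;
}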
Use of org.apache.hadoop.mrunit.types.Pair in project incubator-rya by apache.
The class ForwardChainTest, method testTransitiveChain.
/**
 * MultipleOutputs support in MRUnit is minimal, so we have to check each
 * map/reduce step explicitly.
 */
@Test
public void testTransitiveChain() throws Exception {
    int max = 8;
    int n = 4;
    URI prop = TestUtils.uri("subOrganizationOf");
    Map<Integer, Map<Integer, Pair<Fact, NullWritable>>> connections = new HashMap<>();
    for (int i = 0; i <= max; i++) {
        connections.put(i, new HashMap<Integer, Pair<Fact, NullWritable>>());
    }
    // Initial input: make a chain from org0 to org8
    for (int i = 0; i < max; i++) {
        URI orgI = TestUtils.uri("org" + i);
        URI orgJ = TestUtils.uri("org" + (i + 1));
        Fact triple = new Fact(orgI, prop, orgJ);
        connections.get(i).put(i + 1, new Pair<>(triple, NullWritable.get()));
    }
    for (int i = 1; i <= n; i++) {
        // Map:
        MapDriver<Fact, NullWritable, ResourceWritable, Fact> mDriver = new MapDriver<>();
        mDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
        mDriver.setMapper(new ForwardChain.FileMapper(schema));
        for (int j : connections.keySet()) {
            for (int k : connections.get(j).keySet()) {
                mDriver.addInput(connections.get(j).get(k));
            }
        }
        List<Pair<ResourceWritable, Fact>> mapped = mDriver.run();
        // Convert data for the reduce phase:
        ReduceFeeder<ResourceWritable, Fact> feeder = new ReduceFeeder<>(mDriver.getConfiguration());
        List<KeyValueReuseList<ResourceWritable, Fact>> intermediate = feeder.sortAndGroup(mapped, new ResourceWritable.SecondaryComparator(), new ResourceWritable.PrimaryComparator());
        // Reduce, and compare to expected output:
        ReduceDriver<ResourceWritable, Fact, Fact, NullWritable> rDriver = new ReduceDriver<>();
        rDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
        rDriver.setReducer(new ForwardChain.ReasoningReducer(schema));
        rDriver.addAllElements(intermediate);
        // Step i joins facts from earlier steps, so every new fact spans
        // between 2^(i-1) + 1 and 2^i links
        int maxSpan = (int) Math.pow(2, i);
        int minSpan = (maxSpan / 2) + 1;
        // For each j, build all paths starting with j:
        for (int j = 0; j < max; j++) {
            // This includes any path of length k for appropriate k:
            for (int k = minSpan; k <= maxSpan && j + k <= max; k++) {
                int middle = j + minSpan - 1;
                URI left = TestUtils.uri("org" + j);
                URI right = TestUtils.uri("org" + (j + k));
                Fact triple = new Fact(left, prop, right, i, OwlRule.PRP_TRP, TestUtils.uri("org" + middle));
                triple.addSource(connections.get(j).get(middle).getFirst());
                triple.addSource(connections.get(middle).get(j + k).getFirst());
                Pair<Fact, NullWritable> expected = new Pair<>(triple, NullWritable.get());
                connections.get(j).put(j + k, expected);
                rDriver.addMultiOutput("intermediate", expected);
            }
        }
        rDriver.runTest();
    }
}
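The span arithmetic above encodes how transitive-closure reasoning doubles path lengths each round: a step-i join combines two facts derived in earlier steps, so every new fact spans between 2^(i-1) + 1 and 2^i links of the original chain. A standalone illustration of the same formulas:

// Prints the range of new path lengths each reasoning step can derive,
// using the same maxSpan/minSpan formulas as the test above.
for (int step = 1; step <= 4; step++) {
    int maxSpan = (int) Math.pow(2, step);
    int minSpan = (maxSpan / 2) + 1;
    System.out.println("step " + step + ": new path lengths " + minSpan + ".." + maxSpan);
}
// step 1: new path lengths 2..2
// step 2: new path lengths 3..4
// step 3: new path lengths 5..8
// step 4: new path lengths 9..16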
Use of org.apache.hadoop.mrunit.types.Pair in project nutch by apache.
The class TestIndexerMapReduce, method runIndexer.
/**
 * Run {@link IndexerMapReduce#reduce} to get an "indexed"
 * {@link NutchDocument} by passing objects from segment and CrawlDb to the
 * indexer.
 *
 * @param dbDatum
 *          crawl datum from CrawlDb
 * @param fetchDatum
 *          crawl datum (fetch status) from segment
 * @param parseText
 *          plain text from parsed document
 * @param parseData
 *          parse data
 * @param content
 *          (optional, if indexing binary content) protocol content
 * @return "indexed" document
 */
public NutchDocument runIndexer(CrawlDatum dbDatum, CrawlDatum fetchDatum, ParseText parseText, ParseData parseData, Content content) {
    List<NutchWritable> values = new ArrayList<NutchWritable>();
    values.add(new NutchWritable(dbDatum));
    values.add(new NutchWritable(fetchDatum));
    values.add(new NutchWritable(parseText));
    values.add(new NutchWritable(parseData));
    values.add(new NutchWritable(content));
    reduceDriver = ReduceDriver.newReduceDriver(reducer);
    reduceDriver.getConfiguration().addResource(configuration);
    reduceDriver.withInput(testUrlText, values);
    List<Pair<Text, NutchIndexAction>> reduceResult;
    NutchDocument doc = null;
    try {
        reduceResult = reduceDriver.run();
        // Keep the last output document that is not a deletion
        for (Pair<Text, NutchIndexAction> p : reduceResult) {
            if (p.getSecond().action != NutchIndexAction.DELETE) {
                doc = p.getSecond().doc;
            }
        }
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
    }
    return doc;
}
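A hedged usage sketch (not part of the original test class): exercising runIndexer() with minimal CrawlDb and segment records for a single URL. The field values are illustrative, and the constructor signatures and status constants are best-effort assumptions that may vary across Nutch versions:

@Test
public void testIndexerProducesDocument() {
    // Minimal CrawlDb and segment records for one fetched page
    CrawlDatum dbDatum = new CrawlDatum(CrawlDatum.STATUS_DB_FETCHED, 1);
    CrawlDatum fetchDatum = new CrawlDatum(CrawlDatum.STATUS_FETCH_SUCCESS, 1);
    ParseData parseData = new ParseData(new ParseStatus(ParseStatus.SUCCESS),
            "Test page", new Outlink[0], new Metadata());
    ParseText parseText = new ParseText("Hello, Nutch!");
    Content content = new Content("http://example.test/", "http://example.test/",
            "Hello, Nutch!".getBytes(StandardCharsets.UTF_8), "text/plain",
            new Metadata(), new Configuration());
    NutchDocument doc = runIndexer(dbDatum, fetchDatum, parseText, parseData, content);
    Assert.assertNotNull("Expected an indexed document", doc);
}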