
Example 1 with Pair

Use of org.apache.hadoop.mrunit.types.Pair in project jena by apache.

From class TriplesToQuadsBySubjectMapperTest, method triples_to_quads_mapper_01.

/**
 * Tests triples to quads conversion
 * 
 * @throws IOException
 */
@Test
public void triples_to_quads_mapper_01() throws IOException {
    MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = this.getMapDriver();
    Triple t = new Triple(NodeFactory.createURI("http://s"), NodeFactory.createURI("http://p"), NodeFactory.createLiteral("test"));
    // The mapper names the quad's graph after the triple's subject
    Quad q = new Quad(t.getSubject(), t);
    driver.withInput(new Pair<LongWritable, TripleWritable>(new LongWritable(1), new TripleWritable(t)))
            .withOutput(new Pair<LongWritable, QuadWritable>(new LongWritable(1), new QuadWritable(q)));
    driver.runTest();
}
Also used: Triple (org.apache.jena.graph.Triple), TripleWritable (org.apache.jena.hadoop.rdf.types.TripleWritable), Quad (org.apache.jena.sparql.core.Quad), QuadWritable (org.apache.jena.hadoop.rdf.types.QuadWritable), LongWritable (org.apache.hadoop.io.LongWritable), Pair (org.apache.hadoop.mrunit.types.Pair), Test (org.junit.Test)
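
The test above obtains its driver from a getMapDriver() helper defined in a base class that is not shown. As a rough sketch (an assumption, not the actual Jena test harness), such a helper could wire the driver to the mapper the class name points at, TriplesToQuadsBySubjectMapper:

// Hypothetical helper, not shown in the example above; the key type parameter
// of TriplesToQuadsBySubjectMapper is assumed to be LongWritable to match the
// driver's signature.
protected MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> getMapDriver() {
    MapDriver<LongWritable, TripleWritable, LongWritable, QuadWritable> driver = new MapDriver<>();
    driver.setMapper(new TriplesToQuadsBySubjectMapper<LongWritable>());
    return driver;
}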

Example 2 with Pair

Use of org.apache.hadoop.mrunit.types.Pair in project jena by apache.

From class CharacteristicSetReducerTest, method characteristic_set_reducer_04.

/**
 * Test characteristic set reduction
 * 
 * @throws IOException
 */
@Test
public void characteristic_set_reducer_04() throws IOException {
    MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this.getMapReduceDriver();
    this.createSet(driver, 2, 1, "http://predicate");
    this.createSet(driver, 1, 1, "http://other");
    // First pass: assert the expected merged output, ignoring output order
    driver.runTest(false);
    // Second pass with a fresh driver: inspect the actual output pairs
    driver = getMapReduceDriver();
    createSet(driver, 2, 1, "http://predicate");
    createSet(driver, 1, 1, "http://other");
    List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
    for (Pair<CharacteristicSetWritable, NullWritable> pair : results) {
        CharacteristicSetWritable cw = pair.getFirst();
        // The http://predicate set went in twice, so its merged count should be 2
        boolean expectTwo = cw.getCharacteristics().next().getNode().get().hasURI("http://predicate");
        Assert.assertEquals(expectTwo ? 2 : 1, cw.getCount().get());
    }
}
Also used: CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable), NullWritable (org.apache.hadoop.io.NullWritable), Pair (org.apache.hadoop.mrunit.types.Pair), Test (org.junit.Test)
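
This test (and Example 3 below) also relies on a createSet(...) helper that is not shown. A plausible sketch, assuming the second argument is how many times the set is added as input and the third how many times it is expected in the output; the CharacteristicWritable construction is likewise an assumption inferred from the accessors used above:

// Hypothetical sketch of the createSet(...) helper used by these tests.
protected void createSet(MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver,
        int inputOccurrences, int outputOccurrences, String... predicates) {
    CharacteristicSetWritable set = new CharacteristicSetWritable();
    for (String predicate : predicates) {
        // Assumed API: add a characteristic for each predicate URI
        set.add(new CharacteristicWritable(NodeFactory.createURI(predicate)));
    }
    for (int i = 0; i < inputOccurrences; i++) {
        driver.addInput(set, set);
    }
    // The reducer is expected to merge duplicate inputs, so the set normally
    // appears in the output fewer times than it went in
    for (int i = 0; i < outputOccurrences; i++) {
        driver.addOutput(set, NullWritable.get());
    }
}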

Example 3 with Pair

Use of org.apache.hadoop.mrunit.types.Pair in project jena by apache.

From class CharacteristicSetReducerTest, method characteristic_set_reducer_02.

/**
 * Test characteristic set reduction
 * 
 * @throws IOException
 */
@Test
public void characteristic_set_reducer_02() throws IOException {
    MapReduceDriver<CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, CharacteristicSetWritable, NullWritable> driver = this.getMapReduceDriver();
    this.createSet(driver, 2, 1, "http://predicate");
    driver.runTest(false);
    driver = getMapReduceDriver();
    createSet(driver, 2, 1, "http://predicate");
    List<Pair<CharacteristicSetWritable, NullWritable>> results = driver.run();
    CharacteristicSetWritable cw = results.get(0).getFirst();
    // The set was added twice as input, so the reducer should merge it to a count of 2
    Assert.assertEquals(2, cw.getCount().get());
}
Also used: CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable), NullWritable (org.apache.hadoop.io.NullWritable), Pair (org.apache.hadoop.mrunit.types.Pair), Test (org.junit.Test)

Example 4 with Pair

Use of org.apache.hadoop.mrunit.types.Pair in project incubator-rya by apache.

From class ForwardChainTest, method testTransitiveChain.

/**
 * MultipleOutputs support is minimal, so we have to check each map/reduce
 * step explicitly
 */
@Test
public void testTransitiveChain() throws Exception {
    int max = 8;
    int n = 4;
    URI prop = TestUtils.uri("subOrganizationOf");
    Map<Integer, Map<Integer, Pair<Fact, NullWritable>>> connections = new HashMap<>();
    for (int i = 0; i <= max; i++) {
        connections.put(i, new HashMap<Integer, Pair<Fact, NullWritable>>());
    }
    // Initial input: make a chain from org0 to org8
    for (int i = 0; i < max; i++) {
        URI orgI = TestUtils.uri("org" + i);
        URI orgJ = TestUtils.uri("org" + (i + 1));
        Fact triple = new Fact(orgI, prop, orgJ);
        connections.get(i).put(i + 1, new Pair<>(triple, NullWritable.get()));
    }
    for (int i = 1; i <= n; i++) {
        // Map:
        MapDriver<Fact, NullWritable, ResourceWritable, Fact> mDriver = new MapDriver<>();
        mDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
        mDriver.setMapper(new ForwardChain.FileMapper(schema));
        for (int j : connections.keySet()) {
            for (int k : connections.get(j).keySet()) {
                mDriver.addInput(connections.get(j).get(k));
            }
        }
        List<Pair<ResourceWritable, Fact>> mapped = mDriver.run();
        // Convert data for the reduce phase: sort and group the mapped pairs the
        // way the framework would between map and reduce
        ReduceFeeder<ResourceWritable, Fact> feeder = new ReduceFeeder<>(mDriver.getConfiguration());
        List<KeyValueReuseList<ResourceWritable, Fact>> intermediate = feeder.sortAndGroup(mapped,
                new ResourceWritable.SecondaryComparator(), new ResourceWritable.PrimaryComparator());
        // Reduce, and compare to expected output:
        ReduceDriver<ResourceWritable, Fact, Fact, NullWritable> rDriver = new ReduceDriver<>();
        rDriver.getConfiguration().setInt(MRReasoningUtils.STEP_PROP, i);
        rDriver.setReducer(new ForwardChain.ReasoningReducer(schema));
        rDriver.addAllElements(intermediate);
        // After step i, newly derived links span between 2^(i-1)+1 and 2^i hops
        int maxSpan = (int) Math.pow(2, i);
        int minSpan = (maxSpan / 2) + 1;
        // For each j, build all paths starting with j:
        for (int j = 0; j < max; j++) {
            // This includes any path of length k for appropriate k:
            for (int k = minSpan; k <= maxSpan && j + k <= max; k++) {
                int middle = j + minSpan - 1;
                URI left = TestUtils.uri("org" + j);
                URI right = TestUtils.uri("org" + (j + k));
                Fact triple = new Fact(left, prop, right, i, OwlRule.PRP_TRP, TestUtils.uri("org" + middle));
                triple.addSource(connections.get(j).get(middle).getFirst());
                triple.addSource(connections.get(middle).get(j + k).getFirst());
                Pair<Fact, NullWritable> expected = new Pair<>(triple, NullWritable.get());
                connections.get(j).put(j + k, expected);
                rDriver.addMultiOutput("intermediate", expected);
            }
        }
        rDriver.runTest();
    }
}
Also used: HashMap (java.util.HashMap), ReduceFeeder (org.apache.hadoop.mrunit.mapreduce.ReduceFeeder), URI (org.openrdf.model.URI), ReduceDriver (org.apache.hadoop.mrunit.mapreduce.ReduceDriver), Pair (org.apache.hadoop.mrunit.types.Pair), MapDriver (org.apache.hadoop.mrunit.mapreduce.MapDriver), KeyValueReuseList (org.apache.hadoop.mrunit.types.KeyValueReuseList), Fact (org.apache.rya.reasoning.Fact), NullWritable (org.apache.hadoop.io.NullWritable), Map (java.util.Map), PrepareForTest (org.powermock.core.classloader.annotations.PrepareForTest), Test (org.junit.Test)

Example 5 with Pair

Use of org.apache.hadoop.mrunit.types.Pair in project nutch by apache.

From class TestIndexerMapReduce, method runIndexer.

/**
 * Run {@link IndexerMapReduce#reduce(...)} to get an &quot;indexed&quot;
 * {@link NutchDocument} by passing objects from segment and CrawlDb to the
 * indexer.
 *
 * @param dbDatum
 *          crawl datum from CrawlDb
 * @param fetchDatum
 *          crawl datum (fetch status) from segment
 * @param parseText
 *          plain text from parsed document
 * @param parseData
 *          parse data
 * @param content
 *          (optional, only if binary content is indexed) protocol content
 * @return &quot;indexed&quot; document
 */
public NutchDocument runIndexer(CrawlDatum dbDatum, CrawlDatum fetchDatum, ParseText parseText, ParseData parseData, Content content) {
    List<NutchWritable> values = new ArrayList<NutchWritable>();
    values.add(new NutchWritable(dbDatum));
    values.add(new NutchWritable(fetchDatum));
    values.add(new NutchWritable(parseText));
    values.add(new NutchWritable(parseData));
    values.add(new NutchWritable(content));
    reduceDriver = ReduceDriver.newReduceDriver(reducer);
    reduceDriver.getConfiguration().addResource(configuration);
    reduceDriver.withInput(testUrlText, values);
    List<Pair<Text, NutchIndexAction>> reduceResult;
    NutchDocument doc = null;
    try {
        reduceResult = reduceDriver.run();
        for (Pair<Text, NutchIndexAction> p : reduceResult) {
            // Keep the document from the last non-delete index action
            if (p.getSecond().action != NutchIndexAction.DELETE) {
                doc = p.getSecond().doc;
            }
        }
    } catch (IOException e) {
        LOG.error(StringUtils.stringifyException(e));
    }
    return doc;
}
Also used: ArrayList (java.util.ArrayList), NutchWritable (org.apache.nutch.crawl.NutchWritable), Text (org.apache.hadoop.io.Text), ParseText (org.apache.nutch.parse.ParseText), IOException (java.io.IOException), Pair (org.apache.hadoop.mrunit.types.Pair)
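
A hypothetical call site for this helper (the fixture objects dbDatum, fetchDatum, parseData, and content are assumed to exist in the test):

// Illustrative usage only; all fixture objects are assumptions.
NutchDocument doc = runIndexer(dbDatum, fetchDatum, new ParseText("plain text body of the page"), parseData, content);
Assert.assertNotNull("expected the reducer to emit an indexed document", doc);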

Aggregations

Pair (org.apache.hadoop.mrunit.types.Pair): 12
Test (org.junit.Test): 11
NullWritable (org.apache.hadoop.io.NullWritable): 8
GenericRecord (org.apache.avro.generic.GenericRecord): 4
AvroKey (org.apache.avro.mapred.AvroKey): 4
LongWritable (org.apache.hadoop.io.LongWritable): 3
Triple (org.apache.jena.graph.Triple): 3
CharacteristicSetWritable (org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable): 3
QuadWritable (org.apache.jena.hadoop.rdf.types.QuadWritable): 3
TripleWritable (org.apache.jena.hadoop.rdf.types.TripleWritable): 3
Quad (org.apache.jena.sparql.core.Quad): 3
HashMap (java.util.HashMap): 2
BytesWritable (org.apache.hadoop.io.BytesWritable): 2
File (java.io.File): 1
FileInputStream (java.io.FileInputStream): 1
IOException (java.io.IOException): 1
ArrayList (java.util.ArrayList): 1
Map (java.util.Map): 1
Set (java.util.Set): 1
Text (org.apache.hadoop.io.Text): 1
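
All five examples follow the same MRUnit pattern: build a driver, feed it inputs, then either assert expected Pair outputs with runTest() or inspect the List<Pair<...>> returned by run(). The following self-contained sketch shows that pattern with a trivial mapper; everything in it is illustrative and not taken from the projects above:

import java.io.IOException;
import java.util.List;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mrunit.mapreduce.MapDriver;
import org.apache.hadoop.mrunit.types.Pair;
import org.junit.Assert;
import org.junit.Test;

public class PairPatternTest {

    // Trivial mapper used only for illustration: upper-cases each input value
    public static class UpperCaseMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            context.write(key, new Text(value.toString().toUpperCase()));
        }
    }

    @Test
    public void pair_pattern() throws IOException {
        // Declarative style: assert the expected output directly
        MapDriver<LongWritable, Text, LongWritable, Text> driver = MapDriver.newMapDriver(new UpperCaseMapper());
        driver.withInput(new Pair<LongWritable, Text>(new LongWritable(1), new Text("hello")))
                .withOutput(new Pair<LongWritable, Text>(new LongWritable(1), new Text("HELLO")))
                .runTest();
        // Imperative style: run() hands back the actual output pairs
        driver = MapDriver.newMapDriver(new UpperCaseMapper());
        driver.withInput(new LongWritable(1), new Text("hello"));
        List<Pair<LongWritable, Text>> results = driver.run();
        Assert.assertEquals("HELLO", results.get(0).getSecond().toString());
    }
}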