Search in sources:

Example 51 with LongWritable

use of org.apache.hadoop.io.LongWritable in project jena by apache.

From class TestDistinctTriples, method distinct_triples_03.

@Test
public void distinct_triples_03() throws IOException {
    // Two triples that share subject and predicate but carry different
    // literal objects are distinct, so both must appear in the output.
    MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this.getMapReduceDriver();
    Triple first = new Triple(NodeFactory.createURI("urn:s"), NodeFactory.createURI("urn:p"), NodeFactory.createLiteral("1"));
    Triple second = new Triple(first.getSubject(), first.getPredicate(), NodeFactory.createLiteral("2"));
    Assert.assertNotEquals(first, second);
    TripleWritable firstWritable = new TripleWritable(first);
    TripleWritable secondWritable = new TripleWritable(second);
    Assert.assertNotEquals(firstWritable, secondWritable);
    driver.addInput(new LongWritable(1), firstWritable);
    driver.addInput(new LongWritable(2), secondWritable);
    driver.addOutput(NullWritable.get(), firstWritable);
    driver.addOutput(NullWritable.get(), secondWritable);
    driver.runTest(false);
}
Also used : Triple(org.apache.jena.graph.Triple) TripleWritable(org.apache.jena.hadoop.rdf.types.TripleWritable) LongWritable(org.apache.hadoop.io.LongWritable) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Example 52 with LongWritable

use of org.apache.hadoop.io.LongWritable in project jena by apache.

From class AbstractCharacteristicSetGeneratingReducerTests, method characteristic_set_generating_reducer_01.

/**
 * Tests that a single tuple with a single predicate produces the expected
 * characteristic set.
 *
 * @throws IOException
 */
@Test
public void characteristic_set_generating_reducer_01() throws IOException {
    MapReduceDriver<LongWritable, T, NodeWritable, T, CharacteristicSetWritable, NullWritable> driver = this.getMapReduceDriver();
    // Feed exactly one tuple through the pipeline
    T input = this.createTuple(1, "http://predicate");
    driver.addInput(new LongWritable(1), input);
    // Register the single-predicate characteristic set we expect back
    this.createSet(driver, 1, "http://predicate");
    driver.runTest(false);
}
Also used : NodeWritable(org.apache.jena.hadoop.rdf.types.NodeWritable) CharacteristicSetWritable(org.apache.jena.hadoop.rdf.types.CharacteristicSetWritable) LongWritable(org.apache.hadoop.io.LongWritable) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Example 53 with LongWritable

use of org.apache.hadoop.io.LongWritable in project jena by apache.

From class TestDistinctTriples, method distinct_triples_04.

@Test
public void distinct_triples_04() throws IOException {
    // Two triples that differ only in their subject are distinct, so both
    // must survive the distinct operation.
    MapReduceDriver<LongWritable, TripleWritable, TripleWritable, NullWritable, NullWritable, TripleWritable> driver = this.getMapReduceDriver();
    Node subjectA = NodeFactory.createURI("urn:nf#cbf2b2c7-109e-4097-bbea-f67f272c7fcc");
    Node subjectB = NodeFactory.createURI("urn:nf#bb08b75c-1ad2-47ef-acd2-eb2d92b94b89");
    Node predicate = NodeFactory.createURI("urn:p");
    Node object = NodeFactory.createURI("urn:66.230.159.118");
    Assert.assertNotEquals(subjectA, subjectB);
    Triple tripleA = new Triple(subjectA, predicate, object);
    Triple tripleB = new Triple(subjectB, predicate, object);
    Assert.assertNotEquals(tripleA, tripleB);
    TripleWritable writableA = new TripleWritable(tripleA);
    TripleWritable writableB = new TripleWritable(tripleB);
    Assert.assertNotEquals(writableA, writableB);
    // Also check ordering distinguishes them, since distinct relies on sorting
    Assert.assertNotEquals(0, writableA.compareTo(writableB));
    driver.addInput(new LongWritable(1), writableA);
    driver.addInput(new LongWritable(2), writableB);
    driver.addOutput(NullWritable.get(), writableA);
    driver.addOutput(NullWritable.get(), writableB);
    driver.runTest(false);
}
Also used : Triple(org.apache.jena.graph.Triple) TripleWritable(org.apache.jena.hadoop.rdf.types.TripleWritable) Node(org.apache.jena.graph.Node) LongWritable(org.apache.hadoop.io.LongWritable) NullWritable(org.apache.hadoop.io.NullWritable) Test(org.junit.Test)

Example 54 with LongWritable

use of org.apache.hadoop.io.LongWritable in project jena by apache.

From class AbstractBlockBasedNodeTupleReader, method nextKeyValue.

/**
 * Reads the next key/value pair from the block, setting the key to the
 * absolute stream position and the value to the next parsed tuple.
 *
 * @return True if a tuple was read, false when the input is exhausted
 * @throws IOException
 *             If a parse error occurs and bad tuples are not being ignored
 */
@Override
public boolean nextKeyValue() throws IOException {
    // Reuse key for efficiency
    if (key == null) {
        key = new LongWritable();
    }
    if (this.finished)
        return false;
    try {
        if (this.iter.hasNext()) {
            // Position will be relative to the start for the split we're
            // processing.
            // NB - The original code boxed this sum into a Long and null
            // checked it; a boxed primitive sum can never be null so the
            // check was dead code and has been removed.
            long position = this.start + this.stream.getPosition();
            this.key.set(position);
            // Where the input is compressed the decompressed data may run
            // past the declared split length, so grow the length to avoid
            // reporting progress > 100% before we are actually finished
            if (this.compressionCodecs != null && position > this.length)
                this.length = position + 1;
            this.tuple = this.createInstance(this.iter.next());
            return true;
        } else {
            // Need to ensure that the parser thread has finished in order
            // to determine whether we finished without error
            this.waitForParserFinished();
            if (this.parserError != null) {
                LOG.error("Error parsing block, aborting further parsing", this.parserError);
                if (!this.ignoreBadTuples)
                    throw new IOException("Error parsing block at position " + (this.start + this.input.getBytesRead()) + ", aborting further parsing", this.parserError);
            }
            this.key = null;
            this.tuple = null;
            this.finished = true;
            // For compressed input pull the length back so progress reports
            // as complete now that we have fully consumed the stream
            if (this.compressionCodecs != null)
                this.length--;
            return false;
        }
    } catch (IOException e) {
        // Propagate IO errors as-is rather than re-wrapping them below
        throw e;
    } catch (Throwable e) {
        // Failed to read the tuple on this line
        LOG.error("Error parsing block, aborting further parsing", e);
        if (!this.ignoreBadTuples) {
            this.iter.close();
            throw new IOException("Error parsing block at position " + (this.start + this.input.getBytesRead()) + ", aborting further parsing", e);
        }
        this.key = null;
        this.tuple = null;
        this.finished = true;
        return false;
    }
}
Also used : LongWritable(org.apache.hadoop.io.LongWritable) IOException(java.io.IOException)

Example 55 with LongWritable

use of org.apache.hadoop.io.LongWritable in project jena by apache.

From class AbstractLineBasedNodeTupleReader, method nextKeyValue.

/**
 * Reads the next key/value pair, setting the key to the byte position of the
 * line within the input and the value to the tuple parsed from that line.
 * Oversized lines and lines yielding no tuple are skipped.
 *
 * @return True if a tuple was read, false when the input is exhausted
 * @throws IOException
 *             If a bad tuple is encountered and bad tuples are not ignored
 */
@Override
public final boolean nextKeyValue() throws IOException {
    // Reuse key for efficiency
    if (key == null) {
        key = new LongWritable();
    }
    // Reset value which we use for reading lines
    if (value == null) {
        value = new Text();
    }
    tuple = null;
    // Try to read the next valid line
    int newSize = 0;
    while (pos < end) {
        // Read next line; the third argument caps how many bytes may be
        // consumed looking for a line terminator
        newSize = in.readLine(value, maxLineLength, Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        // Once we get an empty line we've reached the end of our input
        if (newSize == 0) {
            break;
        }
        // Update position, remember that where inputs are compressed we may
        // be at a larger position then we expected because the length of
        // the split is likely less than the length of the data once
        // decompressed
        key.set(pos);
        pos += newSize;
        // Grow the length estimate so progress reporting never exceeds 100%
        if (pos > estLength)
            estLength = pos + 1;
        // Skip lines that exceed the line length limit that has been set
        if (newSize >= maxLineLength) {
            LOG.warn("Skipped oversized line of size {} at position {}", newSize, (pos - newSize));
            continue;
        }
        // Attempt to read the tuple from current line
        try {
            Iterator<TValue> iter = this.getIterator(value.toString(), maker);
            if (iter.hasNext()) {
                tuple = this.createInstance(iter.next());
                // break out of the loop
                break;
            } else {
                // Empty line/Comment line
                LOG.debug("Valid line with no triple at position {}", (pos - newSize));
                continue;
            }
        } catch (Throwable e) {
            // Failed to read the tuple on this line
            LOG.error("Bad tuple at position " + (pos - newSize), e);
            if (this.ignoreBadTuples)
                continue;
            throw new IOException(String.format("Bad tuple at position %d", (pos - newSize)), e);
        }
    }
    boolean result = this.tuple != null;
    // End of input: clear state so subsequent calls return false immediately
    if (newSize == 0) {
        key = null;
        value = null;
        tuple = null;
        result = false;
        estLength = pos;
    }
    LOG.debug("nextKeyValue() --> {}", result);
    return result;
}
Also used : Text(org.apache.hadoop.io.Text) LongWritable(org.apache.hadoop.io.LongWritable) IOException(java.io.IOException)

Aggregations

LongWritable (org.apache.hadoop.io.LongWritable)445 Text (org.apache.hadoop.io.Text)220 Test (org.junit.Test)171 IntWritable (org.apache.hadoop.io.IntWritable)102 Path (org.apache.hadoop.fs.Path)99 BytesWritable (org.apache.hadoop.io.BytesWritable)70 FloatWritable (org.apache.hadoop.io.FloatWritable)68 Configuration (org.apache.hadoop.conf.Configuration)62 DoubleWritable (org.apache.hadoop.hive.serde2.io.DoubleWritable)62 BooleanWritable (org.apache.hadoop.io.BooleanWritable)60 ArrayList (java.util.ArrayList)59 ObjectInspector (org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector)57 ShortWritable (org.apache.hadoop.hive.serde2.io.ShortWritable)53 IOException (java.io.IOException)49 ByteWritable (org.apache.hadoop.hive.serde2.io.ByteWritable)48 SequenceFile (org.apache.hadoop.io.SequenceFile)42 HiveDecimalWritable (org.apache.hadoop.hive.serde2.io.HiveDecimalWritable)40 FileSystem (org.apache.hadoop.fs.FileSystem)37 JobConf (org.apache.hadoop.mapred.JobConf)37 DeferredObject (org.apache.hadoop.hive.ql.udf.generic.GenericUDF.DeferredObject)35