Search in sources:

Example 6 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class MergeToolMapper method compareKeys.

/**
 * Determines the next merge step for the current parent/child statement pair.
 * <p>
 * Both scanners return sorted data, so the SPO rows of the two statements can
 * be compared directly:
 * <ul>
 * <li>rows are equal: the statement exists on both sides and both advance;</li>
 * <li>the parent row sorts first: the statement is missing from the child
 * table, so the parent advances and the statement may be deleted;</li>
 * <li>the child row sorts first: the statement is missing from the parent
 * table, so the child advances and the statement may be added.</li>
 * </ul>
 * When one side is exhausted ({@code null}) the other side is drained with the
 * same add/delete rules; when both are exhausted the merge is finished.
 * The add/delete is skipped only when {@code usesStartTime} is set and the
 * statement's normalized timestamp falls on the "leave it alone" side of
 * {@code startTime} / {@code copyToolInputTime}.
 *
 * @param key1 the {@link RyaStatement} from the parent instance table, or
 * {@code null} once the parent table is exhausted.
 * @param key2 the {@link RyaStatement} from the child instance table, or
 * {@code null} once the child table is exhausted.
 * @return the {@link CompareKeysResult} telling the caller which side(s) to
 * advance and whether to add or delete the statement.
 * @throws MutationsRejectedException declared for callers; the source is not visible in this method.
 * @throws IOException declared for callers; presumably raised by the helpers invoked here.
 * @throws InterruptedException declared for callers; the source is not visible in this method.
 * @throws TripleRowResolverException presumably raised when a statement cannot be serialized to a row.
 */
private CompareKeysResult compareKeys(final RyaStatement key1, final RyaStatement key2) throws MutationsRejectedException, IOException, InterruptedException, TripleRowResolverException {
    log.trace("key1 = " + key1);
    log.trace("key2 = " + key2);

    final boolean parentExhausted = key1 == null;
    final boolean childExhausted = key2 == null;

    if (parentExhausted && childExhausted) {
        // Nothing left on either side.
        return CompareKeysResult.FINISHED;
    }

    if (parentExhausted) {
        // Only child keys remain: add each one unless it predates the start time.
        final Date childTime = normalizeDate(new Date(key2.getTimestamp()), false);
        if (usesStartTime && childTime.before(startTime)) {
            return CompareKeysResult.ADVANCE_CHILD;
        }
        return CompareKeysResult.ADVANCE_CHILD_AND_ADD;
    }

    if (childExhausted) {
        // Only parent keys remain: delete each one unless the time criteria say to keep it.
        final Date parentTime = normalizeDate(new Date(key1.getTimestamp()), true);
        final boolean keepParentKey;
        if (!usesStartTime) {
            keepParentKey = false;
        } else if (copyToolInputTime == null) {
            keepParentKey = parentTime.after(startTime);
        } else {
            keepParentKey = parentTime.before(copyToolInputTime)
                    || (parentTime.after(copyToolInputTime) && parentTime.after(startTime));
        }
        if (keepParentKey) {
            return CompareKeysResult.ADVANCE_PARENT;
        }
        return CompareKeysResult.ADVANCE_PARENT_AND_DELETE;
    }

    // Both keys are present: order them by their SPO row.
    final TripleRow parentSpo = parentRyaContext.serializeTriple(key1).get(TABLE_LAYOUT.SPO);
    final Text parentRow = new Text(parentSpo.getRow());
    final TripleRow childSpo = childRyaContext.serializeTriple(key2).get(TABLE_LAYOUT.SPO);
    final Text childRow = new Text(childSpo.getRow());
    final Date parentTime = normalizeDate(new Date(key1.getTimestamp()), true);
    final Date childTime = normalizeDate(new Date(key2.getTimestamp()), false);

    final int order = parentRow.compareTo(childRow);

    if (order == 0) {
        // Same row on both sides: move on to the next parent and child keys.
        return CompareKeysResult.ADVANCE_BOTH;
    }

    if (order > 0) {
        // The child key sorts first, so it is absent from the parent table.
        // Either the parent deleted it after the clone (leave it) or the
        // child added it after the clone (add it).
        if (usesStartTime && childTime.before(startTime)) {
            return CompareKeysResult.ADVANCE_CHILD;
        }
        return CompareKeysResult.ADVANCE_CHILD_AND_ADD;
    }

    // The parent key sorts first, so it is absent from the child table.
    // Either the parent added it after the clone (leave it) or the child
    // deleted it after the clone (delete it).
    final boolean keepParentKey;
    if (!usesStartTime) {
        keepParentKey = false;
    } else if (copyToolInputTime == null) {
        keepParentKey = parentTime.after(startTime);
    } else {
        keepParentKey = parentTime.before(copyToolInputTime)
                || (parentTime.after(copyToolInputTime) && parentTime.after(startTime));
    }
    if (keepParentKey) {
        return CompareKeysResult.ADVANCE_PARENT;
    }
    return CompareKeysResult.ADVANCE_PARENT_AND_DELETE;
}
Also used : TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) TripleRow(org.apache.rya.api.resolver.triple.TripleRow) Text(org.apache.hadoop.io.Text) Date(java.util.Date)

Example 7 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class JoinSelectMapperTest method testOutput.

/**
 * Feeds a single SPO-layout key for one statement through
 * {@code JoinSelectSpoTableOutput.JoinSelectMapper} and checks that the mapper
 * emits the nine expected (CompositeType, TripleCard) pairs, in order: three
 * keyed on a single triple component and six keyed on a pair of components
 * joined by {@code DELIM}. Every expected key carries {@code IntWritable(2)}.
 */
@Test
public void testOutput() throws TripleRowResolverException, IOException {
    final RyaStatement statement = new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1"));

    // Raw component strings of the statement.
    final String subj = statement.getSubject().getData();
    final String pred = statement.getPredicate().getData();
    final String obj = statement.getObject().getData();

    // Single-component and pairwise (DELIM-joined) key texts.
    final Text subjText = new Text(subj);
    final Text predText = new Text(pred);
    final Text objText = new Text(obj);
    final Text subjPredText = new Text(subj + DELIM + pred);
    final Text subjObjText = new Text(subj + DELIM + obj);
    final Text predObjText = new Text(pred + DELIM + obj);
    final Text predSubjText = new Text(pred + DELIM + subj);
    final Text objPredText = new Text(obj + DELIM + pred);
    final Text objSubjText = new Text(obj + DELIM + subj);

    // Entries holding two components; the last label names the remaining position.
    final TripleEntry subjPredEntry = new TripleEntry(subjText, predText, new Text("subject"), new Text("predicate"), new Text("object"));
    final TripleEntry predObjEntry = new TripleEntry(predText, objText, new Text("predicate"), new Text("object"), new Text("subject"));
    final TripleEntry objSubjEntry = new TripleEntry(objText, subjText, new Text("object"), new Text("subject"), new Text("predicate"));

    // Entries holding one component; the last label names the remaining pair.
    final TripleEntry objForSubjPred = new TripleEntry(objText, new Text(""), new Text("object"), new Text(""), new Text("subjectpredicate"));
    final TripleEntry predForObjSubj = new TripleEntry(predText, new Text(""), new Text("predicate"), new Text(""), new Text("objectsubject"));
    final TripleEntry subjForPredObj = new TripleEntry(subjText, new Text(""), new Text("subject"), new Text(""), new Text("predicateobject"));
    final TripleEntry subjForObjPred = new TripleEntry(subjText, new Text(""), new Text("subject"), new Text(""), new Text("objectpredicate"));
    final TripleEntry predForSubjObj = new TripleEntry(predText, new Text(""), new Text("predicate"), new Text(""), new Text("subjectobject"));
    final TripleEntry objForPredSubj = new TripleEntry(objText, new Text(""), new Text("object"), new Text(""), new Text("predicatesubject"));

    // Serialize the statement and pick its SPO row as mapper input.
    final TripleRowResolver resolver = new WholeRowTripleResolver();
    final Map<TABLE_LAYOUT, TripleRow> rowsByLayout = resolver.serialize(statement);
    System.out.println(rowsByLayout);
    final TripleRow spoRow = rowsByLayout.get(TABLE_LAYOUT.SPO);
    System.out.println("Triple row is" + spoRow);
    System.out.println("ColumnV is " + spoRow.getTimestamp());

    final byte[] empty = new byte[0];
    final Key inputKey = new Key(spoRow.getRow(), spoRow.getColumnFamily(), spoRow.getColumnQualifier(), empty, 1);
    final Value inputValue = new Value(empty);

    new MapDriver<Key, Value, CompositeType, TripleCard>()
            .withMapper(new JoinSelectSpoTableOutput.JoinSelectMapper())
            .withInput(inputKey, inputValue)
            .withOutput(new CompositeType(objText, new IntWritable(2)), new TripleCard(subjPredEntry))
            .withOutput(new CompositeType(subjText, new IntWritable(2)), new TripleCard(predObjEntry))
            .withOutput(new CompositeType(predText, new IntWritable(2)), new TripleCard(objSubjEntry))
            .withOutput(new CompositeType(predObjText, new IntWritable(2)), new TripleCard(subjForPredObj))
            .withOutput(new CompositeType(subjObjText, new IntWritable(2)), new TripleCard(predForObjSubj))
            .withOutput(new CompositeType(subjPredText, new IntWritable(2)), new TripleCard(objForSubjPred))
            .withOutput(new CompositeType(objPredText, new IntWritable(2)), new TripleCard(subjForObjPred))
            .withOutput(new CompositeType(objSubjText, new IntWritable(2)), new TripleCard(predForSubjObj))
            .withOutput(new CompositeType(predSubjText, new IntWritable(2)), new TripleCard(objForPredSubj))
            .runTest();
}
Also used : TripleRowResolver(org.apache.rya.api.resolver.triple.TripleRowResolver) MapDriver(org.apache.hadoop.mrunit.mapreduce.MapDriver) RyaStatement(org.apache.rya.api.domain.RyaStatement) Text(org.apache.hadoop.io.Text) RyaType(org.apache.rya.api.domain.RyaType) TripleCard(org.apache.rya.joinselect.mr.utils.TripleCard) RyaURI(org.apache.rya.api.domain.RyaURI) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) TripleRow(org.apache.rya.api.resolver.triple.TripleRow) WholeRowTripleResolver(org.apache.rya.api.resolver.triple.impl.WholeRowTripleResolver) Value(org.apache.accumulo.core.data.Value) TripleEntry(org.apache.rya.joinselect.mr.utils.TripleEntry) Key(org.apache.accumulo.core.data.Key) IntWritable(org.apache.hadoop.io.IntWritable) CompositeType(org.apache.rya.joinselect.mr.utils.CompositeType) Test(org.junit.Test)

Example 8 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class JoinSelectStatisticsTest method testMap3.

/**
 * End-to-end check of the join-selectivity job on a small data set.
 * <p>
 * Writes the 2x2x2 combinations of subject/predicate/object URIs to
 * {@code rya_spo}, writes single- and pair-component cardinalities to
 * {@code rya_prospects}, runs {@code JoinSelectTestDriver}, and then verifies
 * the entries in {@code rya_selectivity} whose row starts with
 * {@code "subject" + DELIM + uri + 1}: five specific join types must carry the
 * expected values and exactly twelve entries must be present.
 * <p>
 * Assertions use {@code assertEquals} with a message so that a failure
 * reports the join type and the actual value instead of a bare
 * {@code AssertionError}.
 */
@Test
public void testMap3() throws Exception {
    init();
    System.out.println("*************************Test3**************************** ");

    // Populate the SPO table with every (i, j, k) combination in [1, 2].
    BatchWriter spoWriter = c.createBatchWriter("rya_spo", new BatchWriterConfig());
    for (int i = 1; i < 3; i++) {
        for (int j = 1; j < 3; j++) {
            for (int k = 1; k < 3; k++) {
                RyaStatement rs = new RyaStatement(new RyaURI(uri + i), new RyaURI(uri + (j)), new RyaType(uri + k));
                Map<TABLE_LAYOUT, TripleRow> tripleRowMap = ryaContext.serializeTriple(rs);
                TripleRow tripleRow = tripleRowMap.get(TABLE_LAYOUT.SPO);
                Mutation m = JoinSelectStatsUtil.createMutation(tripleRow);
                spoWriter.addMutation(m);
            }
        }
    }
    spoWriter.close();

    // Populate the prospects table with the cardinalities the job reads.
    BatchWriter prospectsWriter = c.createBatchWriter("rya_prospects", new BatchWriterConfig());
    for (int i = 1; i < 3; i++) {
        // Single-component cardinalities: value is i + position in cardList (1-based).
        int k = 1;
        for (String s : cardList) {
            Mutation m = new Mutation(new Text(s + DELIM + uri + i + DELIM + i));
            m.put(new Text(), new Text(), new Value(new IntWritable(i + k).toString().getBytes()));
            prospectsWriter.addMutation(m);
            k++;
        }
        // Pair cardinalities: value is i + position in aggCardList (1-based) + j.
        for (int j = 1; j < 3; j++) {
            k = 1;
            for (String s : aggCardList) {
                Mutation m = new Mutation(new Text(s + DELIM + uri + i + DELIM + uri + j + DELIM + i));
                m.put(new Text(), new Text(), new Value(new IntWritable(i + k + j).toString().getBytes()));
                prospectsWriter.addMutation(m);
                k++;
            }
        }
    }
    prospectsWriter.close();

    Assert.assertEquals(0, ToolRunner.run(new JoinSelectTestDriver(), new String[] { "" }));

    // Dump the whole selectivity table for debugging.
    Scanner scan = c.createScanner("rya_selectivity", new Authorizations());
    scan.setRange(new Range());
    for (Map.Entry<Key, Value> entry : scan) {
        System.out.println("Key row string is " + entry.getKey().getRow().toString());
        System.out.println("Join type is " + entry.getKey().getColumnFamily().toString());
        System.out.println("Value is " + entry.getKey().getColumnQualifier().toString());
    }

    // Verify the entries for the first subject URI.
    Scanner scan1 = c.createScanner("rya_selectivity", new Authorizations());
    scan1.setRange(Range.prefix("subject" + DELIM + uri + 1));
    int entryCount = 0;
    for (Map.Entry<Key, Value> entry : scan1) {
        Key key = entry.getKey();
        String joinType = key.getColumnFamily().toString();
        int val = Integer.parseInt(key.getColumnQualifier().toString());
        if (joinType.equals("predicatepredicate")) {
            Assert.assertEquals("value for join type " + joinType, 14, val);
        }
        if (joinType.equals("objectobject")) {
            Assert.assertEquals("value for join type " + joinType, 18, val);
        }
        if (joinType.equals("predicateobjectpredicateobject")) {
            Assert.assertEquals("value for join type " + joinType, 28, val);
        }
        if (joinType.equals("predicateobjectsubjectpredicate")) {
            Assert.assertEquals("value for join type " + joinType, 20, val);
        }
        if (joinType.equals("predicateobjectobjectsubject")) {
            Assert.assertEquals("value for join type " + joinType, 16, val);
        }
        entryCount++;
    }
    Assert.assertEquals("number of selectivity entries for the first subject", 12, entryCount);
}
Also used : Scanner(org.apache.accumulo.core.client.Scanner) Authorizations(org.apache.accumulo.core.security.Authorizations) RyaStatement(org.apache.rya.api.domain.RyaStatement) Text(org.apache.hadoop.io.Text) RyaType(org.apache.rya.api.domain.RyaType) Range(org.apache.accumulo.core.data.Range) RyaURI(org.apache.rya.api.domain.RyaURI) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) TripleRow(org.apache.rya.api.resolver.triple.TripleRow) Value(org.apache.accumulo.core.data.Value) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) Map(java.util.Map) IntWritable(org.apache.hadoop.io.IntWritable) Key(org.apache.accumulo.core.data.Key) Test(org.junit.Test)

Example 9 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class RyaGiraphUtils method initializeAccumuloInputFormat.

/**
 * Configures {@link AccumuloInputFormat} from the Accumulo connection settings
 * stored in {@code conf}: connector credentials, the input table derived from
 * the configured table layout (default {@code SPO}) and table prefix, scan
 * authorizations, and either a mock instance or a ZooKeeper-backed instance.
 * <p>
 * If {@code MRUtils.AC_AUTH_PROP} holds a non-empty comma-separated list it is
 * used for the scan authorizations and also copied to
 * {@code ConfigUtils.CLOUDBASE_AUTHS}; otherwise
 * {@code AccumuloRdfConstants.ALL_AUTHORIZATIONS} is used.
 * <p>
 * An {@link IOException} or {@link AccumuloSecurityException} raised during
 * setup is printed via {@code printStackTrace()} and not propagated.
 *
 * @param conf the Hadoop configuration to read the settings from.
 */
public static void initializeAccumuloInputFormat(Configuration conf) {
    // Accumulo connection settings.
    final boolean useMock = MRUtils.getACMock(conf, false);
    final String zooKeepers = MRUtils.getACZK(conf);
    final String instanceName = MRUtils.getACInstance(conf);
    final String user = MRUtils.getACUserName(conf);
    final String password = MRUtils.getACPwd(conf);
    final String prefix = MRUtils.getTablePrefix(conf);
    final TABLE_LAYOUT layout = MRUtils.getTableLayout(conf, TABLE_LAYOUT.SPO);

    // Scan authorizations: explicit list if configured, otherwise everything.
    final String authList = conf.get(MRUtils.AC_AUTH_PROP);
    final boolean authsConfigured = authList != null && !authList.isEmpty();
    final Authorizations scanAuths = authsConfigured
            ? new Authorizations(authList.split(","))
            : AccumuloRdfConstants.ALL_AUTHORIZATIONS;
    if (authsConfigured) {
        // for consistency
        conf.set(ConfigUtils.CLOUDBASE_AUTHS, authList);
    }

    // Point the input format at the right table, credentials and instance.
    // NOTE(review): the settings below are applied to a Job created locally
    // from conf; confirm that they reach the caller's configuration as intended.
    try {
        final Job job = new Job(conf);
        AccumuloInputFormat.setConnectorInfo(job, user, new PasswordToken(password));
        final String inputTable = RdfCloudTripleStoreUtils.layoutPrefixToTable(layout, prefix);
        AccumuloInputFormat.setInputTableName(job, inputTable);
        AccumuloInputFormat.setScanAuthorizations(job, scanAuths);
        if (!useMock) {
            final ClientConfiguration clientConfig = ClientConfiguration.loadDefault()
                    .withInstance(instanceName)
                    .withZkHosts(zooKeepers);
            AccumuloInputFormat.setZooKeeperInstance(job, clientConfig);
        } else {
            AccumuloInputFormat.setMockInstance(job, instanceName);
        }
    } catch (IOException | AccumuloSecurityException e) {
        // TODO better exception handling here
        e.printStackTrace();
    }
}
Also used : TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) Authorizations(org.apache.accumulo.core.security.Authorizations) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) IOException(java.io.IOException) Job(org.apache.hadoop.mapreduce.Job) ClientConfiguration(org.apache.accumulo.core.client.ClientConfiguration)

Example 10 with TABLE_LAYOUT

use of org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT in project incubator-rya by apache.

the class StatementPatternStorage method createRange.

/**
 * Builds the table layout and Accumulo {@link Range} to scan for a triple
 * pattern.
 * <p>
 * The three values are converted to their Rya equivalents and the matching
 * {@link TriplePatternStrategy} is looked up. If a strategy is found, its
 * byte range is translated into a {@link Range} over {@link Text} rows for
 * the layout the strategy selected. If no strategy is found, the result is the
 * {@code SPO} layout paired with a default-constructed {@link Range}
 * (presumably an unbounded scan — confirm against the Accumulo API).
 *
 * @param s_v the subject value; cast to {@code Resource}.
 * @param p_v the predicate value; cast to {@code URI}.
 * @param o_v the object value.
 * @return the layout to query together with the row range to scan.
 * @throws IOException declared for callers; presumably raised by the strategy lookup or range definition.
 */
protected Map.Entry<TABLE_LAYOUT, Range> createRange(Value s_v, Value p_v, Value o_v) throws IOException {
    final RyaURI subject = RdfToRyaConversions.convertResource((Resource) s_v);
    final RyaURI predicate = RdfToRyaConversions.convertURI((URI) p_v);
    final RyaType object = RdfToRyaConversions.convertValue(o_v);

    final TriplePatternStrategy strategy = ryaContext.retrieveStrategy(subject, predicate, object, null);
    if (strategy != null) {
        final Map.Entry<TABLE_LAYOUT, ByteRange> layoutAndBytes = strategy.defineRange(subject, predicate, object, null, null);
        final ByteRange bytes = layoutAndBytes.getValue();
        final Text startRow = new Text(bytes.getStart());
        final Text endRow = new Text(bytes.getEnd());
        return new RdfCloudTripleStoreUtils.CustomEntry<TABLE_LAYOUT, Range>(layoutAndBytes.getKey(), new Range(startRow, endRow));
    }

    // No strategy applies to this pattern: fall back to the SPO layout.
    return new RdfCloudTripleStoreUtils.CustomEntry<TABLE_LAYOUT, Range>(TABLE_LAYOUT.SPO, new Range());
}
Also used : TriplePatternStrategy(org.apache.rya.api.query.strategy.TriplePatternStrategy) ByteRange(org.apache.rya.api.query.strategy.ByteRange) Text(org.apache.hadoop.io.Text) RyaType(org.apache.rya.api.domain.RyaType) Range(org.apache.accumulo.core.data.Range) ByteRange(org.apache.rya.api.query.strategy.ByteRange) RyaURI(org.apache.rya.api.domain.RyaURI) TABLE_LAYOUT(org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT) Map(java.util.Map)

Aggregations

TABLE_LAYOUT (org.apache.rya.api.RdfCloudTripleStoreConstants.TABLE_LAYOUT): 32; TripleRow (org.apache.rya.api.resolver.triple.TripleRow): 17; RyaStatement (org.apache.rya.api.domain.RyaStatement): 14; RyaURI (org.apache.rya.api.domain.RyaURI): 14; IOException (java.io.IOException): 12; Map (java.util.Map): 11; Range (org.apache.accumulo.core.data.Range): 11; Text (org.apache.hadoop.io.Text): 11; RyaType (org.apache.rya.api.domain.RyaType): 11; ByteRange (org.apache.rya.api.query.strategy.ByteRange): 11; HashMap (java.util.HashMap): 10; Key (org.apache.accumulo.core.data.Key): 9; Value (org.apache.accumulo.core.data.Value): 9; Mutation (org.apache.accumulo.core.data.Mutation): 8; Authorizations (org.apache.accumulo.core.security.Authorizations): 8; Scanner (org.apache.accumulo.core.client.Scanner): 7; Test (org.junit.Test): 7; RyaRange (org.apache.rya.api.domain.RyaRange): 6; IntWritable (org.apache.hadoop.io.IntWritable): 5; RyaDAOException (org.apache.rya.api.persist.RyaDAOException): 5