Search in sources :

Example 11 with ProspectorServiceEvalStatsDAO

Use of org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO in the project incubator-rya by apache.

From the class EntityTupleSet, method init:

/**
 * Obtains the Accumulo connector and initializes the cardinality fields
 * ({@code cardinality}, {@code minCard}, {@code minSp}) of this tuple set.
 *
 * <p>When both statistics and selectivity are enabled in the configuration,
 * the values are looked up through an {@link AccumuloSelectivityEvalDAO}
 * backed by a {@link ProspectorServiceEvalStatsDAO}. Otherwise a default
 * cardinality of 1 and the first node of the star query are used.
 *
 * <p>Failures are logged rather than rethrown, so this method never
 * propagates an exception to its caller.
 */
private void init() {
    try {
        accCon = ConfigUtils.getConnector(conf);
    } catch (AccumuloException e) {
        // Report through the class logger (not stderr) so the failure is
        // recorded the same way as the initialization problems below.
        LOG.warn("Unable to obtain an Accumulo connector for the EntityTupleSet.", e);
    } catch (AccumuloSecurityException e) {
        LOG.warn("Unable to obtain an Accumulo connector for the EntityTupleSet.", e);
    }
    try {
        if (conf.isUseStats() && conf.isUseSelectivity()) {
            // NOTE(review): if the connector lookup above failed, accCon may be
            // unset here; any resulting failure is caught and logged below.
            ProspectorServiceEvalStatsDAO evalDao = new ProspectorServiceEvalStatsDAO(accCon, conf);
            evalDao.init();
            AccumuloSelectivityEvalDAO ase = new AccumuloSelectivityEvalDAO(conf, accCon);
            ase.setRdfEvalDAO(evalDao);
            ase.init();
            cardinality = starQuery.getCardinality(ase);
            CardinalityStatementPattern csp = starQuery.getMinCardSp(ase);
            minCard = csp.getCardinality();
            minSp = csp.getSp();
        } else {
            // TODO come up with a better default if cardinality is not
            // initialized
            cardinality = minCard = 1;
            minSp = starQuery.getNodes().get(0);
        }
    } catch (final Exception e) {
        LOG.warn("A problem was encountered while initializing the EntityTupleSet.", e);
    }
}
Also used : AccumuloException(org.apache.accumulo.core.client.AccumuloException) CardinalityStatementPattern(org.apache.rya.indexing.accumulo.entity.StarQuery.CardinalityStatementPattern) ProspectorServiceEvalStatsDAO(org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO) AccumuloSelectivityEvalDAO(org.apache.rya.joinselect.AccumuloSelectivityEvalDAO) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) SailException(org.openrdf.sail.SailException) QueryEvaluationException(org.openrdf.query.QueryEvaluationException) AccumuloSecurityException(org.apache.accumulo.core.client.AccumuloSecurityException) AccumuloException(org.apache.accumulo.core.client.AccumuloException)

Example 12 with ProspectorServiceEvalStatsDAO

Use of org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO in the project incubator-rya by apache.

From the class EntityOptimizerTest, method init:

/**
 * Test fixture setup: connects to a mock Accumulo instance, recreates the
 * "rya_prospects" and "rya_selectivity" tables from scratch, and builds the
 * Rya configuration and the {@link ProspectorServiceEvalStatsDAO} under test.
 */
@Before
public void init() throws RepositoryException, TupleQueryResultHandlerException, QueryEvaluationException, MalformedQueryException, AccumuloException, AccumuloSecurityException, TableExistsException {
    accCon = new MockInstance("instance").getConnector("root", "".getBytes());

    // Batch writer settings shared by the tests.
    config = new BatchWriterConfig();
    config.setMaxMemory(1000);
    config.setMaxLatency(1000, TimeUnit.SECONDS);
    config.setMaxWriteThreads(10);

    final String[] statsTables = { "rya_prospects", "rya_selectivity" };

    // Drop any tables left over from a previous test...
    for (final String table : statsTables) {
        if (!accCon.tableOperations().exists(table)) {
            continue;
        }
        try {
            accCon.tableOperations().delete(table);
        } catch (TableNotFoundException e) {
            e.printStackTrace();
        }
    }
    // ...then create them again, empty.
    for (final String table : statsTables) {
        accCon.tableOperations().create(table);
    }

    final Configuration hadoopConf = new Configuration();
    hadoopConf.set(ConfigUtils.CLOUDBASE_AUTHS, "U");
    hadoopConf.set(ConfigUtils.CLOUDBASE_INSTANCE, "instance");
    hadoopConf.set(ConfigUtils.CLOUDBASE_USER, "root");
    hadoopConf.set(ConfigUtils.CLOUDBASE_PASSWORD, "");

    conf = new AccumuloRdfConfiguration(hadoopConf);
    final TablePrefixLayoutStrategy layout = new TablePrefixLayoutStrategy("rya_");
    conf.setTableLayoutStrategy(layout);
    conf.set(ConfigUtils.USE_MOCK_INSTANCE, "true");

    res = new ProspectorServiceEvalStatsDAO(accCon, conf);
}
Also used : TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) Configuration(org.apache.hadoop.conf.Configuration) RdfCloudTripleStoreConfiguration(org.apache.rya.api.RdfCloudTripleStoreConfiguration) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) TablePrefixLayoutStrategy(org.apache.rya.api.layout.TablePrefixLayoutStrategy) ProspectorServiceEvalStatsDAO(org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO) BatchWriterConfig(org.apache.accumulo.core.client.BatchWriterConfig) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) Before(org.junit.Before)

Example 13 with ProspectorServiceEvalStatsDAO

Use of org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO in the project incubator-rya by apache.

From the class RdfCloudTripleStoreSelectivityEvaluationStatisticsTest, method testOptimizeQ1:

/**
 * Seeds the "rya_prospects" and "rya_selectivity" tables with counts for three
 * predicate/object pairs, then checks the cardinality that
 * {@link RdfCloudTripleStoreSelectivityEvaluationStatistics} computes for
 * query {@code q1}.
 */
@Test
public void testOptimizeQ1() throws Exception {
    final RdfEvalStatsDAO<RdfCloudTripleStoreConfiguration> statsDao = new ProspectorServiceEvalStatsDAO(conn, arc);
    final AccumuloSelectivityEvalDAO selectivityDao = new AccumuloSelectivityEvalDAO();
    selectivityDao.setConf(arc);
    selectivityDao.setRdfEvalDAO(statsDao);
    selectivityDao.setConnector(conn);
    selectivityDao.init();

    final BatchWriter prospectsWriter = conn.createBatchWriter("rya_prospects", config);
    final BatchWriter selectivityWriter = conn.createBatchWriter("rya_selectivity", config);

    // Row prefixes for the three predicate/object pairs used by the query.
    final String dogRow = "predicateobject" + DELIM + "http://www.w3.org/2000/01/rdf-schema#label" + DELIM + "uri:dog";
    final String catRow = "predicateobject" + DELIM + "uri:barksAt" + DELIM + "uri:cat";
    final String hydrantRow = "predicateobject" + DELIM + "uri:peesOn" + DELIM + "uri:hydrant";

    // Prospector counts: one "count" entry per pair.
    final Mutation dogCount = new Mutation(dogRow + DELIM + "1");
    dogCount.put(new Text("count"), new Text(""), new Value("1".getBytes()));
    final Mutation catCount = new Mutation(catRow + DELIM + "2");
    catCount.put(new Text("count"), new Text(""), new Value("2".getBytes()));
    final Mutation hydrantCount = new Mutation(hydrantRow + DELIM + "3");
    hydrantCount.put(new Text("count"), new Text(""), new Value("3".getBytes()));

    prospectsWriter.addMutations(Arrays.asList(dogCount, catCount, hydrantCount));
    prospectsWriter.close();
    // Debugging aid: scan "rya_prospects" with an empty Authorizations and an
    // unbounded Range, printing each entry's row, key and value.

    // Selectivity entries: one column family per join type, with the count
    // stored in the column qualifier and doubling for each successive family.
    final Mutation dogSelectivity = new Mutation(dogRow);
    final Mutation catSelectivity = new Mutation(catRow);
    final Mutation hydrantSelectivity = new Mutation(hydrantRow);
    final Mutation fullTableCardinality = new Mutation(new Text("subjectpredicateobject" + DELIM + "FullTableCardinality"));
    fullTableCardinality.put(new Text("FullTableCardinality"), new Text("100"), EMPTY_VAL);

    final List<String> joinTypes = Arrays.asList("subjectobject", "subjectpredicate", "subjectsubject", "predicateobject", "predicatepredicate", "predicatesubject");
    long dogCard = 2;
    long catCard = 3;
    long hydrantCard = 4;
    for (final String joinType : joinTypes) {
        dogSelectivity.put(new Text(joinType), new Text(Long.toString(dogCard)), EMPTY_VAL);
        catSelectivity.put(new Text(joinType), new Text(Long.toString(catCard)), EMPTY_VAL);
        hydrantSelectivity.put(new Text(joinType), new Text(Long.toString(hydrantCard)), EMPTY_VAL);
        dogCard *= 2;
        catCard *= 2;
        hydrantCard *= 2;
    }

    selectivityWriter.addMutations(Arrays.asList(dogSelectivity, catSelectivity, hydrantSelectivity, fullTableCardinality));
    selectivityWriter.close();
    // Debugging aid: scan "rya_selectivity" the same way, printing each
    // entry's row, key and column qualifier.

    final TupleExpr query = getTupleExpr(q1);
    System.out.println(query);
    final RdfCloudTripleStoreSelectivityEvaluationStatistics stats = new RdfCloudTripleStoreSelectivityEvaluationStatistics(arc, statsDao, selectivityDao);
    final double estimated = stats.getCardinality(query);
    Assert.assertEquals(6.3136, estimated, .0001);
}
Also used : ProspectorServiceEvalStatsDAO(org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO) ArrayList(java.util.ArrayList) AccumuloSelectivityEvalDAO(org.apache.rya.joinselect.AccumuloSelectivityEvalDAO) Text(org.apache.hadoop.io.Text) TupleExpr(org.openrdf.query.algebra.TupleExpr) Value(org.apache.accumulo.core.data.Value) BatchWriter(org.apache.accumulo.core.client.BatchWriter) Mutation(org.apache.accumulo.core.data.Mutation) RdfCloudTripleStoreConfiguration(org.apache.rya.api.RdfCloudTripleStoreConfiguration) Test(org.junit.Test)

Example 14 with ProspectorServiceEvalStatsDAO

Use of org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO in the project incubator-rya by apache.

From the class ProspectorExample, method main:

/**
 * Demonstrates the Prospector end to end: loads a handful of statements into
 * a mock Accumulo-backed Rya instance, runs the Prospector Map Reduce job, and
 * logs the cardinalities it produced for every subject, predicate, object and
 * pairwise combination that appears in the loaded statements.
 *
 * @param args command line arguments (not read by this method)
 * @throws Exception if loading the data, running the job or querying the
 *         cardinalities fails
 */
public static void main(final String[] args) throws Exception {
    setupLogging();
    // Configure Rya to use a mock instance.
    final AccumuloRdfConfiguration config = new AccumuloRdfConfiguration();
    config.useMockInstance(true);
    config.setTablePrefix("rya_");
    config.setUsername("user");
    config.setPassword("pass");
    config.setInstanceName("accumulo");
    // Load some data into Rya.
    final List<Statement> statements = Lists.newArrayList(VALUE_FACTORY.createStatement(ALICE, WORKS_AT, BURGER_JOINT), VALUE_FACTORY.createStatement(ALICE, ADMIRES, BOB), VALUE_FACTORY.createStatement(BOB, WORKS_AT, DONUT_SHOP), VALUE_FACTORY.createStatement(CHARLIE, WORKS_AT, DONUT_SHOP), VALUE_FACTORY.createStatement(CHARLIE, LIVES_WITH, BOB), VALUE_FACTORY.createStatement(BOB, LIVES_WITH, CHARLIE), VALUE_FACTORY.createStatement(BOB, LIVES_WITH, ALICE));
    final Sail sail = RyaSailFactory.getInstance(config);
    final SailConnection conn = sail.getConnection();
    try {
        log.info("Loading the following statements into a Mock instance of Accumulo Rya:");
        conn.begin();
        for (final Statement statement : statements) {
            log.info("    " + statement.toString());
            conn.addStatement(statement.getSubject(), statement.getPredicate(), statement.getObject());
        }
        conn.commit();
    } finally {
        // Release the connection even when loading fails part way through.
        conn.close();
    }
    // Create the table that the Prospector's results will be written to.
    ConnectorFactory.connect(config).tableOperations().create("rya_prospects");
    // Run the Prospector using the configuration file that is in the resources directory.
    log.info("");
    log.info("Running the Map Reduce job that computes the Prospector results.");
    ToolRunner.run(new Prospector(), new String[] { "src/main/resources/stats_cluster_config.xml" });
    // Print the table that was created by the Prospector.
    log.info("");
    log.info("The following cardinalities were written to the Prospector table:");
    final ProspectorServiceEvalStatsDAO dao = ProspectorServiceEvalStatsDAO.make(config);
    // Do each of the Subjects.
    double cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(ALICE));
    log.info("    subject: " + ALICE + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(BOB));
    log.info("    subject: " + BOB + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECT, Lists.newArrayList(CHARLIE));
    log.info("    subject: " + CHARLIE + ", cardinality: " + cardinality);
    // Do each of the Predicates.
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(WORKS_AT));
    log.info("    predicate: " + WORKS_AT + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(ADMIRES));
    log.info("    predicate: " + ADMIRES + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATE, Lists.newArrayList(LIVES_WITH));
    log.info("    predicate: " + LIVES_WITH + ", cardinality: " + cardinality);
    // Do each of the Objects.
    cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BURGER_JOINT));
    log.info("    object: " + BURGER_JOINT + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(DONUT_SHOP));
    log.info("    object: " + DONUT_SHOP + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(ALICE));
    log.info("    object: " + ALICE + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(BOB));
    log.info("    object: " + BOB + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.OBJECT, Lists.newArrayList(CHARLIE));
    log.info("    object: " + CHARLIE + ", cardinality: " + cardinality);
    // Do each of the Subject/Predicate pairs.
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, WORKS_AT));
    log.info("    subject/predicate: " + ALICE + "/" + WORKS_AT + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(ALICE, ADMIRES));
    log.info("    subject/predicate: " + ALICE + "/" + ADMIRES + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, WORKS_AT));
    log.info("    subject/predicate: " + BOB + "/" + WORKS_AT + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, WORKS_AT));
    log.info("    subject/predicate: " + CHARLIE + "/" + WORKS_AT + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(CHARLIE, LIVES_WITH));
    log.info("    subject/predicate: " + CHARLIE + "/" + LIVES_WITH + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTPREDICATE, Lists.newArrayList(BOB, LIVES_WITH));
    log.info("    subject/predicate: " + BOB + "/" + LIVES_WITH + ", cardinality: " + cardinality);
    // Do each of the Subject/Object pairs.
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BURGER_JOINT));
    log.info("    subject/object: " + ALICE + "/" + BURGER_JOINT + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(ALICE, BOB));
    log.info("    subject/object: " + ALICE + "/" + BOB + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, DONUT_SHOP));
    // The label must name BOB, the subject that was actually queried above.
    log.info("    subject/object: " + BOB + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, DONUT_SHOP));
    log.info("    subject/object: " + CHARLIE + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(CHARLIE, BOB));
    log.info("    subject/object: " + CHARLIE + "/" + BOB + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, CHARLIE));
    log.info("    subject/object: " + BOB + "/" + CHARLIE + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.SUBJECTOBJECT, Lists.newArrayList(BOB, ALICE));
    log.info("    subject/object: " + BOB + "/" + ALICE + ", cardinality: " + cardinality);
    // Do each of the Predicate/Object pairs.
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, BURGER_JOINT));
    log.info("    predicate/object: " + WORKS_AT + "/" + BURGER_JOINT + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(ADMIRES, BOB));
    log.info("    predicate/object: " + ADMIRES + "/" + BOB + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(WORKS_AT, DONUT_SHOP));
    log.info("    predicate/object: " + WORKS_AT + "/" + DONUT_SHOP + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, BOB));
    log.info("    predicate/object: " + LIVES_WITH + "/" + BOB + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, CHARLIE));
    log.info("    predicate/object: " + LIVES_WITH + "/" + CHARLIE + ", cardinality: " + cardinality);
    cardinality = dao.getCardinality(config, CARDINALITY_OF.PREDICATEOBJECT, Lists.newArrayList(LIVES_WITH, ALICE));
    log.info("    predicate/object: " + LIVES_WITH + "/" + ALICE + ", cardinality: " + cardinality);
}
Also used : SailConnection(org.openrdf.sail.SailConnection) ProspectorServiceEvalStatsDAO(org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO) Statement(org.openrdf.model.Statement) Sail(org.openrdf.sail.Sail) Prospector(org.apache.rya.prospector.mr.Prospector) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration)

Aggregations

ProspectorServiceEvalStatsDAO (org.apache.rya.prospector.service.ProspectorServiceEvalStatsDAO) 14, RdfCloudTripleStoreConfiguration (org.apache.rya.api.RdfCloudTripleStoreConfiguration) 10, AccumuloSelectivityEvalDAO (org.apache.rya.joinselect.AccumuloSelectivityEvalDAO) 10, ArrayList (java.util.ArrayList) 8, BatchWriter (org.apache.accumulo.core.client.BatchWriter) 8, Mutation (org.apache.accumulo.core.data.Mutation) 8, Value (org.apache.accumulo.core.data.Value) 8, Text (org.apache.hadoop.io.Text) 8, Test (org.junit.Test) 8, TupleExpr (org.openrdf.query.algebra.TupleExpr) 8, Map (java.util.Map) 6, Scanner (org.apache.accumulo.core.client.Scanner) 6, Key (org.apache.accumulo.core.data.Key) 6, Range (org.apache.accumulo.core.data.Range) 6, Authorizations (org.apache.accumulo.core.security.Authorizations) 6, QueryJoinSelectOptimizer (org.apache.rya.rdftriplestore.evaluation.QueryJoinSelectOptimizer) 6, RdfCloudTripleStoreSelectivityEvaluationStatistics (org.apache.rya.rdftriplestore.evaluation.RdfCloudTripleStoreSelectivityEvaluationStatistics) 6, AccumuloRdfConfiguration (org.apache.rya.accumulo.AccumuloRdfConfiguration) 5, BatchWriterConfig (org.apache.accumulo.core.client.BatchWriterConfig) 3, MockInstance (org.apache.accumulo.core.client.mock.MockInstance) 3