Search in sources :

Example 1 with ProspectorService

Use of org.apache.rya.prospector.service.ProspectorService in the project incubator-rya by Apache.

From the class ProspectorTest, method testCount.

/**
 * Loads five statements into a mock Accumulo instance, runs the {@link Prospector}
 * job over them, and verifies through {@link ProspectorService} that exactly one
 * prospect was recorded and that a sample of each kind of count index entry
 * (entity, subject, predicate, object, subject-predicate, predicate-object and
 * subject-object) carries the expected count.
 *
 * @throws Exception if the mock instance, the DAO, the job or a query fails
 */
@Test
public void testCount() throws Exception {
    // Load some data into a mock Accumulo and run the Prospector MapReduce job.
    final Instance mock = new MockInstance("accumulo");
    final Connector connector = mock.getConnector("user", new PasswordToken("pass"));
    final String outtable = "rya_prospects";
    // Always start from an empty output table.
    if (connector.tableOperations().exists(outtable)) {
        connector.tableOperations().delete(outtable);
    }
    connector.tableOperations().create(outtable);

    // Resolve the job configuration up front so a missing resource fails with a
    // message that names the file instead of a bare NullPointerException.
    final String confFile = "stats_cluster_config.xml";
    final Path confPath = new Path(java.util.Objects.requireNonNull(
            getClass().getClassLoader().getResource(confFile),
            "Test resource not found on the classpath: " + confFile).toString());

    final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
    ryaDAO.setConnector(connector);
    ryaDAO.init();
    try {
        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")));
        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")));
        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")));
        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")));
        ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")));
        ToolRunner.run(new Prospector(), new String[] { confPath.toString() });
    } finally {
        // Release the DAO even when loading or the job throws.
        ryaDAO.destroy();
    }

    // Interrogate the results of the Prospect job to ensure the correct results were created.
    final ProspectorService service = new ProspectorService(connector, outtable);
    final String[] auths = { "U", "FOUO" };
    List<Long> plist = Lists.newArrayList(service.getProspects(auths));
    assertEquals(1, plist.size());
    final Long prospectTimestamp = plist.get(0);

    // The same single prospect must also be found by a time-range lookup around "now".
    final long now = System.currentTimeMillis();
    plist = Lists.newArrayList(service.getProspectsInRange(now - 100000, now + 10000, auths));
    assertEquals(1, plist.size());

    final String anyUri = XMLSchema.ANYURI.stringValue();
    final String string = XMLSchema.STRING.stringValue();
    // Separator between the two terms in the data of a compound index entry.
    final String delim = "\u0000";

    // Ensure one of the correct "entity" counts was created.
    final List<IndexEntry> entityEntries = service.query(plist, ProspectorConstants.COUNT,
            TripleValueType.ENTITY.getIndexType(), Lists.newArrayList("urn:gem:etype"), anyUri, auths);
    assertEquals(expectedCount("urn:gem:etype", anyUri, TripleValueType.ENTITY, prospectTimestamp, 5), entityEntries);

    // Ensure one of the correct "subject" counts was created.
    final List<IndexEntry> subjectEntries = service.query(plist, ProspectorConstants.COUNT,
            TripleValueType.SUBJECT.getIndexType(), Lists.newArrayList("urn:gem:etype#1234"), anyUri, auths);
    assertEquals(expectedCount("urn:gem:etype#1234", anyUri, TripleValueType.SUBJECT, prospectTimestamp, 3), subjectEntries);

    // Ensure one of the correct "predicate" counts was created.
    final List<IndexEntry> predicateEntries = service.query(plist, ProspectorConstants.COUNT,
            TripleValueType.PREDICATE.getIndexType(), Lists.newArrayList("urn:gem#pred"), anyUri, auths);
    assertEquals(expectedCount("urn:gem#pred", anyUri, TripleValueType.PREDICATE, prospectTimestamp, 4), predicateEntries);

    // Ensure one of the correct "object" counts was created.
    final List<IndexEntry> objectEntries = service.query(plist, ProspectorConstants.COUNT,
            TripleValueType.OBJECT.getIndexType(), Lists.newArrayList("mydata1"), string, auths);
    assertEquals(expectedCount("mydata1", string, TripleValueType.OBJECT, prospectTimestamp, 1), objectEntries);

    // Ensure one of the correct "subjectpredicate" counts was created.
    final List<IndexEntry> subjectPredicateEntries = service.query(plist, ProspectorConstants.COUNT,
            TripleValueType.SUBJECT_PREDICATE.getIndexType(), Lists.newArrayList("urn:gem:etype#1234", "urn:gem#pred"), string, auths);
    assertEquals(expectedCount("urn:gem:etype#1234" + delim + "urn:gem#pred", string, TripleValueType.SUBJECT_PREDICATE, prospectTimestamp, 3),
            subjectPredicateEntries);

    // Ensure one of the correct "predicateobject" counts was created.
    // XXX This might be a bug. The object matching doesn't care about type.
    final List<IndexEntry> predicateObjectEntries = service.query(plist, ProspectorConstants.COUNT,
            TripleValueType.PREDICATE_OBJECT.getIndexType(), Lists.newArrayList("urn:gem#pred", "12"), string, auths);
    assertEquals(expectedCount("urn:gem#pred" + delim + "12", string, TripleValueType.PREDICATE_OBJECT, prospectTimestamp, 2),
            predicateObjectEntries);

    // Ensure one of the correct "subjectobject" counts was created.
    final List<IndexEntry> subjectObjectEntries = service.query(plist, ProspectorConstants.COUNT,
            TripleValueType.SUBJECT_OBJECT.getIndexType(), Lists.newArrayList("urn:gem:etype#1234", "mydata1"), string, auths);
    assertEquals(expectedCount("urn:gem:etype#1234" + delim + "mydata1", string, TripleValueType.SUBJECT_OBJECT, prospectTimestamp, 1),
            subjectObjectEntries);
}

/**
 * Builds the single-element list of count index entries a query is expected to return.
 *
 * @param data the indexed value; compound entries join their two terms with {@code "\u0000"}
 * @param dataType the data type string stored on the entry
 * @param type the triple position(s) the entry counts
 * @param timestamp the timestamp of the prospect the entry belongs to
 * @param count the expected number of occurrences
 * @return a mutable list holding exactly one entry with an empty visibility
 */
private static List<IndexEntry> expectedCount(final String data, final String dataType, final TripleValueType type,
        final Long timestamp, final long count) {
    return Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData(data)
            .setDataType(dataType)
            .setTripleValueType(type.getIndexType())
            .setVisibility("")
            .setTimestamp(timestamp)
            .setCount(Long.valueOf(count))
            .build());
}
Also used : Path(org.apache.hadoop.fs.Path) Connector(org.apache.accumulo.core.client.Connector) AccumuloRyaDAO(org.apache.rya.accumulo.AccumuloRyaDAO) Configuration(org.apache.hadoop.conf.Configuration) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) ArrayList(java.util.ArrayList) RyaStatement(org.apache.rya.api.domain.RyaStatement) IndexEntry(org.apache.rya.prospector.domain.IndexEntry) RyaType(org.apache.rya.api.domain.RyaType) RyaURI(org.apache.rya.api.domain.RyaURI) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) ProspectorService(org.apache.rya.prospector.service.ProspectorService) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)1 Connector (org.apache.accumulo.core.client.Connector)1 Instance (org.apache.accumulo.core.client.Instance)1 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)1 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)1 Configuration (org.apache.hadoop.conf.Configuration)1 Path (org.apache.hadoop.fs.Path)1 AccumuloRdfConfiguration (org.apache.rya.accumulo.AccumuloRdfConfiguration)1 AccumuloRyaDAO (org.apache.rya.accumulo.AccumuloRyaDAO)1 RyaStatement (org.apache.rya.api.domain.RyaStatement)1 RyaType (org.apache.rya.api.domain.RyaType)1 RyaURI (org.apache.rya.api.domain.RyaURI)1 IndexEntry (org.apache.rya.prospector.domain.IndexEntry)1 ProspectorService (org.apache.rya.prospector.service.ProspectorService)1 Test (org.junit.Test)1