Search in sources :

Example 1 with IndexEntry

use of org.apache.rya.prospector.domain.IndexEntry in project incubator-rya by apache.

the class CountPlan method query.

/**
 * Looks up count index entries in the prospect table.
 *
 * <p>When {@code prospectTimes} is supplied, one exact-row range is scanned per
 * prospect time and at most 1000 entries are returned. When it is {@code null},
 * the whole {@code type + DELIM + compositeIndex + DELIM} prefix is scanned and
 * at most one entry is returned.
 *
 * @param connector connection used to create the batch scanner; must not be null
 * @param tableName name of the table to scan; must not be null
 * @param prospectTimes prospect timestamps to look up, or {@code null} to scan the whole prefix
 * @param type the triple value index type that forms the first row component; must not be null
 * @param compositeIndex the indexed value(s) that form the next row component(s); must not be null
 * @param dataType column qualifier to restrict the scan to, or {@code null} to fetch the whole
 *        count column family
 * @param auths authorizations the scan runs with
 * @return the entries that were found, never {@code null}
 * @throws TableNotFoundException if the scanner cannot be created because the table is missing
 */
@Override
public List<IndexEntry> query(final Connector connector, final String tableName, final List<Long> prospectTimes, final String type, final String compositeIndex, final String dataType, final String[] auths) throws TableNotFoundException {
    assert connector != null && tableName != null && type != null && compositeIndex != null;

    // Build the ranges before opening the scanner so a failure here cannot leak it.
    final List<Range> ranges = new ArrayList<>();
    // by default only return 1000 prospects maximum
    int max = 1000;
    if (prospectTimes != null) {
        for (final Long prospectTime : prospectTimes) {
            ranges.add(new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospectTime))));
        }
    } else {
        // only return the latest if no prospectTimes given
        // NOTE(review): this relies on the scanner yielding the newest row first - confirm
        // that holds for a BatchScanner.
        max = 1;
        final String prefix = type + DELIM + compositeIndex + DELIM;
        ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST));
    }

    // if it is a composite index, then return the type as a composite index
    // (the check does not depend on the scanned entry, so evaluate it once)
    final boolean isCompositeType = type.equalsIgnoreCase(TripleValueType.SUBJECT_PREDICATE.getIndexType())
            || type.equalsIgnoreCase(TripleValueType.SUBJECT_OBJECT.getIndexType())
            || type.equalsIgnoreCase(TripleValueType.PREDICATE_OBJECT.getIndexType());

    final List<IndexEntry> indexEntries = new ArrayList<>();
    final BatchScanner bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4);
    try {
        bs.setRanges(ranges);
        if (dataType != null) {
            bs.fetchColumn(new Text(COUNT), new Text(dataType));
        } else {
            bs.fetchColumnFamily(new Text(COUNT));
        }

        final Iterator<Entry<Key, Value>> iter = bs.iterator();
        // Strictly less than: stop once exactly 'max' entries have been collected.
        while (iter.hasNext() && indexEntries.size() < max) {
            final Entry<Key, Value> entry = iter.next();
            final Key k = entry.getKey();
            final Value v = entry.getValue();
            final String[] rowArr = k.getRow().toString().split(DELIM);
            final String values = isCompositeType ? rowArr[1] + DELIM + rowArr[2] : rowArr[1];

            // Create an entry using the values that were found.
            final String entryDataType = k.getColumnQualifier().toString();
            final String entryVisibility = k.getColumnVisibility().toString();
            final Long entryCount = Long.parseLong(new String(v.get(), StandardCharsets.UTF_8));
            indexEntries.add(IndexEntry.builder()
                    .setData(values)
                    .setTripleValueType(rowArr[0])
                    .setIndex(COUNT)
                    .setDataType(entryDataType)
                    .setVisibility(entryVisibility)
                    .setCount(entryCount)
                    .setTimestamp(k.getTimestamp())
                    .build());
        }
    } finally {
        // Always release the scanner, even if reading or parsing an entry fails.
        bs.close();
    }
    return indexEntries;
}
Also used : Authorizations(org.apache.accumulo.core.security.Authorizations) BatchScanner(org.apache.accumulo.core.client.BatchScanner) ArrayList(java.util.ArrayList) IndexEntry(org.apache.rya.prospector.domain.IndexEntry) Text(org.apache.hadoop.io.Text) Range(org.apache.accumulo.core.data.Range) Date(java.util.Date) CustomEntry(org.apache.rya.prospector.utils.CustomEntry) Entry(java.util.Map.Entry) Value(org.apache.accumulo.core.data.Value) Key(org.apache.accumulo.core.data.Key)

Example 2 with IndexEntry

use of org.apache.rya.prospector.domain.IndexEntry in project incubator-rya by apache.

the class ProspectorServiceEvalStatsDAO method getCardinality.

/**
 * Looks up the prospector count for the given values and reports it as a cardinality.
 *
 * @param conf configuration that supplies the authorizations for the lookup; must not be null
 * @param card which part (or pair of parts) of the triple the values belong to; must not be null
 * @param val the values to look up; must not be null
 * @return the count of the first entry returned by the prospector service, or {@code -1}
 *         when the service returns no entries
 * @throws RdfDAOException if the prospector service reports that its table is missing
 */
@Override
public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val) throws RdfDAOException {
    assert conf != null && card != null && val != null;

    // Translate the requested cardinality kind into the matching prospector index type.
    // Kinds without a case below leave the index type null.
    String indexType = null;
    switch (card) {
        case SUBJECT:
            indexType = TripleValueType.SUBJECT.getIndexType();
            break;
        case PREDICATE:
            indexType = TripleValueType.PREDICATE.getIndexType();
            break;
        case OBJECT:
            indexType = TripleValueType.OBJECT.getIndexType();
            break;
        case SUBJECTPREDICATE:
            indexType = TripleValueType.SUBJECT_PREDICATE.getIndexType();
            break;
        case SUBJECTOBJECT:
            indexType = TripleValueType.SUBJECT_OBJECT.getIndexType();
            break;
        case PREDICATEOBJECT:
            indexType = TripleValueType.PREDICATE_OBJECT.getIndexType();
            break;
    }

    final String[] auths = conf.getAuths();

    // The prospector service is queried with the string form of each value.
    final List<String> terms = new ArrayList<>();
    for (final Value term : val) {
        terms.add(term.stringValue());
    }

    final List<IndexEntry> found;
    try {
        found = prospectorService.query(null, ProspectorConstants.COUNT, indexType, terms, null, auths);
    } catch (final TableNotFoundException e) {
        throw new RdfDAOException(e);
    }

    // No entry means the cardinality is unknown.
    if (found.isEmpty()) {
        return -1;
    }
    final double cardinality = found.get(0).getCount();
    return cardinality;
}
Also used : TableNotFoundException(org.apache.accumulo.core.client.TableNotFoundException) ArrayList(java.util.ArrayList) Value(org.openrdf.model.Value) IndexEntry(org.apache.rya.prospector.domain.IndexEntry) RdfDAOException(org.apache.rya.api.persist.RdfDAOException)

Example 3 with IndexEntry

use of org.apache.rya.prospector.domain.IndexEntry in project incubator-rya by apache.

the class ProspectorTest method testCount.

/**
 * Runs the Prospector job over a small data set held in a mock Accumulo instance and
 * verifies one count entry for each triple value type (entity, subject, predicate, object,
 * subject-predicate, predicate-object and subject-object).
 */
@Test
public void testCount() throws Exception {
    // Load some data into a mock Accumulo and run the Prospector MapReduce job.
    final Instance mock = new MockInstance("accumulo");
    final Connector connector = mock.getConnector("user", new PasswordToken("pass"));
    final String outtable = "rya_prospects";
    if (connector.tableOperations().exists(outtable)) {
        connector.tableOperations().delete(outtable);
    }
    connector.tableOperations().create(outtable);
    final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
    ryaDAO.setConnector(connector);
    ryaDAO.init();
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")));
    final String confFile = "stats_cluster_config.xml";
    final Path confPath = new Path(getClass().getClassLoader().getResource(confFile).toString());
    final String[] args = { confPath.toString() };
    ToolRunner.run(new Prospector(), args);
    ryaDAO.destroy();

    // Interrogate the results of the Prospect job to ensure the correct results were created.
    final Configuration conf = new Configuration();
    conf.addResource(confPath);
    final ProspectorService service = new ProspectorService(connector, outtable);
    final String[] auths = { "U", "FOUO" };
    Iterator<Long> prospects = service.getProspects(auths);
    List<Long> plist = Lists.newArrayList(prospects);
    assertEquals(1, plist.size());
    final Long prospectTimestamp = plist.iterator().next();
    final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf);
    rdfConf.setAuths("U", "FOUO");
    prospects = service.getProspectsInRange(System.currentTimeMillis() - 100000, System.currentTimeMillis() + 10000, auths);
    plist = Lists.newArrayList(prospects);
    assertEquals(1, plist.size());

    final String anyUri = XMLSchema.ANYURI.stringValue();
    final String string = XMLSchema.STRING.stringValue();

    // Ensure one of the correct "entity" counts was created.
    List<String> queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype");
    final List<IndexEntry> entityEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.ENTITY.getIndexType(), queryTerms, anyUri, auths);
    final List<IndexEntry> expectedEntityEntries = Lists.newArrayList(
            expectedCountEntry(TripleValueType.ENTITY.getIndexType(), "urn:gem:etype", anyUri, prospectTimestamp, 5L));
    assertEquals(expectedEntityEntries, entityEntries);

    // Ensure one of the correct "subject" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype#1234");
    final List<IndexEntry> subjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT.getIndexType(), queryTerms, anyUri, auths);
    final List<IndexEntry> expectedSubjectEntries = Lists.newArrayList(
            expectedCountEntry(TripleValueType.SUBJECT.getIndexType(), "urn:gem:etype#1234", anyUri, prospectTimestamp, 3L));
    assertEquals(expectedSubjectEntries, subjectEntries);

    // Ensure one of the correct "predicate" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem#pred");
    final List<IndexEntry> predicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE.getIndexType(), queryTerms, anyUri, auths);
    final List<IndexEntry> expectedPredicateEntries = Lists.newArrayList(
            expectedCountEntry(TripleValueType.PREDICATE.getIndexType(), "urn:gem#pred", anyUri, prospectTimestamp, 4L));
    assertEquals(expectedPredicateEntries, predicateEntries);

    // Ensure one of the correct "object" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("mydata1");
    final List<IndexEntry> objectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.OBJECT.getIndexType(), queryTerms, string, auths);
    final List<IndexEntry> expectedObjectEntries = Lists.newArrayList(
            expectedCountEntry(TripleValueType.OBJECT.getIndexType(), "mydata1", string, prospectTimestamp, 1L));
    assertEquals(expectedObjectEntries, objectEntries);

    // Ensure one of the correct "subjectpredicate" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype#1234");
    queryTerms.add("urn:gem#pred");
    final List<IndexEntry> subjectPredicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_PREDICATE.getIndexType(), queryTerms, string, auths);
    final List<IndexEntry> expectedSubjectPredicateEntries = Lists.newArrayList(
            expectedCountEntry(TripleValueType.SUBJECT_PREDICATE.getIndexType(), "urn:gem:etype#1234" + "\u0000" + "urn:gem#pred", string, prospectTimestamp, 3L));
    assertEquals(expectedSubjectPredicateEntries, subjectPredicateEntries);

    // Ensure one of the correct "predicateobject" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem#pred");
    queryTerms.add("12");
    final List<IndexEntry> predicateObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE_OBJECT.getIndexType(), queryTerms, string, auths);
    // XXX This might be a bug. The object matching doesn't care about type.
    final List<IndexEntry> expectedPredicateObjectEntries = Lists.newArrayList(
            expectedCountEntry(TripleValueType.PREDICATE_OBJECT.getIndexType(), "urn:gem#pred" + "\u0000" + "12", string, prospectTimestamp, 2L));
    assertEquals(expectedPredicateObjectEntries, predicateObjectEntries);

    // Ensure one of the correct "subjectobject" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype#1234");
    queryTerms.add("mydata1");
    final List<IndexEntry> subjectObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_OBJECT.getIndexType(), queryTerms, string, auths);
    final List<IndexEntry> expectedSubjectObjectEntries = Lists.newArrayList(
            expectedCountEntry(TripleValueType.SUBJECT_OBJECT.getIndexType(), "urn:gem:etype#1234" + "\u0000" + "mydata1", string, prospectTimestamp, 1L));
    assertEquals(expectedSubjectObjectEntries, subjectObjectEntries);
}

/** Builds the count index entry, with empty visibility, that a query is expected to return. */
private static IndexEntry expectedCountEntry(final String tripleValueType, final String data, final String dataType, final Long timestamp, final long count) {
    return IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData(data)
            .setDataType(dataType)
            .setTripleValueType(tripleValueType)
            .setVisibility("")
            .setTimestamp(timestamp)
            .setCount(count)
            .build();
}
Also used : Path(org.apache.hadoop.fs.Path) Connector(org.apache.accumulo.core.client.Connector) AccumuloRyaDAO(org.apache.rya.accumulo.AccumuloRyaDAO) Configuration(org.apache.hadoop.conf.Configuration) AccumuloRdfConfiguration(org.apache.rya.accumulo.AccumuloRdfConfiguration) MockInstance(org.apache.accumulo.core.client.mock.MockInstance) Instance(org.apache.accumulo.core.client.Instance) ArrayList(java.util.ArrayList) RyaStatement(org.apache.rya.api.domain.RyaStatement) IndexEntry(org.apache.rya.prospector.domain.IndexEntry) RyaType(org.apache.rya.api.domain.RyaType) RyaURI(org.apache.rya.api.domain.RyaURI) PasswordToken(org.apache.accumulo.core.client.security.tokens.PasswordToken) ProspectorService(org.apache.rya.prospector.service.ProspectorService) Test(org.junit.Test)

Aggregations

ArrayList (java.util.ArrayList)3 IndexEntry (org.apache.rya.prospector.domain.IndexEntry)3 Date (java.util.Date)1 Entry (java.util.Map.Entry)1 BatchScanner (org.apache.accumulo.core.client.BatchScanner)1 Connector (org.apache.accumulo.core.client.Connector)1 Instance (org.apache.accumulo.core.client.Instance)1 TableNotFoundException (org.apache.accumulo.core.client.TableNotFoundException)1 MockInstance (org.apache.accumulo.core.client.mock.MockInstance)1 PasswordToken (org.apache.accumulo.core.client.security.tokens.PasswordToken)1 Key (org.apache.accumulo.core.data.Key)1 Range (org.apache.accumulo.core.data.Range)1 Value (org.apache.accumulo.core.data.Value)1 Authorizations (org.apache.accumulo.core.security.Authorizations)1 Configuration (org.apache.hadoop.conf.Configuration)1 Path (org.apache.hadoop.fs.Path)1 Text (org.apache.hadoop.io.Text)1 AccumuloRdfConfiguration (org.apache.rya.accumulo.AccumuloRdfConfiguration)1 AccumuloRyaDAO (org.apache.rya.accumulo.AccumuloRyaDAO)1 RyaStatement (org.apache.rya.api.domain.RyaStatement)1