Use of org.apache.rya.prospector.domain.IndexEntry in project incubator-rya by Apache.
Class CountPlan, method query.
/**
 * Looks up "count" index entries in the prospector table.
 *
 * <p>Rows are addressed as {@code type + DELIM + compositeIndex + DELIM + reverseDate}. When
 * {@code prospectTimes} is supplied, one exact-row range is scanned per prospect time and at most
 * 1000 entries are returned. When it is {@code null}, every row under the
 * {@code type + DELIM + compositeIndex + DELIM} prefix is scanned and at most one entry is returned.
 *
 * <p>NOTE(review): a BatchScanner is documented as returning entries in no particular order, so the
 * single entry returned in the {@code null} case may not actually be the most recent prospect —
 * confirm whether a sorted Scanner is needed here.
 *
 * @param connector connection used to create the batch scanner; must not be null
 * @param tableName name of the table to scan; must not be null
 * @param prospectTimes prospect run times (epoch millis) to look up, or {@code null} to scan the whole prefix
 * @param type index type that forms the first row component; must not be null
 * @param compositeIndex indexed value(s) that form the middle row component; must not be null
 * @param dataType column qualifier to restrict the scan to, or {@code null} to fetch the whole COUNT family
 * @param auths scan authorizations
 * @return the matching entries, never more than the applicable maximum
 * @throws TableNotFoundException if the scanner cannot be created for {@code tableName}
 */
@Override
public List<IndexEntry> query(final Connector connector, final String tableName, final List<Long> prospectTimes, final String type, final String compositeIndex, final String dataType, final String[] auths) throws TableNotFoundException {
    assert connector != null && tableName != null && type != null && compositeIndex != null;

    // Build the ranges before opening the scanner so a failure here cannot leak it.
    final List<Range> ranges = new ArrayList<>();
    // by default only return 1000 prospects maximum
    int max = 1000;
    if (prospectTimes != null) {
        for (final Long prospectTime : prospectTimes) {
            ranges.add(new Range(type + DELIM + compositeIndex + DELIM + ProspectorUtils.getReverseIndexDateTime(new Date(prospectTime))));
        }
    } else {
        // only return the latest if no prospectTimes given
        max = 1;
        final String prefix = type + DELIM + compositeIndex + DELIM;
        ranges.add(new Range(prefix, prefix + RdfCloudTripleStoreConstants.LAST));
    }

    // The composite check depends only on the requested type, so evaluate it once.
    final boolean isComposite = type.equalsIgnoreCase(TripleValueType.SUBJECT_PREDICATE.getIndexType())
            || type.equalsIgnoreCase(TripleValueType.SUBJECT_OBJECT.getIndexType())
            || type.equalsIgnoreCase(TripleValueType.PREDICATE_OBJECT.getIndexType());

    final List<IndexEntry> indexEntries = new ArrayList<IndexEntry>();
    final BatchScanner bs = connector.createBatchScanner(tableName, new Authorizations(auths), 4);
    try {
        bs.setRanges(ranges);
        if (dataType != null) {
            bs.fetchColumn(new Text(COUNT), new Text(dataType));
        } else {
            bs.fetchColumnFamily(new Text(COUNT));
        }

        final Iterator<Entry<Key, Value>> iter = bs.iterator();
        // Strictly less than: stop as soon as 'max' entries have been collected.
        while (iter.hasNext() && indexEntries.size() < max) {
            final Entry<Key, Value> entry = iter.next();
            final Key k = entry.getKey();
            final Value v = entry.getValue();
            final String[] rowArr = k.getRow().toString().split(DELIM);

            // if it is a composite index, then return the type as a composite index
            final String values;
            if (isComposite) {
                values = rowArr[1] + DELIM + rowArr[2];
            } else {
                values = rowArr[1];
            }

            // Create an entry using the values that were found.
            final String entryDataType = k.getColumnQualifier().toString();
            final String entryVisibility = k.getColumnVisibility().toString();
            final Long entryCount = Long.parseLong(new String(v.get(), StandardCharsets.UTF_8));
            indexEntries.add(IndexEntry.builder()
                    .setData(values)
                    .setTripleValueType(rowArr[0])
                    .setIndex(COUNT)
                    .setDataType(entryDataType)
                    .setVisibility(entryVisibility)
                    .setCount(entryCount)
                    .setTimestamp(k.getTimestamp())
                    .build());
        }
    } finally {
        // Always release the scanner, even if parsing a row or value fails.
        bs.close();
    }
    return indexEntries;
}
Use of org.apache.rya.prospector.domain.IndexEntry in project incubator-rya by Apache.
Class ProspectorServiceEvalStatsDAO, method getCardinality.
/**
 * Returns the prospected count for the given value(s) of the requested triple part.
 *
 * <p>The cardinality kind is mapped to the matching {@link TripleValueType} index type, the
 * values are converted to their string form, and the prospector service is queried for COUNT
 * entries with no prospect times and no data type filter. The count of the first entry returned
 * is used.
 *
 * @param conf configuration supplying the scan authorizations; must not be null
 * @param card which triple part (or pair of parts) the cardinality is requested for; must not be null
 * @param val the value(s) to look up; must not be null
 * @return the count of the first matching entry, or {@code -1} if the query returned no entries
 * @throws RdfDAOException if the underlying prospector table cannot be found
 */
@Override
public double getCardinality(RdfCloudTripleStoreConfiguration conf, CARDINALITY_OF card, List<Value> val) throws RdfDAOException {
    assert conf != null && card != null && val != null;

    // Map the requested cardinality kind onto the prospector's index type name.
    String indexType = null;
    switch (card) {
        case SUBJECT:
            indexType = TripleValueType.SUBJECT.getIndexType();
            break;
        case PREDICATE:
            indexType = TripleValueType.PREDICATE.getIndexType();
            break;
        case OBJECT:
            indexType = TripleValueType.OBJECT.getIndexType();
            break;
        case SUBJECTPREDICATE:
            indexType = TripleValueType.SUBJECT_PREDICATE.getIndexType();
            break;
        case SUBJECTOBJECT:
            indexType = TripleValueType.SUBJECT_OBJECT.getIndexType();
            break;
        case PREDICATEOBJECT:
            indexType = TripleValueType.PREDICATE_OBJECT.getIndexType();
            break;
    }

    final String[] scanAuths = conf.getAuths();

    // The prospector is queried with the string form of each value.
    final List<String> terms = new ArrayList<>();
    for (final Value term : val) {
        terms.add(term.stringValue());
    }

    final List<IndexEntry> matches;
    try {
        matches = prospectorService.query(null, ProspectorConstants.COUNT, indexType, terms, null, scanAuths);
    } catch (final TableNotFoundException e) {
        throw new RdfDAOException(e);
    }

    // -1 signals that no count was found for the requested value(s).
    if (matches.isEmpty()) {
        return -1;
    }
    return matches.get(0).getCount();
}
Use of org.apache.rya.prospector.domain.IndexEntry in project incubator-rya by Apache.
Class ProspectorTest, method testCount.
/**
 * Loads five statements into a mock Accumulo instance, runs the Prospector job over them, and
 * verifies one COUNT entry for each index type: entity, subject, predicate, object,
 * subject-predicate, predicate-object and subject-object.
 */
@Test
public void testCount() throws Exception {
    // Load some data into a mock Accumulo and run the Prospector MapReduce job.
    final Instance mock = new MockInstance("accumulo");
    final Connector connector = mock.getConnector("user", new PasswordToken("pass"));
    final String outtable = "rya_prospects";
    // Start from an empty output table so earlier runs cannot affect the counts.
    if (connector.tableOperations().exists(outtable)) {
        connector.tableOperations().delete(outtable);
    }
    connector.tableOperations().create(outtable);

    final AccumuloRyaDAO ryaDAO = new AccumuloRyaDAO();
    ryaDAO.setConnector(connector);
    ryaDAO.init();
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata1")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("mydata2")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1234"), new RyaURI("urn:gem#pred"), new RyaType("12")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred"), new RyaType(XMLSchema.INTEGER, "12")));
    ryaDAO.add(new RyaStatement(new RyaURI("urn:gem:etype#1235"), new RyaURI("urn:gem#pred1"), new RyaType("12")));

    final String confFile = "stats_cluster_config.xml";
    final Path confPath = new Path(getClass().getClassLoader().getResource(confFile).toString());
    final String[] args = { confPath.toString() };
    ToolRunner.run(new Prospector(), args);
    ryaDAO.destroy();

    // Interrogate the results of the Prospect job to ensure the correct results were created.
    final Configuration conf = new Configuration();
    conf.addResource(confPath);
    final ProspectorService service = new ProspectorService(connector, outtable);
    final String[] auths = { "U", "FOUO" };

    // Exactly one prospect run should have been recorded.
    Iterator<Long> prospects = service.getProspects(auths);
    List<Long> plist = Lists.newArrayList(prospects);
    assertEquals(1, plist.size());
    final Long prospectTimestamp = plist.iterator().next();

    final AccumuloRdfConfiguration rdfConf = new AccumuloRdfConfiguration(conf);
    rdfConf.setAuths("U", "FOUO");

    // The same single run should be found when searching a time window around "now".
    prospects = service.getProspectsInRange(System.currentTimeMillis() - 100000, System.currentTimeMillis() + 10000, auths);
    plist = Lists.newArrayList(prospects);
    assertEquals(1, plist.size());

    // Ensure one of the correct "entity" counts was created.
    List<String> queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype");
    final List<IndexEntry> entityEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.ENTITY.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths);
    final List<IndexEntry> expectedEntityEntries = Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData("urn:gem:etype")
            .setDataType(XMLSchema.ANYURI.stringValue())
            .setTripleValueType(TripleValueType.ENTITY.getIndexType())
            .setVisibility("")
            .setTimestamp(prospectTimestamp)
            .setCount(Long.valueOf(5L))
            .build());
    assertEquals(expectedEntityEntries, entityEntries);

    // Ensure one of the correct "subject" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype#1234");
    final List<IndexEntry> subjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths);
    final List<IndexEntry> expectedSubjectEntries = Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData("urn:gem:etype#1234")
            .setDataType(XMLSchema.ANYURI.stringValue())
            .setTripleValueType(TripleValueType.SUBJECT.getIndexType())
            .setVisibility("")
            .setTimestamp(prospectTimestamp)
            .setCount(Long.valueOf(3L))
            .build());
    assertEquals(expectedSubjectEntries, subjectEntries);

    // Ensure one of the correct "predicate" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem#pred");
    final List<IndexEntry> predicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE.getIndexType(), queryTerms, XMLSchema.ANYURI.stringValue(), auths);
    final List<IndexEntry> expectedPredicateEntries = Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData("urn:gem#pred")
            .setDataType(XMLSchema.ANYURI.stringValue())
            .setTripleValueType(TripleValueType.PREDICATE.getIndexType())
            .setVisibility("")
            .setTimestamp(prospectTimestamp)
            .setCount(Long.valueOf(4L))
            .build());
    assertEquals(expectedPredicateEntries, predicateEntries);

    // Ensure one of the correct "object" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("mydata1");
    final List<IndexEntry> objectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
    final List<IndexEntry> expectedObjectEntries = Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData("mydata1")
            .setDataType(XMLSchema.STRING.stringValue())
            .setTripleValueType(TripleValueType.OBJECT.getIndexType())
            .setVisibility("")
            .setTimestamp(prospectTimestamp)
            .setCount(Long.valueOf(1L))
            .build());
    assertEquals(expectedObjectEntries, objectEntries);

    // Ensure one of the correct "subjectpredicate" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype#1234");
    queryTerms.add("urn:gem#pred");
    final List<IndexEntry> subjectPredicateEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_PREDICATE.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
    final List<IndexEntry> expectedSubjectPredicateEntries = Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData("urn:gem:etype#1234" + "\u0000" + "urn:gem#pred")
            .setDataType(XMLSchema.STRING.stringValue())
            .setTripleValueType(TripleValueType.SUBJECT_PREDICATE.getIndexType())
            .setVisibility("")
            .setTimestamp(prospectTimestamp)
            .setCount(Long.valueOf(3L))
            .build());
    assertEquals(expectedSubjectPredicateEntries, subjectPredicateEntries);

    // Ensure one of the correct "predicateobject" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem#pred");
    queryTerms.add("12");
    final List<IndexEntry> predicateObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.PREDICATE_OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
    // XXX This might be a bug. The object matching doesn't care about type.
    // Both the plain "12" and the XMLSchema.INTEGER "12" statements loaded above use
    // urn:gem#pred, and the expected count under the STRING data type is 2.
    final List<IndexEntry> expectedPredicateObjectEntries = Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData("urn:gem#pred" + "\u0000" + "12")
            .setDataType(XMLSchema.STRING.stringValue())
            .setTripleValueType(TripleValueType.PREDICATE_OBJECT.getIndexType())
            .setVisibility("")
            .setTimestamp(prospectTimestamp)
            .setCount(Long.valueOf(2L))
            .build());
    assertEquals(expectedPredicateObjectEntries, predicateObjectEntries);

    // Ensure one of the correct "subjectobject" counts was created.
    queryTerms = new ArrayList<>();
    queryTerms.add("urn:gem:etype#1234");
    queryTerms.add("mydata1");
    final List<IndexEntry> subjectObjectEntries = service.query(plist, ProspectorConstants.COUNT, TripleValueType.SUBJECT_OBJECT.getIndexType(), queryTerms, XMLSchema.STRING.stringValue(), auths);
    final List<IndexEntry> expectedSubjectObjectEntries = Lists.newArrayList(IndexEntry.builder()
            .setIndex(ProspectorConstants.COUNT)
            .setData("urn:gem:etype#1234" + "\u0000" + "mydata1")
            .setDataType(XMLSchema.STRING.stringValue())
            .setTripleValueType(TripleValueType.SUBJECT_OBJECT.getIndexType())
            .setVisibility("")
            .setTimestamp(prospectTimestamp)
            .setCount(Long.valueOf(1L))
            .build());
    assertEquals(expectedSubjectObjectEntries, subjectObjectEntries);
}
Aggregations