use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.
the class BatchIT method simpleJoinAdd.
@Test
public void simpleJoinAdd() throws Exception {
final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
RyaURI subj = new RyaURI("urn:subject_1");
RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
Set<RyaStatement> statements2 = getRyaStatements(statement2, 5);
// Create the PCJ table.
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
// Tell the Fluo app to maintain the PCJ.
String queryId = new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
List<String> ids = getNodeIdStrings(fluoClient, queryId);
String joinId = ids.get(2);
String rightSp = ids.get(4);
QueryBindingSet bs = new QueryBindingSet();
bs.addBinding("subject", vf.createURI("urn:subject_1"));
bs.addBinding("object1", vf.createURI("urn:object_0"));
VisibilityBindingSet vBs = new VisibilityBindingSet(bs);
URI uri = vf.createURI("urn:subject_1");
Bytes prefixBytes = BindingHashShardingFunction.getShardedScanPrefix(rightSp, uri);
Span span = Span.prefix(prefixBytes);
// Stream the data into Fluo.
InsertTriples inserter = new InsertTriples();
inserter.insert(fluoClient, statements2, Optional.absent());
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(0, 0, 0, 0, 5));
JoinBatchInformation batch = JoinBatchInformation.builder().setBatchSize(1).setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET).setSpan(span).setTask(Task.Add).setJoinType(JoinType.NATURAL_JOIN).setSide(Side.LEFT).setBs(vBs).build();
// Verify the end results of the query match the expected results.
createSpanBatch(fluoClient, joinId, batch);
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(5, 5, 5, 0, 5));
}
}
use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.
the class BatchIT method simpleJoinDelete.
@Test
public void simpleJoinDelete() throws Exception {
final String sparql = "SELECT ?subject ?object1 ?object2 WHERE { ?subject <urn:predicate_1> ?object1; " + " <urn:predicate_2> ?object2 } ";
try (FluoClient fluoClient = new FluoClientImpl(getFluoConfiguration())) {
RyaURI subj = new RyaURI("urn:subject_1");
RyaStatement statement1 = new RyaStatement(subj, new RyaURI("urn:predicate_1"), null);
RyaStatement statement2 = new RyaStatement(subj, new RyaURI("urn:predicate_2"), null);
Set<RyaStatement> statements1 = getRyaStatements(statement1, 5);
Set<RyaStatement> statements2 = getRyaStatements(statement2, 5);
// Create the PCJ table.
final PrecomputedJoinStorage pcjStorage = new AccumuloPcjStorage(getAccumuloConnector(), getRyaInstanceName());
final String pcjId = pcjStorage.createPcj(sparql);
// Tell the Fluo app to maintain the PCJ.
String queryId = new CreateFluoPcj().withRyaIntegration(pcjId, pcjStorage, fluoClient, getAccumuloConnector(), getRyaInstanceName()).getQueryId();
List<String> ids = getNodeIdStrings(fluoClient, queryId);
String joinId = ids.get(2);
String rightSp = ids.get(4);
QueryBindingSet bs = new QueryBindingSet();
bs.addBinding("subject", vf.createURI("urn:subject_1"));
bs.addBinding("object1", vf.createURI("urn:object_0"));
VisibilityBindingSet vBs = new VisibilityBindingSet(bs);
// create sharded span for deletion
URI uri = vf.createURI("urn:subject_1");
Bytes prefixBytes = BindingHashShardingFunction.getShardedScanPrefix(rightSp, uri);
Span span = Span.prefix(prefixBytes);
// Stream the data into Fluo.
InsertTriples inserter = new InsertTriples();
inserter.insert(fluoClient, statements1, Optional.absent());
inserter.insert(fluoClient, statements2, Optional.absent());
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 25, 5, 5));
JoinBatchInformation batch = JoinBatchInformation.builder().setBatchSize(1).setColumn(FluoQueryColumns.STATEMENT_PATTERN_BINDING_SET).setSpan(span).setTask(Task.Delete).setJoinType(JoinType.NATURAL_JOIN).setSide(Side.LEFT).setBs(vBs).build();
// Verify the end results of the query match the expected results.
createSpanBatch(fluoClient, joinId, batch);
getMiniFluo().waitForObservers();
verifyCounts(fluoClient, ids, Arrays.asList(25, 25, 20, 5, 5));
}
}
use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.
the class PeriodicNotificationBinPrunerIT method periodicPrunerTest.
@Test
public void periodicPrunerTest() throws Exception {
String sparql = // n
"prefix function: <http://org.apache.rya/function#> " + // n
"prefix time: <http://www.w3.org/2006/time#> " + // n
"select ?id (count(?obs) as ?total) where {" + // n
"Filter(function:periodic(?time, 2, .5, time:hours)) " + // n
"?obs <uri:hasTime> ?time. " + // n
"?obs <uri:hasId> ?id } group by ?id";
FluoClient fluo = new FluoClientImpl(super.getFluoConfiguration());
// initialize resources and create pcj
PeriodicQueryResultStorage periodicStorage = new AccumuloPeriodicQueryResultStorage(super.getAccumuloConnector(), getRyaInstanceName());
CreatePeriodicQuery createPeriodicQuery = new CreatePeriodicQuery(fluo, periodicStorage);
String queryId = FluoQueryUtils.convertFluoQueryIdToPcjId(createPeriodicQuery.createPeriodicQuery(sparql).getQueryId());
// create statements to ingest into Fluo
final ValueFactory vf = new ValueFactoryImpl();
final DatatypeFactory dtf = DatatypeFactory.newInstance();
ZonedDateTime time = ZonedDateTime.now();
long currentTime = time.toInstant().toEpochMilli();
ZonedDateTime zTime1 = time.minusMinutes(30);
String time1 = zTime1.format(DateTimeFormatter.ISO_INSTANT);
ZonedDateTime zTime2 = zTime1.minusMinutes(30);
String time2 = zTime2.format(DateTimeFormatter.ISO_INSTANT);
ZonedDateTime zTime3 = zTime2.minusMinutes(30);
String time3 = zTime3.format(DateTimeFormatter.ISO_INSTANT);
ZonedDateTime zTime4 = zTime3.minusMinutes(30);
String time4 = zTime4.format(DateTimeFormatter.ISO_INSTANT);
final Collection<Statement> statements = Sets.newHashSet(vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time1))), vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasId"), vf.createLiteral("id_1")), vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time2))), vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasId"), vf.createLiteral("id_2")), vf.createStatement(vf.createURI("urn:obs_3"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time3))), vf.createStatement(vf.createURI("urn:obs_3"), vf.createURI("uri:hasId"), vf.createLiteral("id_3")), vf.createStatement(vf.createURI("urn:obs_4"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time4))), vf.createStatement(vf.createURI("urn:obs_4"), vf.createURI("uri:hasId"), vf.createLiteral("id_4")), vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time4))), vf.createStatement(vf.createURI("urn:obs_1"), vf.createURI("uri:hasId"), vf.createLiteral("id_1")), vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasTime"), vf.createLiteral(dtf.newXMLGregorianCalendar(time3))), vf.createStatement(vf.createURI("urn:obs_2"), vf.createURI("uri:hasId"), vf.createLiteral("id_2")));
// add statements to Fluo
InsertTriples inserter = new InsertTriples();
statements.forEach(x -> inserter.insert(fluo, RdfToRyaConversions.convertStatement(x)));
super.getMiniFluo().waitForObservers();
// FluoITHelper.printFluoTable(fluo);
// Create the expected results of the SPARQL query once the PCJ has been
// computed.
final Set<BindingSet> expected1 = new HashSet<>();
final Set<BindingSet> expected2 = new HashSet<>();
final Set<BindingSet> expected3 = new HashSet<>();
final Set<BindingSet> expected4 = new HashSet<>();
long period = 1800000;
long binId = (currentTime / period) * period;
long bin1 = binId;
long bin2 = binId + period;
long bin3 = binId + 2 * period;
long bin4 = binId + 3 * period;
MapBindingSet bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("2", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_1", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin1));
expected1.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("2", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_2", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin1));
expected1.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("1", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_3", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin1));
expected1.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("1", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_4", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin1));
expected1.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("1", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_1", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin2));
expected2.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("2", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_2", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin2));
expected2.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("1", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_3", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin2));
expected2.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("1", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_1", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin3));
expected3.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("1", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_2", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin3));
expected3.add(bs);
bs = new MapBindingSet();
bs.addBinding("total", vf.createLiteral("1", XMLSchema.INTEGER));
bs.addBinding("id", vf.createLiteral("id_1", XMLSchema.STRING));
bs.addBinding("periodicBinId", vf.createLiteral(bin4));
expected4.add(bs);
// make sure that expected and actual results align after ingest
compareResults(periodicStorage, queryId, bin1, expected1);
compareResults(periodicStorage, queryId, bin2, expected2);
compareResults(periodicStorage, queryId, bin3, expected3);
compareResults(periodicStorage, queryId, bin4, expected4);
BlockingQueue<NodeBin> bins = new LinkedBlockingQueue<>();
PeriodicQueryPrunerExecutor pruner = new PeriodicQueryPrunerExecutor(periodicStorage, fluo, 1, bins);
pruner.start();
bins.add(new NodeBin(queryId, bin1));
bins.add(new NodeBin(queryId, bin2));
bins.add(new NodeBin(queryId, bin3));
bins.add(new NodeBin(queryId, bin4));
Thread.sleep(10000);
compareResults(periodicStorage, queryId, bin1, new HashSet<>());
compareResults(periodicStorage, queryId, bin2, new HashSet<>());
compareResults(periodicStorage, queryId, bin3, new HashSet<>());
compareResults(periodicStorage, queryId, bin4, new HashSet<>());
compareFluoCounts(fluo, queryId, bin1);
compareFluoCounts(fluo, queryId, bin2);
compareFluoCounts(fluo, queryId, bin3);
compareFluoCounts(fluo, queryId, bin4);
pruner.stop();
}
use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.
the class LoadTriplesCommand method execute.
@Override
public void execute(final Connector accumulo, final String ryaTablePrefix, final RyaSailRepository rya, final FluoClient fluo, final String[] args) throws ArgumentsException, ExecutionException {
checkNotNull(accumulo);
checkNotNull(fluo);
checkNotNull(args);
log.trace("Executing the Load Triples Command...");
// Parse the command line arguments.
final Parameters params = new Parameters();
try {
new JCommander(params, args);
} catch (final ParameterException e) {
throw new ArgumentsException("Could not load the Triples file because of invalid command line parameters.", e);
}
// Iterate over the Statements that are in the input file and write them to Fluo.
log.trace("Loading RDF Statements from the Triples file '" + params.nTriplesFile + "'.");
final Path triplesPath = Paths.get(params.nTriplesFile);
try {
final RDFParser parser = Rio.createParser(RDFFormat.forFileName(triplesPath.getFileName().toString()));
final FluoLoader loader = new FluoLoader(fluo, new InsertTriples());
parser.setRDFHandler(loader);
parser.parse(Files.newInputStream(triplesPath), triplesPath.toUri().toString());
} catch (final Exception e) {
throw new ExecutionException("Could not load the RDF file into the Fluo app.", e);
}
log.trace("Finished executing the Load Triples Command.");
}
use of org.apache.rya.indexing.pcj.fluo.api.InsertTriples in project incubator-rya by apache.
the class PeriodicNotificationApplicationIT method addData.
private void addData(final Collection<Statement> statements) throws DatatypeConfigurationException {
// add statements to Fluo
try (FluoClient fluo = new FluoClientImpl(getFluoConfiguration())) {
final InsertTriples inserter = new InsertTriples();
statements.forEach(x -> inserter.insert(fluo, RdfToRyaConversions.convertStatement(x)));
getMiniFluo().waitForObservers();
}
}
Aggregations