Search in sources:

Example 21 with TopologyBuilder

use of org.apache.kafka.streams.processor.TopologyBuilder in project kafka by apache.

the class StreamThreadTest method testInjectClients.

/**
 * Checks that a {@code StreamThread} built with a {@code MockClientSupplier} exposes, as its
 * producer, consumer and restore consumer, the very same instances held by that supplier.
 */
@Test
public void testInjectClients() {
    final TopologyBuilder topology = new TopologyBuilder().setApplicationId("X");
    final StreamsConfig streamsConfig = new StreamsConfig(configProps());
    final MockClientSupplier supplier = new MockClientSupplier();

    // Collaborators are created in the same order the constructor call used to evaluate them.
    final Metrics metrics = new Metrics();
    final MockTime time = new MockTime();
    final StreamsMetadataState metadataState =
            new StreamsMetadataState(topology, StreamsMetadataState.UNKNOWN_HOST);

    final StreamThread streamThread = new StreamThread(
            topology,
            streamsConfig,
            supplier,
            applicationId,
            clientId,
            processId,
            metrics,
            time,
            metadataState,
            0);

    // Identity (not equality) is what matters: the thread must hold the supplier's own clients.
    assertSame(supplier.producer, streamThread.producer);
    assertSame(supplier.consumer, streamThread.consumer);
    assertSame(supplier.restoreConsumer, streamThread.restoreConsumer);
}
Also used : Metrics(org.apache.kafka.common.metrics.Metrics) StreamsMetrics(org.apache.kafka.streams.StreamsMetrics) TopologyBuilder(org.apache.kafka.streams.processor.TopologyBuilder) MockClientSupplier(org.apache.kafka.test.MockClientSupplier) MockTime(org.apache.kafka.common.utils.MockTime) StreamsConfig(org.apache.kafka.streams.StreamsConfig) Test(org.junit.Test)

Example 22 with TopologyBuilder

use of org.apache.kafka.streams.processor.TopologyBuilder in project kafka by apache.

the class StreamThreadTest method shouldNotNullPointerWhenStandbyTasksAssignedAndNoStateStoresForTopology.

/**
 * Drives a revoke/assign rebalance cycle on a {@code StreamThread} whose partition assignor
 * reports one standby task, while the topology itself (one source, one sink) declares no
 * state stores. The test passes if neither listener callback throws.
 */
@Test
public void shouldNotNullPointerWhenStandbyTasksAssignedAndNoStateStoresForTopology() throws Exception {
    final TopologyBuilder builder = new TopologyBuilder();
    builder.setApplicationId(applicationId)
            .addSource("name", "topic")
            .addSink("out", "output");

    final StreamsConfig config = new StreamsConfig(configProps());
    final StreamThread thread = new StreamThread(
            builder,
            config,
            new MockClientSupplier(),
            applicationId,
            clientId,
            processId,
            new Metrics(),
            new MockTime(),
            new StreamsMetadataState(builder, StreamsMetadataState.UNKNOWN_HOST),
            0);

    // Assignor stub that always reports a single standby task (0_0) on partition topic-0.
    final StreamPartitionAssignor standbyOnlyAssignor = new StreamPartitionAssignor() {

        @Override
        Map<TaskId, Set<TopicPartition>> standbyTasks() {
            final Set<TopicPartition> standbyPartitions = Utils.mkSet(new TopicPartition("topic", 0));
            return Collections.singletonMap(new TaskId(0, 0), standbyPartitions);
        }
    };
    thread.partitionAssignor(standbyOnlyAssignor);

    // No active partitions are revoked or assigned; only the standby task is in play.
    thread.rebalanceListener.onPartitionsRevoked(Collections.<TopicPartition>emptyList());
    thread.rebalanceListener.onPartitionsAssigned(Collections.<TopicPartition>emptyList());
}
Also used : Metrics(org.apache.kafka.common.metrics.Metrics) StreamsMetrics(org.apache.kafka.streams.StreamsMetrics) TaskId(org.apache.kafka.streams.processor.TaskId) TopologyBuilder(org.apache.kafka.streams.processor.TopologyBuilder) MockClientSupplier(org.apache.kafka.test.MockClientSupplier) TopicPartition(org.apache.kafka.common.TopicPartition) Map(java.util.Map) HashMap(java.util.HashMap) MockTime(org.apache.kafka.common.utils.MockTime) StreamsConfig(org.apache.kafka.streams.StreamsConfig) Test(org.junit.Test)

Example 23 with TopologyBuilder

use of org.apache.kafka.streams.processor.TopologyBuilder in project incubator-rya by apache.

the class TopologyFactory method build.

/**
 * Builds a Kafka Streams processing topology for a SPARQL query.
 * <p>
 * The query is parsed, its tuple expression is walked by a {@code QueryVisitor}, and the
 * resulting processor entries are wired into a {@link TopologyBuilder}: one source reading
 * {@code statementsTopic}, one processor per entry (statement patterns hang off the source,
 * everything else off its upstream nodes), a persistent state store for every join, left join
 * and group node, an output formatter attached to the root entry, and a sink writing to
 * {@code resultsTopic}.
 * <p>
 * As a side effect, {@code processorEntryList} is replaced with the entries produced for this
 * query.
 *
 * @param sparqlQuery the SPARQL query to turn into a topology; must not be null
 * @param statementsTopic the topic the source node reads from; must not be null
 * @param resultsTopic the topic the sink node writes to; must not be null
 * @param bNodeIdFactory handed to the {@code QueryVisitor}
 *        (NOTE(review): not null-checked here, unlike the other arguments - confirm whether
 *        null is ever legal)
 * @return the populated topology builder
 * @throws MalformedQueryException declared by the SPARQL parsing step
 * @throws TopologyBuilderException declared by the interface; where it is raised is not
 *         visible from this method
 * @throws IllegalStateException if the query produced no processor entries, so there is
 *         nothing to attach the output formatter to
 */
@Override
public TopologyBuilder build(final String sparqlQuery, final String statementsTopic, final String resultsTopic, final BNodeIdFactory bNodeIdFactory) throws MalformedQueryException, TopologyBuilderException {
    requireNonNull(sparqlQuery);
    requireNonNull(statementsTopic);
    requireNonNull(resultsTopic);
    final ParsedQuery parsedQuery = new SPARQLParser().parseQuery(sparqlQuery, null);
    final TopologyBuilder builder = new TopologyBuilder();
    final TupleExpr expr = parsedQuery.getTupleExpr();
    final QueryVisitor visitor = new QueryVisitor(bNodeIdFactory);
    expr.visit(visitor);
    processorEntryList = visitor.getProcessorEntryList();
    final Map<TupleExpr, String> idMap = visitor.getIDs();
    // Without at least one entry there is no root node to feed the output formatter. Fail with
    // a descriptive error instead of dereferencing a null entry further down.
    if (processorEntryList.isEmpty()) {
        throw new IllegalStateException("The query did not produce any processing nodes, so no topology can be built: " + sparqlQuery);
    }
    // add source node
    builder.addSource(SOURCE, new StringDeserializer(), new VisibilityStatementDeserializer(), statementsTopic);
    // processing the processor entry list in reverse order means we go from leaf
    // nodes -> parent nodes.
    // So, when the parent processing nodes get added, the upstream
    // processing node will already exist.
    for (int ii = processorEntryList.size() - 1; ii >= 0; ii--) {
        final ProcessorEntry entry = processorEntryList.get(ii);
        // statement patterns need to be connected to the Source.
        if (entry.getNode() instanceof StatementPattern) {
            builder.addProcessor(entry.getID(), entry.getSupplier(), SOURCE);
        } else {
            final List<TupleExpr> parents = entry.getUpstreamNodes();
            final String[] parentIDs = new String[parents.size()];
            for (int id = 0; id < parents.size(); id++) {
                parentIDs[id] = idMap.get(parents.get(id));
            }
            builder.addProcessor(entry.getID(), entry.getSupplier(), parentIDs);
        }
        // Add a state store for any node type that requires one (joins, left joins and groups).
        if (entry.getNode() instanceof Join || entry.getNode() instanceof LeftJoin || entry.getNode() instanceof Group) {
            // The store is named after, and connected to, the processor that uses it.
            final StateStoreSupplier storeSupplier = Stores.create(entry.getID()).withStringKeys().withValues(new VisibilityBindingSetSerde()).persistent().build();
            builder.addStateStore(storeSupplier, entry.getID());
        }
    }
    // The loop above finishes on index 0, so the first entry is the last processor added,
    // i.e. the root of the query; that is the node whose results get formatted.
    final ProcessorEntry rootEntry = processorEntryList.get(0);
    // Add a formatter that converts the ProcessorResults into the output format.
    final SinkEntry<?, ?> sinkEntry = visitor.getSinkEntry();
    builder.addProcessor("OUTPUT_FORMATTER", sinkEntry.getFormatterSupplier(), rootEntry.getID());
    // Add the sink.
    builder.addSink(SINK, resultsTopic, sinkEntry.getKeySerializer(), sinkEntry.getValueSerializer(), "OUTPUT_FORMATTER");
    return builder;
}
Also used: Group(org.openrdf.query.algebra.Group) SPARQLParser(org.openrdf.query.parser.sparql.SPARQLParser) ParsedQuery(org.openrdf.query.parser.ParsedQuery) TopologyBuilder(org.apache.kafka.streams.processor.TopologyBuilder) StringDeserializer(org.apache.kafka.common.serialization.StringDeserializer) LeftJoin(org.openrdf.query.algebra.LeftJoin) LeftOuterJoin(org.apache.rya.api.function.join.LeftOuterJoin) Join(org.openrdf.query.algebra.Join) IterativeJoin(org.apache.rya.api.function.join.IterativeJoin) NaturalJoin(org.apache.rya.api.function.join.NaturalJoin) VisibilityBindingSetSerde(org.apache.rya.streams.kafka.serialization.VisibilityBindingSetSerde) VisibilityStatementDeserializer(org.apache.rya.streams.kafka.serialization.VisibilityStatementDeserializer) TupleExpr(org.openrdf.query.algebra.TupleExpr) StatementPattern(org.openrdf.query.algebra.StatementPattern) StateStoreSupplier(org.apache.kafka.streams.processor.StateStoreSupplier)

Example 24 with TopologyBuilder

use of org.apache.kafka.streams.processor.TopologyBuilder in project incubator-rya by apache.

the class AggregationProcessorIT method multipleAggregations.

/**
 * Runs a query with two aggregations (min and max of {@code ?age}) through a topology built by
 * {@code TopologyFactory} and checks the emitted results.
 * <p>
 * The expected set holds the running (youngest, oldest) pair after each input statement, taken
 * in input order.
 */
@Test
public void multipleAggregations() throws Exception {
    // A query that figures out what the youngest and oldest ages are across all people.
    final String sparql =
            "SELECT (min(?age) as ?youngest) (max(?age) as ?oldest) " +
            "WHERE { " +
                "?person <urn:age> ?age " +
            "}";

    final ValueFactory vf = new ValueFactoryImpl();

    // Input statements: one <person> <urn:age> <age> triple per row, all with empty visibility.
    final String[] people = { "urn:Alice", "urn:Bob", "urn:Charlie", "urn:David", "urn:Eve" };
    final int[] ages = { 13, 14, 7, 5, 25 };
    final List<VisibilityStatement> statements = new ArrayList<>();
    for (int i = 0; i < people.length; i++) {
        statements.add(new VisibilityStatement(
                vf.createStatement(vf.createURI(people[i]), vf.createURI("urn:age"), vf.createLiteral(ages[i])),
                ""));
    }

    // Expected results: { youngest, oldest } as it stands after each of the statements above.
    final int[][] runningMinMax = {
            { 13, 13 },
            { 13, 14 },
            { 7, 14 },
            { 5, 14 },
            { 5, 25 }
    };
    final Set<VisibilityBindingSet> expected = new HashSet<>();
    for (final int[] minMax : runningMinMax) {
        final MapBindingSet bindings = new MapBindingSet();
        bindings.addBinding("youngest", vf.createLiteral(minMax[0]));
        bindings.addBinding("oldest", vf.createLiteral(minMax[1]));
        expected.add(new VisibilityBindingSet(bindings, ""));
    }

    // Topic names are derived from a fresh Rya instance name and query id.
    final String ryaInstance = UUID.randomUUID().toString();
    final UUID queryId = UUID.randomUUID();
    final String statementsTopic = KafkaTopics.statementsTopic(ryaInstance);
    final String resultsTopic = KafkaTopics.queryResultsTopic(ryaInstance, queryId);

    // Build the topology for the query and push the statements through it.
    final TopologyBuilder builder = new TopologyFactory().build(sparql, statementsTopic, resultsTopic, new RandomUUIDFactory());
    RyaStreamsTestUtil.runStreamProcessingTest(kafka, statementsTopic, resultsTopic, builder, statements, expected, VisibilityBindingSetDeserializer.class);
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) TopologyBuilder(org.apache.kafka.streams.processor.TopologyBuilder) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) ArrayList(java.util.ArrayList) ValueFactory(org.openrdf.model.ValueFactory) TopologyFactory(org.apache.rya.streams.kafka.topology.TopologyFactory) VisibilityStatement(org.apache.rya.api.model.VisibilityStatement) RandomUUIDFactory(org.apache.rya.api.function.projection.RandomUUIDFactory) MapBindingSet(org.openrdf.query.impl.MapBindingSet) UUID(java.util.UUID) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 25 with TopologyBuilder

use of org.apache.kafka.streams.processor.TopologyBuilder in project incubator-rya by apache.

the class AggregationProcessorIT method count.

/**
 * Runs a grouped {@code count} query (books per person) through a topology built by
 * {@code TopologyFactory} and checks the emitted results.
 * <p>
 * The expected set holds one binding set per input statement: the person, that person's book
 * count so far, and the visibility expected on the result.
 */
@Test
public void count() throws Exception {
    // A query that figures out how many books each person has.
    final String sparql =
            "SELECT ?person (count(?book) as ?bookCount) " +
            "WHERE { " +
                "?person <urn:hasBook> ?book " +
            "} GROUP BY ?person";

    final ValueFactory vf = new ValueFactoryImpl();

    // Input statements, one per row: { person, book title, visibility }.
    final String[][] ownership = {
            { "urn:Alice", "Book 1", "a" },
            { "urn:Bob", "Book 1", "" },
            { "urn:Alice", "Book 2", "b" }
    };
    final List<VisibilityStatement> statements = new ArrayList<>();
    for (final String[] row : ownership) {
        statements.add(new VisibilityStatement(
                vf.createStatement(vf.createURI(row[0]), vf.createURI("urn:hasBook"), vf.createLiteral(row[1])),
                row[2]));
    }

    // Expected results, one per row: { person, book count (as xsd:integer lexical form), visibility }.
    final String[][] expectedCounts = {
            { "urn:Alice", "1", "a" },
            { "urn:Bob", "1", "" },
            { "urn:Alice", "2", "a&b" }
    };
    final Set<VisibilityBindingSet> expected = new HashSet<>();
    for (final String[] row : expectedCounts) {
        final MapBindingSet bindings = new MapBindingSet();
        bindings.addBinding("person", vf.createURI(row[0]));
        bindings.addBinding("bookCount", vf.createLiteral(row[1], XMLSchema.INTEGER));
        expected.add(new VisibilityBindingSet(bindings, row[2]));
    }

    // Topic names are derived from a fresh Rya instance name and query id.
    final String ryaInstance = UUID.randomUUID().toString();
    final UUID queryId = UUID.randomUUID();
    final String statementsTopic = KafkaTopics.statementsTopic(ryaInstance);
    final String resultsTopic = KafkaTopics.queryResultsTopic(ryaInstance, queryId);

    // Build the topology for the query and push the statements through it.
    final TopologyBuilder builder = new TopologyFactory().build(sparql, statementsTopic, resultsTopic, new RandomUUIDFactory());
    RyaStreamsTestUtil.runStreamProcessingTest(kafka, statementsTopic, resultsTopic, builder, statements, expected, VisibilityBindingSetDeserializer.class);
}
Also used : VisibilityBindingSet(org.apache.rya.api.model.VisibilityBindingSet) TopologyBuilder(org.apache.kafka.streams.processor.TopologyBuilder) ValueFactoryImpl(org.openrdf.model.impl.ValueFactoryImpl) ArrayList(java.util.ArrayList) ValueFactory(org.openrdf.model.ValueFactory) TopologyFactory(org.apache.rya.streams.kafka.topology.TopologyFactory) VisibilityStatement(org.apache.rya.api.model.VisibilityStatement) RandomUUIDFactory(org.apache.rya.api.function.projection.RandomUUIDFactory) MapBindingSet(org.openrdf.query.impl.MapBindingSet) UUID(java.util.UUID) HashSet(java.util.HashSet) Test(org.junit.Test)

Aggregations

TopologyBuilder (org.apache.kafka.streams.processor.TopologyBuilder)38 Test (org.junit.Test)34 HashSet (java.util.HashSet)27 UUID (java.util.UUID)25 RandomUUIDFactory (org.apache.rya.api.function.projection.RandomUUIDFactory)25 VisibilityStatement (org.apache.rya.api.model.VisibilityStatement)24 TopologyFactory (org.apache.rya.streams.kafka.topology.TopologyFactory)24 ValueFactory (org.openrdf.model.ValueFactory)24 ValueFactoryImpl (org.openrdf.model.impl.ValueFactoryImpl)24 VisibilityBindingSet (org.apache.rya.api.model.VisibilityBindingSet)23 ArrayList (java.util.ArrayList)20 MapBindingSet (org.openrdf.query.impl.MapBindingSet)19 StreamsConfig (org.apache.kafka.streams.StreamsConfig)10 Metrics (org.apache.kafka.common.metrics.Metrics)8 MockClientSupplier (org.apache.kafka.test.MockClientSupplier)8 StreamsMetrics (org.apache.kafka.streams.StreamsMetrics)7 TaskId (org.apache.kafka.streams.processor.TaskId)7 TopicPartition (org.apache.kafka.common.TopicPartition)6 MockTime (org.apache.kafka.common.utils.MockTime)6 Properties (java.util.Properties)5