Search in sources :

Example 6 with BatchSource

use of com.hazelcast.jet.pipeline.BatchSource in project hazelcast by hazelcast.

the class ObservableResultsTest method batchJobFails.

/**
 * Runs a batch job whose source throws from its fill-buffer function and
 * checks that the job ends up FAILED and that the registered test observer
 * is checked for the thrown error and zero completions.
 */
@Test
public void batchJobFails() {
    // given: a source that throws every time it is asked to fill its buffer
    BatchSource<String> failingSource = SourceBuilder
            .batch("error-source", ctx -> null)
            .<String>fillBufferFn((state, buffer) -> {
                throw new RuntimeException("Intentionally thrown!");
            })
            .destroyFn(ConsumerEx.noop())
            .build();
    Pipeline p = Pipeline.create();
    p.readFrom(failingSource).writeTo(Sinks.observable(observableName));

    // when: the job is submitted and eventually reports FAILED
    Job failedJob = hz().getJet().newJob(p);
    assertTrueEventually(() -> assertEquals(JobStatus.FAILED, failedJob.getStatus()));

    // then: no expected values are passed, the error message must match,
    // and the completion count must be zero
    assertSortedValues(testObserver);
    assertError(testObserver, "Intentionally thrown!");
    assertCompletions(testObserver, 0);
}
Also used : ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) Arrays(java.util.Arrays) QuickTest(com.hazelcast.test.annotation.QuickTest) Observable(com.hazelcast.jet.Observable) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) BatchStage(com.hazelcast.jet.pipeline.BatchStage) Future(java.util.concurrent.Future) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Assert.fail(org.junit.Assert.fail) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Job(com.hazelcast.jet.Job) TestInClusterSupport(com.hazelcast.jet.TestInClusterSupport) Before(org.junit.Before) RingbufferProxy(com.hazelcast.ringbuffer.impl.RingbufferProxy) LongStream(java.util.stream.LongStream) BatchSource(com.hazelcast.jet.pipeline.BatchSource) Pipeline(com.hazelcast.jet.pipeline.Pipeline) SimpleEvent(com.hazelcast.jet.pipeline.test.SimpleEvent) Assert.assertNotNull(org.junit.Assert.assertNotNull) Sinks(com.hazelcast.jet.pipeline.Sinks) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) ConsumerEx(com.hazelcast.function.ConsumerEx) Test(org.junit.Test) Observer(com.hazelcast.jet.function.Observer) UUID(java.util.UUID) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) TestSources(com.hazelcast.jet.pipeline.test.TestSources) List(java.util.List) Stream(java.util.stream.Stream) Assert.assertNull(org.junit.Assert.assertNull) Ignore(org.junit.Ignore) Assert.assertFalse(org.junit.Assert.assertFalse) SourceBuilder(com.hazelcast.jet.pipeline.SourceBuilder) Comparator(java.util.Comparator) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Job(com.hazelcast.jet.Job) Pipeline(com.hazelcast.jet.pipeline.Pipeline) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) QuickTest(com.hazelcast.test.annotation.QuickTest) Test(org.junit.Test)

Example 7 with BatchSource

use of com.hazelcast.jet.pipeline.BatchSource in project gora by apache.

the class JetTest method testNewJetSource.

/**
 * Reads page views with keys 0..55 from the input data store through a Jet
 * batch source, keeps only those whose IP is {@code 88.240.129.183}, maps
 * them to {@link ResultPageView} records and drains them into the output
 * data store. Afterwards the output store is queried and the test asserts
 * that exactly two records were written, all carrying the expected IP.
 *
 * @throws Exception if creating the data stores or querying the results fails
 */
@Test
public void testNewJetSource() throws Exception {
    DataStore<Long, Pageview> dataStoreIn =
            DataStoreFactory.getDataStore(Long.class, Pageview.class, utility.getConfiguration());
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());
    query = dataStoreIn.newQuery();
    query.setStartKey(0L);
    query.setEndKey(55L);
    JetEngine<Long, Pageview, Long, ResultPageView> jetEngine = new JetEngine<>();
    BatchSource<JetInputOutputFormat<Long, Pageview>> fileSource = jetEngine.createDataSource(dataStoreIn, query);
    Pipeline p = Pipeline.create();
    p.drawFrom(fileSource).filter(item -> item.getValue().getIp().toString().equals("88.240.129.183")).map(e -> {
        ResultPageView resultPageView = new ResultPageView();
        resultPageView.setIp(e.getValue().getIp());
        resultPageView.setTimestamp(e.getValue().getTimestamp());
        resultPageView.setUrl(e.getValue().getUrl());
        return new JetInputOutputFormat<Long, ResultPageView>(e.getValue().getTimestamp(), resultPageView);
    }).drainTo(jetEngine.createDataSink(dataStoreOut));
    // Start the members inside the try block so that a failure while starting
    // the second member still shuts down the first one in the finally clause.
    try {
        JetInstance jet = Jet.newJetInstance();
        // Second member; presumably started so the job runs on a two-member
        // cluster -- TODO confirm this is intentional.
        Jet.newJetInstance();
        jet.newJob(p).join();
    } finally {
        Jet.shutdownAll();
    }
    // Named differently from the 'query' field assigned above to avoid shadowing it.
    Query<Long, ResultPageView> resultQuery = dataStoreOut.newQuery();
    Result<Long, ResultPageView> result = resultQuery.execute();
    int noOfOutputRecords = 0;
    while (result.next()) {
        noOfOutputRecords++;
        assertEquals("88.240.129.183", result.get().getIp().toString());
    }
    assertEquals(2, noOfOutputRecords);
}
Also used : AggregateOperations.counting(com.hazelcast.jet.aggregate.AggregateOperations.counting) JetInstance(com.hazelcast.jet.JetInstance) BatchSource(com.hazelcast.jet.pipeline.BatchSource) BeforeClass(org.junit.BeforeClass) Pipeline(com.hazelcast.jet.pipeline.Pipeline) DataStore(org.apache.gora.store.DataStore) Functions.wholeItem(com.hazelcast.jet.function.Functions.wholeItem) Sinks(com.hazelcast.jet.pipeline.Sinks) Test(org.junit.Test) Pageview(org.apache.gora.jet.generated.Pageview) ResultPageView(org.apache.gora.jet.generated.ResultPageView) Result(org.apache.gora.query.Result) DataStoreFactory(org.apache.gora.store.DataStoreFactory) IMap(com.hazelcast.core.IMap) HBaseTestingUtility(org.apache.hadoop.hbase.HBaseTestingUtility) Query(org.apache.gora.query.Query) Traversers.traverseArray(com.hazelcast.jet.Traversers.traverseArray) Jet(com.hazelcast.jet.Jet) Pattern(java.util.regex.Pattern) GoraException(org.apache.gora.util.GoraException) Assert.assertEquals(org.junit.Assert.assertEquals) JetInstance(com.hazelcast.jet.JetInstance) Pipeline(com.hazelcast.jet.pipeline.Pipeline) Pageview(org.apache.gora.jet.generated.Pageview) ResultPageView(org.apache.gora.jet.generated.ResultPageView) Test(org.junit.Test)

Example 8 with BatchSource

use of com.hazelcast.jet.pipeline.BatchSource in project hazelcast by hazelcast.

the class ObservableResultsTest method sinkConsumesThrowables.

/**
 * Pushes four different {@link Throwable} instances through a pipeline as
 * ordinary items into an observable sink, then reads them back sorted by
 * message and checks that all four arrived.
 *
 * @throws Exception if waiting for the collected results fails
 */
@Test
public void sinkConsumesThrowables() throws Exception {
    // given: throwables used as plain data items
    BatchSource<Throwable> input = TestSources.items(
            new RuntimeException("runtime_exception"),
            new Exception("exception"),
            new Error("error"),
            new Throwable("throwable"));
    Pipeline pipeline = Pipeline.create();
    pipeline.readFrom(input).writeTo(Sinks.observable("throwables"));

    // when: the job has run to completion and the observable is drained
    hz().getJet().newJob(pipeline).join();
    Observable<Object> throwables = hz().getJet().getObservable("throwables");
    List<Object> results = throwables.toFuture(items -> {
        // order by message so the assertions below do not depend on arrival order
        Comparator<Object> byMessage = Comparator.comparing(item -> ((Throwable) item).getMessage());
        return items.sorted(byMessage).collect(Collectors.toList());
    }).get();

    // then: every item is present, in alphabetical order of its message
    String[] expectedMessages = {"error", "exception", "runtime_exception", "throwable"};
    assertEquals(4, results.size());
    for (int i = 0; i < expectedMessages.length; i++) {
        assertEquals(expectedMessages[i], ((Throwable) results.get(i)).getMessage());
    }
}
Also used : ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) Arrays(java.util.Arrays) QuickTest(com.hazelcast.test.annotation.QuickTest) Observable(com.hazelcast.jet.Observable) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) BatchStage(com.hazelcast.jet.pipeline.BatchStage) Future(java.util.concurrent.Future) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Assert.fail(org.junit.Assert.fail) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Job(com.hazelcast.jet.Job) TestInClusterSupport(com.hazelcast.jet.TestInClusterSupport) Before(org.junit.Before) RingbufferProxy(com.hazelcast.ringbuffer.impl.RingbufferProxy) LongStream(java.util.stream.LongStream) BatchSource(com.hazelcast.jet.pipeline.BatchSource) Pipeline(com.hazelcast.jet.pipeline.Pipeline) SimpleEvent(com.hazelcast.jet.pipeline.test.SimpleEvent) Assert.assertNotNull(org.junit.Assert.assertNotNull) Sinks(com.hazelcast.jet.pipeline.Sinks) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) ConsumerEx(com.hazelcast.function.ConsumerEx) Test(org.junit.Test) Observer(com.hazelcast.jet.function.Observer) UUID(java.util.UUID) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) TestSources(com.hazelcast.jet.pipeline.test.TestSources) List(java.util.List) Stream(java.util.stream.Stream) Assert.assertNull(org.junit.Assert.assertNull) Ignore(org.junit.Ignore) Assert.assertFalse(org.junit.Assert.assertFalse) SourceBuilder(com.hazelcast.jet.pipeline.SourceBuilder) Comparator(java.util.Comparator) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Pipeline(com.hazelcast.jet.pipeline.Pipeline) Comparator(java.util.Comparator) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) QuickTest(com.hazelcast.test.annotation.QuickTest) Test(org.junit.Test)

Example 9 with BatchSource

use of com.hazelcast.jet.pipeline.BatchSource in project hazelcast by hazelcast.

the class ObservableResultsTest method observableRegisteredAfterJobFailedGetError.

/**
 * Runs a batch job whose source throws from its fill-buffer function, waits
 * for the job to reach FAILED, and only then registers an additional
 * observer on the same observable. Both the originally registered observer
 * and the late one are checked for the thrown error and zero completions.
 */
@Test
public void observableRegisteredAfterJobFailedGetError() {
    // given: a job that fails because its source throws while filling the buffer
    BatchSource<String> errorSource = SourceBuilder.batch("error-source", x -> null).<String>fillBufferFn((in, Void) -> {
        throw new RuntimeException("Intentionally thrown!");
    }).destroyFn(ConsumerEx.noop()).build();
    Pipeline pipeline = Pipeline.create();
    pipeline.readFrom(errorSource).writeTo(Sinks.observable(observableName));
    Job job = hz().getJet().newJob(pipeline);
    assertTrueEventually(() -> assertEquals(JobStatus.FAILED, job.getStatus()));
    // when: an observer is attached after the failure
    TestObserver otherTestObserver = new TestObserver();
    Observable<Long> lateObservable = hz().getJet().getObservable(observableName);
    lateObservable.addObserver(otherTestObserver);
    // then: the observer registered up front saw the error ...
    assertSortedValues(testObserver);
    assertError(testObserver, "Intentionally thrown!");
    assertCompletions(testObserver, 0);
    // ... and so does the late observer, which is the case this test is named
    // after (previously it was registered but never asserted on)
    assertSortedValues(otherTestObserver);
    assertError(otherTestObserver, "Intentionally thrown!");
    assertCompletions(otherTestObserver, 0);
}
Also used : ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) Arrays(java.util.Arrays) QuickTest(com.hazelcast.test.annotation.QuickTest) Observable(com.hazelcast.jet.Observable) AtomicReference(java.util.concurrent.atomic.AtomicReference) ArrayList(java.util.ArrayList) BatchStage(com.hazelcast.jet.pipeline.BatchStage) Future(java.util.concurrent.Future) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) After(org.junit.After) Assert.fail(org.junit.Assert.fail) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) Job(com.hazelcast.jet.Job) TestInClusterSupport(com.hazelcast.jet.TestInClusterSupport) Before(org.junit.Before) RingbufferProxy(com.hazelcast.ringbuffer.impl.RingbufferProxy) LongStream(java.util.stream.LongStream) BatchSource(com.hazelcast.jet.pipeline.BatchSource) Pipeline(com.hazelcast.jet.pipeline.Pipeline) SimpleEvent(com.hazelcast.jet.pipeline.test.SimpleEvent) Assert.assertNotNull(org.junit.Assert.assertNotNull) Sinks(com.hazelcast.jet.pipeline.Sinks) Set(java.util.Set) Assert.assertTrue(org.junit.Assert.assertTrue) ConsumerEx(com.hazelcast.function.ConsumerEx) Test(org.junit.Test) Observer(com.hazelcast.jet.function.Observer) UUID(java.util.UUID) Category(org.junit.experimental.categories.Category) Collectors(java.util.stream.Collectors) TestSources(com.hazelcast.jet.pipeline.test.TestSources) List(java.util.List) Stream(java.util.stream.Stream) Assert.assertNull(org.junit.Assert.assertNull) Ignore(org.junit.Ignore) Assert.assertFalse(org.junit.Assert.assertFalse) SourceBuilder(com.hazelcast.jet.pipeline.SourceBuilder) Comparator(java.util.Comparator) Collections(java.util.Collections) Assert.assertEquals(org.junit.Assert.assertEquals) Job(com.hazelcast.jet.Job) Pipeline(com.hazelcast.jet.pipeline.Pipeline) ParallelJVMTest(com.hazelcast.test.annotation.ParallelJVMTest) QuickTest(com.hazelcast.test.annotation.QuickTest) Test(org.junit.Test)

Example 10 with BatchSource

use of com.hazelcast.jet.pipeline.BatchSource in project hazelcast by hazelcast.

the class S3Sources method s3.

/**
 * Creates an AWS S3 {@link BatchSource} which lists all the objects in the
 * given buckets that match {@code prefix}, reads each object line by line
 * using the given {@code charset}, converts every line to an output item
 * with {@code mapFn} and emits the items downstream.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * The default local parallelism for this processor is 2.
 * <p>
 * Here is an example which reads the objects from two buckets, applying
 * the given prefix.
 *
 * <pre>{@code
 * Pipeline p = Pipeline.create();
 * BatchStage<String> srcStage = p.readFrom(S3Sources.s3(
 *      Arrays.asList("bucket1", "bucket2"),
 *      "prefix",
 *      StandardCharsets.UTF_8,
 *      () -> S3Client.create(),
 *      (filename, line) -> line
 * ));
 * }</pre>
 *
 * @param bucketNames    list of bucket-names
 * @param prefix         the prefix to filter the objects. Optional, passing
 *                       {@code null} will list all objects.
 * @param charset        the charset used to decode the object contents
 *                       into lines
 * @param clientSupplier function which returns the s3 client to use
 *                       one client per processor instance is used
 * @param mapFn          the function which creates output object from each
 *                       line. Gets the object name and line as parameters
 * @param <T>            the type of the items the source emits
 * @return the batch source built by the stream-based {@code s3} overload
 */
@Nonnull
public static <T> BatchSource<T> s3(@Nonnull List<String> bucketNames, @Nullable String prefix, @Nonnull Charset charset, @Nonnull SupplierEx<? extends S3Client> clientSupplier, @Nonnull BiFunctionEx<String, String, ? extends T> mapFn) {
    // Only the charset's name is captured by the lambda below; the Charset is
    // looked up again inside it. NOTE(review): presumably because the lambda
    // has to be serializable and Charset is not -- confirm before simplifying.
    String charsetName = charset.name();
    FunctionEx<InputStream, Stream<String>> readFileFn = objectContent ->
            new BufferedReader(new InputStreamReader(objectContent, Charset.forName(charsetName))).lines();
    return s3(bucketNames, prefix, clientSupplier, readFileFn, mapFn);
}
Also used : Traverser(com.hazelcast.jet.Traverser) S3Object(software.amazon.awssdk.services.s3.model.S3Object) Traversers.traverseStream(com.hazelcast.jet.Traversers.traverseStream) GetObjectResponse(software.amazon.awssdk.services.s3.model.GetObjectResponse) BiFunctionEx(com.hazelcast.function.BiFunctionEx) Charset(java.nio.charset.Charset) Util.entry(com.hazelcast.jet.Util.entry) GetObjectRequest(software.amazon.awssdk.services.s3.model.GetObjectRequest) SourceBuffer(com.hazelcast.jet.pipeline.SourceBuilder.SourceBuffer) ResponseInputStream(software.amazon.awssdk.core.ResponseInputStream) Nonnull(javax.annotation.Nonnull) Nullable(javax.annotation.Nullable) FunctionEx(com.hazelcast.function.FunctionEx) BatchSource(com.hazelcast.jet.pipeline.BatchSource) Iterator(java.util.Iterator) S3Client(software.amazon.awssdk.services.s3.S3Client) UTF_8(java.nio.charset.StandardCharsets.UTF_8) InputStreamReader(java.io.InputStreamReader) SupplierEx(com.hazelcast.function.SupplierEx) StandardCharsets(java.nio.charset.StandardCharsets) List(java.util.List) Stream(java.util.stream.Stream) Context(com.hazelcast.jet.core.Processor.Context) Entry(java.util.Map.Entry) TriFunction(com.hazelcast.jet.function.TriFunction) BufferedReader(java.io.BufferedReader) SourceBuilder(com.hazelcast.jet.pipeline.SourceBuilder) InputStream(java.io.InputStream) InputStreamReader(java.io.InputStreamReader) ResponseInputStream(software.amazon.awssdk.core.ResponseInputStream) InputStream(java.io.InputStream) BufferedReader(java.io.BufferedReader) Traversers.traverseStream(com.hazelcast.jet.Traversers.traverseStream) ResponseInputStream(software.amazon.awssdk.core.ResponseInputStream) Stream(java.util.stream.Stream) InputStream(java.io.InputStream) Nonnull(javax.annotation.Nonnull)

Aggregations

BatchSource (com.hazelcast.jet.pipeline.BatchSource): 12; Pipeline (com.hazelcast.jet.pipeline.Pipeline): 10; Test (org.junit.Test): 10; Sinks (com.hazelcast.jet.pipeline.Sinks): 9; SourceBuilder (com.hazelcast.jet.pipeline.SourceBuilder): 6; List (java.util.List): 6; ImmutableMap.of (com.google.common.collect.ImmutableMap.of): 5; IOException (java.io.IOException): 5; Nonnull (javax.annotation.Nonnull): 5; Nullable (javax.annotation.Nullable): 5; Assertions.assertThatThrownBy (org.assertj.core.api.Assertions.assertThatThrownBy): 5; SearchRequest (org.elasticsearch.action.search.SearchRequest): 5; Assert.assertEquals (org.junit.Assert.assertEquals): 5; TestSources (com.hazelcast.jet.pipeline.test.TestSources): 4; ParallelJVMTest (com.hazelcast.test.annotation.ParallelJVMTest): 4; QuickTest (com.hazelcast.test.annotation.QuickTest): 4; ConsumerEx (com.hazelcast.function.ConsumerEx): 3; Job (com.hazelcast.jet.Job): 3; Observable (com.hazelcast.jet.Observable): 3; TestInClusterSupport (com.hazelcast.jet.TestInClusterSupport): 3