Example usage of com.hazelcast.jet.pipeline.BatchSource in the project hazelcast by hazelcast:
class ObservableResultsTest, method batchJobFails.
@Test
public void batchJobFails() {
    // Batch source whose fill function always throws, so the job is guaranteed to fail.
    // Fix: the second lambda parameter was named "Void", which collides with java.lang.Void
    // and reads like a type rather than a variable; renamed to "buf" for clarity.
    BatchSource<String> errorSource = SourceBuilder.batch("error-source", x -> null)
            .<String>fillBufferFn((in, buf) -> {
                throw new RuntimeException("Intentionally thrown!");
            })
            .destroyFn(ConsumerEx.noop())
            .build();
    Pipeline pipeline = Pipeline.create();
    pipeline.readFrom(errorSource).writeTo(Sinks.observable(observableName));
    // when: the job is submitted, it must eventually reach FAILED
    Job job = hz().getJet().newJob(pipeline);
    assertTrueEventually(() -> assertEquals(JobStatus.FAILED, job.getStatus()));
    // then: the observer saw no values, received the failure, and no completion
    assertSortedValues(testObserver);
    assertError(testObserver, "Intentionally thrown!");
    assertCompletions(testObserver, 0);
}
Example usage of com.hazelcast.jet.pipeline.BatchSource in the project gora by apache:
class JetTest, method testNewJetSource.
@Test
public void testNewJetSource() throws Exception {
    // Input store holds Pageview records; output store receives filtered/mapped ResultPageViews.
    DataStore<Long, Pageview> dataStoreIn;
    dataStoreIn = DataStoreFactory.getDataStore(Long.class, Pageview.class, utility.getConfiguration());
    dataStoreOut = DataStoreFactory.getDataStore(Long.class, ResultPageView.class, utility.getConfiguration());
    // Restrict the scan to the key range [0, 55]. Note: this assigns the "query" field,
    // not a local — the output-side query below is a separate local variable.
    query = dataStoreIn.newQuery();
    query.setStartKey(0L);
    query.setEndKey(55L);
    JetEngine<Long, Pageview, Long, ResultPageView> jetEngine = new JetEngine<>();
    BatchSource<JetInputOutputFormat<Long, Pageview>> fileSource = jetEngine.createDataSource(dataStoreIn, query);
    // Pipeline: keep only pageviews from one IP and project them into ResultPageView records.
    Pipeline p = Pipeline.create();
    p.drawFrom(fileSource).filter(item -> item.getValue().getIp().toString().equals("88.240.129.183")).map(e -> {
        ResultPageView resultPageView = new ResultPageView();
        resultPageView.setIp(e.getValue().getIp());
        resultPageView.setTimestamp(e.getValue().getTimestamp());
        resultPageView.setUrl(e.getValue().getUrl());
        return new JetInputOutputFormat<Long, ResultPageView>(e.getValue().getTimestamp(), resultPageView);
    }).drainTo(jetEngine.createDataSink(dataStoreOut));
    // Two-member Jet cluster; shutdownAll() in finally tears both instances down even on failure.
    JetInstance jet = Jet.newJetInstance();
    Jet.newJetInstance();
    try {
        jet.newJob(p).join();
    } finally {
        Jet.shutdownAll();
    }
    // Fix: the local was previously also named "query", shadowing the field assigned above;
    // renamed to "outQuery" to make the two queries distinguishable.
    Query<Long, ResultPageView> outQuery = dataStoreOut.newQuery();
    Result<Long, ResultPageView> result = outQuery.execute();
    int noOfOutputRecords = 0;
    String ip = "";
    while (result.next()) {
        noOfOutputRecords++;
        ip = result.get().getIp().toString();
        assertEquals("88.240.129.183", ip);
    }
    // Exactly two pageviews in the key range are expected to match the filter IP.
    assertEquals(2, noOfOutputRecords);
}
Example usage of com.hazelcast.jet.pipeline.BatchSource in the project hazelcast by hazelcast:
class ObservableResultsTest, method sinkConsumesThrowables.
@Test
public void sinkConsumesThrowables() throws Exception {
    // An observable sink must accept Throwable instances as ordinary payload items,
    // not interpret them as job errors.
    Pipeline pipeline = Pipeline.create();
    BatchSource<Throwable> throwableItems = TestSources.items(
            new RuntimeException("runtime_exception"),
            new Exception("exception"),
            new Error("error"),
            new Throwable("throwable"));
    pipeline.readFrom(throwableItems).writeTo(Sinks.observable("throwables"));
    // when
    hz().getJet().newJob(pipeline).join();
    Comparator<Object> byMessage = Comparator.comparing(item -> ((Throwable) item).getMessage());
    List<Object> sortedResults = hz().getJet()
            .getObservable("throwables")
            .toFuture(stream -> stream.sorted(byMessage).collect(Collectors.toList()))
            .get();
    // then: all four items arrived, in message order
    String[] expectedMessages = {"error", "exception", "runtime_exception", "throwable"};
    assertEquals(expectedMessages.length, sortedResults.size());
    for (int i = 0; i < expectedMessages.length; i++) {
        assertEquals(expectedMessages[i], ((Throwable) sortedResults.get(i)).getMessage());
    }
}
Example usage of com.hazelcast.jet.pipeline.BatchSource in the project hazelcast by hazelcast:
class ObservableResultsTest, method observableRegisteredAfterJobFailedGetError.
@Test
public void observableRegisteredAfterJobFailedGetError() {
    // Batch source whose fill function always throws, so the job is guaranteed to fail.
    // Fix: the second lambda parameter was named "Void", which collides with java.lang.Void
    // and reads like a type rather than a variable; renamed to "buf" for clarity.
    BatchSource<String> errorSource = SourceBuilder.batch("error-source", x -> null)
            .<String>fillBufferFn((in, buf) -> {
                throw new RuntimeException("Intentionally thrown!");
            })
            .destroyFn(ConsumerEx.noop())
            .build();
    Pipeline pipeline = Pipeline.create();
    pipeline.readFrom(errorSource).writeTo(Sinks.observable(observableName));
    Job job = hz().getJet().newJob(pipeline);
    assertTrueEventually(() -> assertEquals(JobStatus.FAILED, job.getStatus()));
    // when: an observer registers only AFTER the job has already failed
    TestObserver otherTestObserver = new TestObserver();
    Observable<Long> lateObservable = hz().getJet().getObservable(observableName);
    lateObservable.addObserver(otherTestObserver);
    // then: the pre-registered observer still reports the error and no completion
    assertSortedValues(testObserver);
    assertError(testObserver, "Intentionally thrown!");
    assertCompletions(testObserver, 0);
}
Example usage of com.hazelcast.jet.pipeline.BatchSource in the project hazelcast by hazelcast:
class S3Sources, method s3.
/**
 * Creates an AWS S3 {@link BatchSource} which lists all the objects in the
 * given buckets matching the given {@code prefix}, reads them line by line,
 * transforms each line to the desired output object using the given {@code
 * mapFn} and emits the results to the downstream stage.
 * <p>
 * The source does not save any state to snapshot. If the job is restarted,
 * it will re-emit all entries.
 * <p>
 * The default local parallelism for this processor is 2.
 * <p>
 * Here is an example which reads the objects from two buckets,
 * applying the given prefix, and emits each line unchanged.
 *
 * <pre>{@code
 * Pipeline p = Pipeline.create();
 * BatchStage<String> srcStage = p.readFrom(S3Sources.s3(
 * Arrays.asList("bucket1", "bucket2"),
 * "prefix",
 * StandardCharsets.UTF_8,
 * () -> S3Client.create(),
 * (filename, line) -> line
 * ));
 * }</pre>
 *
 * @param bucketNames list of bucket-names
 * @param prefix the prefix to filter the objects. Optional, passing
 * {@code null} will list all objects.
 * @param charset the charset used to decode each object's bytes into lines
 * @param clientSupplier function which returns the S3 client to use;
 * one client per processor instance is used
 * @param mapFn the function which creates output object from each
 * line. Gets the object name and line as parameters
 * @param <T> the type of the items the source emits
 * @return a batch source emitting one mapped item per line of each object
 */
@Nonnull
public static <T> BatchSource<T> s3(@Nonnull List<String> bucketNames, @Nullable String prefix, @Nonnull Charset charset, @Nonnull SupplierEx<? extends S3Client> clientSupplier, @Nonnull BiFunctionEx<String, String, ? extends T> mapFn) {
    // Capture the charset by name (a String) rather than the Charset object itself —
    // presumably so the lambda below stays serializable for distribution to cluster
    // members (Charset is not Serializable); TODO confirm against SourceBuilder contract.
    String charsetName = charset.name();
    // Decodes an object's content stream into a lazy Stream of its lines.
    FunctionEx<InputStream, Stream<String>> readFileFn = responseInputStream -> {
        BufferedReader reader = new BufferedReader(new InputStreamReader(responseInputStream, Charset.forName(charsetName)));
        return reader.lines();
    };
    // Delegate to the lower-level overload that takes the line-reader function.
    return s3(bucketNames, prefix, clientSupplier, readFileFn, mapFn);
}
Aggregations