use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class SparkBatchSourceFactory method createInputRDD.
@SuppressWarnings("unchecked")
private <K, V> JavaPairRDD<K, V> createInputRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc,
                                                String inputName, Class<K> keyClass, Class<V> valueClass) {
  if (streams.containsKey(inputName)) {
    Input.StreamInput streamInput = streams.get(inputName);
    FormatSpecification formatSpec = streamInput.getBodyFormatSpec();
    if (formatSpec != null) {
      return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(), formatSpec,
                                                streamInput.getStartTime(), streamInput.getEndTime(),
                                                StructuredRecord.class);
    }
    String decoderType = streamInput.getDecoderType();
    if (decoderType == null) {
      return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(),
                                                streamInput.getStartTime(), streamInput.getEndTime(),
                                                valueClass);
    } else {
      try {
        Class<StreamEventDecoder<K, V>> decoderClass = (Class<StreamEventDecoder<K, V>>)
          Thread.currentThread().getContextClassLoader().loadClass(decoderType);
        return sec.fromStream(streamInput.getName(), streamInput.getStartTime(), streamInput.getEndTime(),
                              decoderClass, keyClass, valueClass);
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }
  }
  if (inputFormatProviders.containsKey(inputName)) {
    InputFormatProvider inputFormatProvider = inputFormatProviders.get(inputName);
    Configuration hConf = new Configuration();
    hConf.clear();
    for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
      hConf.set(entry.getKey(), entry.getValue());
    }
    ClassLoader classLoader = Objects.firstNonNull(Thread.currentThread().getContextClassLoader(),
                                                   getClass().getClassLoader());
    try {
      @SuppressWarnings("unchecked")
      Class<InputFormat> inputFormatClass =
        (Class<InputFormat>) classLoader.loadClass(inputFormatProvider.getInputFormatClassName());
      return jsc.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass);
    } catch (ClassNotFoundException e) {
      throw Throwables.propagate(e);
    }
  }
  if (datasetInfos.containsKey(inputName)) {
    DatasetInfo datasetInfo = datasetInfos.get(inputName);
    return sec.fromDataset(datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
  }
  // Should never happen: the static create methods ensure that one and only one source type is specified.
  throw new IllegalStateException("Unknown source type");
}
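For context, a minimal sketch of the FormatSpecification branch from the caller's side, using the same fromStream overloads that appear in the method above. The stream name "purchases" and the two-field schema are hypothetical, and the generic return type is assumed to match the 3-arg variant shown later in StreamFormatSpecSpark.
// A hedged sketch: reading a hypothetical CSV stream "purchases" with an
// explicit FormatSpecification, mirroring the first branch of createInputRDD.
Schema schema = Schema.recordOf("purchase",
  Schema.Field.of("item", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("price", Schema.of(Schema.Type.INT)));
FormatSpecification formatSpec = new FormatSpecification("csv", schema);
// 0 to Long.MAX_VALUE covers all events currently in the stream
JavaPairRDD<Long, GenericStreamEventData<StructuredRecord>> rdd =
  sec.fromStream("purchases", formatSpec, 0, Long.MAX_VALUE, StructuredRecord.class);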
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class DescribeStreamCommand method perform.
@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
  StreamId streamId = cliConfig.getCurrentNamespace().stream(arguments.get(ArgumentName.STREAM.toString()));
  StreamProperties config = streamClient.getConfig(streamId);
  Table table = Table.builder()
    .setHeader("ttl", "format", "schema", "notification.threshold.mb", "description")
    .setRows(ImmutableList.of(config), new RowMaker<StreamProperties>() {
      @Override
      public List<?> makeRow(StreamProperties object) {
        FormatSpecification format = object.getFormat();
        return Lists.newArrayList(object.getTTL(), format.getName(), format.getSchema().toString(),
                                  object.getNotificationThresholdMB(), object.getDescription());
      }
    }).build();
  cliConfig.getTableRenderer().render(cliConfig, output, table);
}
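The Table/RowMaker builder calls used in perform generalize to any row type; a minimal sketch with hypothetical string data, assuming setRows is generic over the element type as its use with StreamProperties above suggests.
// A hedged sketch of the Table/RowMaker pattern with hypothetical data
Table table = Table.builder()
  .setHeader("word", "length")
  .setRows(ImmutableList.of("stream", "view"), new RowMaker<String>() {
    @Override
    public List<?> makeRow(String word) {
      return Lists.newArrayList(word, word.length());
    }
  }).build();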
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class StreamInputFormatProvider method getInputFormatConfiguration.
@Override
public Map<String, String> getInputFormatConfiguration() {
  try {
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    Location streamPath = StreamUtils.createGenerationLocation(streamConfig.getLocation(),
                                                               StreamUtils.getGeneration(streamConfig));
    Configuration hConf = new Configuration();
    hConf.clear();
    AbstractStreamInputFormat.setStreamId(hConf, streamId);
    AbstractStreamInputFormat.setTTL(hConf, streamConfig.getTTL());
    AbstractStreamInputFormat.setStreamPath(hConf, streamPath.toURI());
    AbstractStreamInputFormat.setTimeRange(hConf, streamInput.getStartTime(), streamInput.getEndTime());
    FormatSpecification formatSpec = streamInput.getBodyFormatSpec();
    if (formatSpec != null) {
      AbstractStreamInputFormat.setBodyFormatSpecification(hConf, formatSpec);
    } else {
      String decoderType = streamInput.getDecoderType();
      if (decoderType != null) {
        AbstractStreamInputFormat.setDecoderClassName(hConf, decoderType);
      }
    }
    return ConfigurationUtil.toMap(hConf);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
}
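On the consuming side, the returned map can be folded back into a Hadoop Configuration. A minimal sketch mirroring the loop in createInputRDD above; the 'provider' variable is a hypothetical InputFormatProvider reference.
// Rebuild a Configuration from the provider's key/value map
Configuration hConf = new Configuration();
hConf.clear();
for (Map.Entry<String, String> entry : provider.getInputFormatConfiguration().entrySet()) {
  hConf.set(entry.getKey(), entry.getValue());
}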
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class StreamClientTestRun method testStreamDeleteAfterCreatingView.
@Test
public void testStreamDeleteAfterCreatingView() throws Exception {
  StreamId testStream = NamespaceId.DEFAULT.stream("testStream");
  streamClient.create(testStream);
  // getConfig() throws StreamNotFoundException if the stream was not created in the previous step
  streamClient.getConfig(testStream);
  StreamViewClient streamViewClient = new StreamViewClient(clientConfig);
  StreamViewId testView = testStream.view("testView");
  ViewSpecification testViewSpec = new ViewSpecification(new FormatSpecification("csv", null, null));
  Assert.assertTrue(streamViewClient.createOrUpdate(testView, testViewSpec));
  // test stream delete
  streamClient.delete(testStream);
  // recreate the stream and the view
  streamClient.create(testStream);
  // again, getConfig() verifies that the recreation succeeded
  streamClient.getConfig(testStream);
  Assert.assertTrue(streamViewClient.createOrUpdate(testView, testViewSpec));
  // test that namespace deletion succeeds
  namespaceClient.delete(NamespaceId.DEFAULT);
}
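A hedged variant of the view spec above: the same CSV format, but with an explicit body schema instead of nulls. The field names are hypothetical; the two-arg FormatSpecification constructor is the one used in StreamFormatSpecSpark below.
// Hypothetical: a view whose CSV format declares an explicit body schema
Schema bodySchema = Schema.recordOf("body",
  Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("age", Schema.of(Schema.Type.INT)));
ViewSpecification specWithSchema =
  new ViewSpecification(new FormatSpecification("csv", bodySchema));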
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class StreamFormatSpecSpark method run.
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
  JavaSparkContext jsc = new JavaSparkContext();
  SQLContext sqlContext = new SQLContext(jsc);
  // Read from the CSV stream and turn it into a DataFrame
  String streamName = sec.getRuntimeArguments().get("stream.name");
  Schema schema = Schema.recordOf("record",
    ImmutableList.of(Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
                     Schema.Field.of("age", Schema.of(Schema.Type.INT))));
  FormatSpecification formatSpec = new FormatSpecification("csv", schema);
  JavaPairRDD<Long, GenericStreamEventData<StructuredRecord>> rdd =
    sec.fromStream(streamName, formatSpec, StructuredRecord.class);
  JavaRDD<Person> personRDD = rdd.values().map(new Function<GenericStreamEventData<StructuredRecord>, Person>() {
    @Override
    public Person call(GenericStreamEventData<StructuredRecord> data) throws Exception {
      StructuredRecord record = data.getBody();
      return new Person(record.<String>get("name"), record.<Integer>get("age"));
    }
  });
  sqlContext.createDataFrame(personRDD, Person.class).registerTempTable("people");
  // Execute a SQL statement on the table and save the result
  JavaPairRDD<String, Integer> resultRDD = sqlContext.sql(sec.getRuntimeArguments().get("sql.statement"))
    .toJavaRDD()
    .mapToPair(new PairFunction<Row, String, Integer>() {
      @Override
      public Tuple2<String, Integer> call(Row row) throws Exception {
        return new Tuple2<>(row.getString(0), row.getInt(1));
      }
    });
  sec.saveAsDataset(resultRDD, sec.getRuntimeArguments().get("output.dataset"));
}
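createDataFrame(personRDD, Person.class) relies on Person being a serializable JavaBean; a minimal sketch of what that class might look like (the actual class in the CDAP example may differ).
// A hedged sketch of the Person bean assumed above: Spark's bean-based
// DataFrame creation needs a no-arg constructor plus getters/setters,
// and instances created inside the map function must be Serializable.
public static class Person implements Serializable {
  private String name;
  private int age;

  public Person() { }

  public Person(String name, int age) {
    this.name = name;
    this.age = age;
  }

  public String getName() { return name; }
  public void setName(String name) { this.name = name; }
  public int getAge() { return age; }
  public void setAge(int age) { this.age = age; }
}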