use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class SparkBatchSourceFactory method createInputRDD.
@SuppressWarnings("unchecked")
private <K, V> JavaPairRDD<K, V> createInputRDD(JavaSparkExecutionContext sec, JavaSparkContext jsc,
                                                String inputName, Class<K> keyClass, Class<V> valueClass) {
  if (streams.containsKey(inputName)) {
    Input.StreamInput streamInput = streams.get(inputName);
    FormatSpecification formatSpec = streamInput.getBodyFormatSpec();
    if (formatSpec != null) {
      return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(), formatSpec,
                                                streamInput.getStartTime(), streamInput.getEndTime(),
                                                StructuredRecord.class);
    }
    String decoderType = streamInput.getDecoderType();
    if (decoderType == null) {
      return (JavaPairRDD<K, V>) sec.fromStream(streamInput.getName(),
                                                streamInput.getStartTime(), streamInput.getEndTime(),
                                                valueClass);
    } else {
      try {
        Class<StreamEventDecoder<K, V>> decoderClass = (Class<StreamEventDecoder<K, V>>)
          Thread.currentThread().getContextClassLoader().loadClass(decoderType);
        return sec.fromStream(streamInput.getName(), streamInput.getStartTime(), streamInput.getEndTime(),
                              decoderClass, keyClass, valueClass);
      } catch (Exception e) {
        throw Throwables.propagate(e);
      }
    }
  }
  if (inputFormatProviders.containsKey(inputName)) {
    InputFormatProvider inputFormatProvider = inputFormatProviders.get(inputName);
    Configuration hConf = new Configuration();
    hConf.clear();
    for (Map.Entry<String, String> entry : inputFormatProvider.getInputFormatConfiguration().entrySet()) {
      hConf.set(entry.getKey(), entry.getValue());
    }
    ClassLoader classLoader = Objects.firstNonNull(Thread.currentThread().getContextClassLoader(),
                                                   getClass().getClassLoader());
    try {
      @SuppressWarnings("unchecked")
      Class<InputFormat> inputFormatClass =
        (Class<InputFormat>) classLoader.loadClass(inputFormatProvider.getInputFormatClassName());
      return jsc.newAPIHadoopRDD(hConf, inputFormatClass, keyClass, valueClass);
    } catch (ClassNotFoundException e) {
      throw Throwables.propagate(e);
    }
  }
  if (datasetInfos.containsKey(inputName)) {
    DatasetInfo datasetInfo = datasetInfos.get(inputName);
    return sec.fromDataset(datasetInfo.getDatasetName(), datasetInfo.getDatasetArgs());
  }
  // Should never happen: the static create methods ensure that one and only one source type is specified.
  throw new IllegalStateException("Unknown source type");
}
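For context, a minimal sketch of the FormatSpecification branch from the caller's side, using the same fromStream overloads that appear in the method above. The stream name "purchases" and the two-field schema are hypothetical, and the generic return type is assumed to match the 3-arg variant shown later in StreamFormatSpecSpark.
// A hedged sketch: reading a hypothetical CSV stream "purchases" with an
// explicit FormatSpecification, mirroring the first branch of createInputRDD.
Schema schema = Schema.recordOf("purchase",
  Schema.Field.of("item", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("price", Schema.of(Schema.Type.INT)));
FormatSpecification formatSpec = new FormatSpecification("csv", schema);
// 0 to Long.MAX_VALUE covers all events currently in the stream
JavaPairRDD<Long, GenericStreamEventData<StructuredRecord>> rdd =
  sec.fromStream("purchases", formatSpec, 0, Long.MAX_VALUE, StructuredRecord.class);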
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class DescribeStreamCommand method perform.
@Override
public void perform(Arguments arguments, PrintStream output) throws Exception {
  StreamId streamId = cliConfig.getCurrentNamespace().stream(arguments.get(ArgumentName.STREAM.toString()));
  StreamProperties config = streamClient.getConfig(streamId);
  Table table = Table.builder()
    .setHeader("ttl", "format", "schema", "notification.threshold.mb", "description")
    .setRows(ImmutableList.of(config), new RowMaker<StreamProperties>() {
      @Override
      public List<?> makeRow(StreamProperties object) {
        FormatSpecification format = object.getFormat();
        return Lists.newArrayList(object.getTTL(), format.getName(), format.getSchema().toString(),
                                  object.getNotificationThresholdMB(), object.getDescription());
      }
    }).build();
  cliConfig.getTableRenderer().render(cliConfig, output, table);
}
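The Table/RowMaker builder calls used in perform generalize to any row type; a minimal sketch with hypothetical string data, assuming setRows is generic over the element type as its use with StreamProperties above suggests.
// A hedged sketch of the Table/RowMaker pattern with hypothetical data
Table table = Table.builder()
  .setHeader("word", "length")
  .setRows(ImmutableList.of("stream", "view"), new RowMaker<String>() {
    @Override
    public List<?> makeRow(String word) {
      return Lists.newArrayList(word, word.length());
    }
  }).build();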
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class StreamInputFormatProvider method getInputFormatConfiguration.
@Override
public Map<String, String> getInputFormatConfiguration() {
  try {
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);
    Location streamPath = StreamUtils.createGenerationLocation(streamConfig.getLocation(),
                                                               StreamUtils.getGeneration(streamConfig));
    Configuration hConf = new Configuration();
    hConf.clear();
    AbstractStreamInputFormat.setStreamId(hConf, streamId);
    AbstractStreamInputFormat.setTTL(hConf, streamConfig.getTTL());
    AbstractStreamInputFormat.setStreamPath(hConf, streamPath.toURI());
    AbstractStreamInputFormat.setTimeRange(hConf, streamInput.getStartTime(), streamInput.getEndTime());
    FormatSpecification formatSpec = streamInput.getBodyFormatSpec();
    if (formatSpec != null) {
      AbstractStreamInputFormat.setBodyFormatSpecification(hConf, formatSpec);
    } else {
      String decoderType = streamInput.getDecoderType();
      if (decoderType != null) {
        AbstractStreamInputFormat.setDecoderClassName(hConf, decoderType);
      }
    }
    return ConfigurationUtil.toMap(hConf);
  } catch (IOException e) {
    throw Throwables.propagate(e);
  }
}
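On the consuming side, the returned map can be folded back into a Hadoop Configuration. A minimal sketch mirroring the loop in createInputRDD above; the 'provider' variable is a hypothetical InputFormatProvider reference.
// Rebuild a Configuration from the provider's key/value map
Configuration hConf = new Configuration();
hConf.clear();
for (Map.Entry<String, String> entry : provider.getInputFormatConfiguration().entrySet()) {
  hConf.set(entry.getKey(), entry.getValue());
}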
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class StreamClientTestRun method testStreamDeleteAfterCreatingView.
@Test
public void testStreamDeleteAfterCreatingView() throws Exception {
  StreamId testStream = NamespaceId.DEFAULT.stream("testStream");
  streamClient.create(testStream);
  // getConfig() throws StreamNotFoundException if the stream was not created in the previous step
  streamClient.getConfig(testStream);
  StreamViewClient streamViewClient = new StreamViewClient(clientConfig);
  StreamViewId testView = testStream.view("testView");
  ViewSpecification testViewSpec = new ViewSpecification(new FormatSpecification("csv", null, null));
  Assert.assertTrue(streamViewClient.createOrUpdate(testView, testViewSpec));
  // test stream delete
  streamClient.delete(testStream);
  // recreate the stream and the view
  streamClient.create(testStream);
  // again, getConfig() verifies that the recreation succeeded
  streamClient.getConfig(testStream);
  Assert.assertTrue(streamViewClient.createOrUpdate(testView, testViewSpec));
  // test that namespace deletion succeeds
  namespaceClient.delete(NamespaceId.DEFAULT);
}
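A hedged variant of the view spec above: the same CSV format, but with an explicit body schema instead of nulls. The field names are hypothetical; the two-arg FormatSpecification constructor is the one used in StreamFormatSpecSpark below.
// Hypothetical: a view whose CSV format declares an explicit body schema
Schema bodySchema = Schema.recordOf("body",
  Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
  Schema.Field.of("age", Schema.of(Schema.Type.INT)));
ViewSpecification specWithSchema =
  new ViewSpecification(new FormatSpecification("csv", bodySchema));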
use of co.cask.cdap.api.data.format.FormatSpecification in project cdap by caskdata.
the class StreamFormatSpecSpark method run.
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
  JavaSparkContext jsc = new JavaSparkContext();
  SQLContext sqlContext = new SQLContext(jsc);
  // Read from the CSV stream and turn it into a DataFrame
  String streamName = sec.getRuntimeArguments().get("stream.name");
  Schema schema = Schema.recordOf("record",
    ImmutableList.of(Schema.Field.of("name", Schema.of(Schema.Type.STRING)),
                     Schema.Field.of("age", Schema.of(Schema.Type.INT))));
  FormatSpecification formatSpec = new FormatSpecification("csv", schema);
  JavaPairRDD<Long, GenericStreamEventData<StructuredRecord>> rdd =
    sec.fromStream(streamName, formatSpec, StructuredRecord.class);
  JavaRDD<Person> personRDD = rdd.values().map(new Function<GenericStreamEventData<StructuredRecord>, Person>() {
    @Override
    public Person call(GenericStreamEventData<StructuredRecord> data) throws Exception {
      StructuredRecord record = data.getBody();
      return new Person(record.<String>get("name"), record.<Integer>get("age"));
    }
  });
  sqlContext.createDataFrame(personRDD, Person.class).registerTempTable("people");
  // Execute a SQL statement on the table and save the result
  JavaPairRDD<String, Integer> resultRDD = sqlContext.sql(sec.getRuntimeArguments().get("sql.statement"))
    .toJavaRDD()
    .mapToPair(new PairFunction<Row, String, Integer>() {
      @Override
      public Tuple2<String, Integer> call(Row row) throws Exception {
        return new Tuple2<>(row.getString(0), row.getInt(1));
      }
    });
  sec.saveAsDataset(resultRDD, sec.getRuntimeArguments().get("output.dataset"));
}
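createDataFrame(personRDD, Person.class) relies on Person being a serializable JavaBean; a minimal sketch of what that class might look like (the actual class in the CDAP example may differ).
// A hedged sketch of the Person bean assumed above: Spark's bean-based
// DataFrame creation needs a no-arg constructor plus getters/setters,
// and instances created inside the map function must be Serializable.
public static class Person implements Serializable {
  private String name;
  private int age;

  public Person() { }

  public Person(String name, int age) {
    this.name = name;
    this.age = age;
  }

  public String getName() { return name; }
  public void setName(String name) { this.name = name; }
  public int getAge() { return age; }
  public void setAge(int age) { this.age = age; }
}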