
Example 1 with CsvParser

Use of org.apache.apex.malhar.contrib.parser.CsvParser in project apex-malhar by Apache.

From class FusionStyleSQLApplication, method populateDAG:

@Override
public void populateDAG(DAG dag, Configuration conf) {
    SQLExecEnvironment env = SQLExecEnvironment.getEnvironment();
    env.registerFunction("APEXCONCAT", PureStyleSQLApplication.class, "apex_concat_str");

    Map<String, Class> fieldMapping = ImmutableMap.<String, Class>of(
        "RowTime", Date.class,
        "id", Integer.class,
        "Product", String.class,
        "units", Integer.class);

    // Add Kafka input
    KafkaSinglePortInputOperator kafkaInput = dag.addOperator("KafkaInput", KafkaSinglePortInputOperator.class);
    kafkaInput.setInitialOffset("EARLIEST");

    // Add CSV parser
    CsvParser csvParser = dag.addOperator("CSVParser", CsvParser.class);
    dag.addStream("KafkaToCSV", kafkaInput.outputPort, csvParser.in);

    // Register the CSV parser output as the input table for the SQL statement
    env.registerTable(conf.get("sqlSchemaInputName"), new StreamEndpoint(csvParser.out, fieldMapping));

    // Register a FileEndpoint as the output table
    env.registerTable(conf.get("sqlSchemaOutputName"),
        new FileEndpoint(conf.get("folderPath"), conf.get("fileName"),
            new CSVMessageFormat(conf.get("sqlSchemaOutputDef"))));

    // Translate the SQL statement into operators and add them to the DAG
    env.executeSQL(dag, conf.get("sql"));
}
Also used: StreamEndpoint (org.apache.apex.malhar.sql.table.StreamEndpoint), CSVMessageFormat (org.apache.apex.malhar.sql.table.CSVMessageFormat), SQLExecEnvironment (org.apache.apex.malhar.sql.SQLExecEnvironment), KafkaSinglePortInputOperator (org.apache.apex.malhar.kafka.KafkaSinglePortInputOperator), CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser), FileEndpoint (org.apache.apex.malhar.sql.table.FileEndpoint).
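
Note that this example leaves the Kafka topic, broker list, and parser schema to external configuration. Set programmatically, with the same setters that Example 4 below uses, the missing wiring would look roughly like this (the topic name, broker address, and configuration key are illustrative assumptions, not from the original application):

// Hypothetical inline configuration; the original application supplies these
// values through its Configuration / properties files.
kafkaInput.setTopics("transactions");              // topic name is an assumption
kafkaInput.setClusters("localhost:9092");          // broker address is an assumption
csvParser.setSchema(conf.get("csvSchemaInDef"));   // configuration key is hypothetical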

Example 2 with CsvParser

Use of org.apache.apex.malhar.contrib.parser.CsvParser in project apex-malhar by Apache.

From class Application, method populateDAG:

@Override
public void populateDAG(DAG dag, Configuration conf) {
    // Create operators: file reader -> CSV parser -> CSV formatter -> file writer
    FSRecordReaderModule recordReader = dag.addModule("recordReader", FSRecordReaderModule.class);
    CsvParser csvParser = dag.addOperator("csvParser", CsvParser.class);
    CsvFormatter formatter = dag.addOperator("formatter", new CsvFormatter());
    StringFileOutputOperator fileOutput = dag.addOperator("fileOutput", new StringFileOutputOperator());

    // Wire the pipeline: byte records -> POJOs -> CSV lines -> output files
    dag.addStream("record", recordReader.records, csvParser.in);
    dag.addStream("pojo", csvParser.out, formatter.in);
    dag.addStream("string", formatter.out, fileOutput.input);
}
Also used: FSRecordReaderModule (org.apache.apex.malhar.lib.fs.FSRecordReaderModule), CsvFormatter (org.apache.apex.malhar.contrib.formatter.CsvFormatter), CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser), StringFileOutputOperator (org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator).
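
Here too the parser and formatter schemas come from external configuration. A minimal sketch of setting the parser schema programmatically, reusing the JSON schema format that Example 4 shows inline (the field names below are hypothetical):

// Hypothetical schema; the real application would supply its own field list.
String schema = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
    + "{\"name\":\"id\",\"type\":\"Integer\"},"
    + "{\"name\":\"Product\",\"type\":\"String\"}]}";
csvParser.setSchema(schema);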

Example 3 with CsvParser

Use of org.apache.apex.malhar.contrib.parser.CsvParser in project apex-malhar by Apache.

From class CSVMessageFormat, method populateInputDAG:

@Override
public RelInfo populateInputDAG(DAG dag, JavaTypeFactory typeFactory) {
    // Add a CsvParser with a unique operator name, configured from the CSV_SCHEMA operand
    CsvParser csvParser = dag.addOperator(OperatorUtils.getUniqueOperatorName("CSVParser"), CsvParser.class);
    csvParser.setSchema((String) operands.get(CSV_SCHEMA));
    // Expose the parser's input port, the operator itself, its POJO output port,
    // and the relational row type to the SQL planner
    return new RelInfo("CSVParser", Lists.<Operator.InputPort>newArrayList(csvParser.in),
        csvParser, csvParser.out, getRowType(typeFactory));
}
Also used: Operator (com.datatorrent.api.Operator), CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser), RelInfo (org.apache.apex.malhar.sql.planner.RelInfo).
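
populateInputDAG is invoked by the SQL planner when a table backed by this message format is used as a source; the CSV_SCHEMA operand is supplied when the message format is constructed. A condensed usage sketch, based on the FileEndpoint registration in Example 1 (configuration keys as in that example):

// The schema string passed to the constructor becomes the CSV_SCHEMA operand read above.
CSVMessageFormat format = new CSVMessageFormat(conf.get("sqlSchemaOutputDef"));
env.registerTable(conf.get("sqlSchemaOutputName"),
    new FileEndpoint(conf.get("folderPath"), conf.get("fileName"), format));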

Example 4 with CsvParser

Use of org.apache.apex.malhar.contrib.parser.CsvParser in project apex-malhar by Apache.

From class SerDeTest, method testPortEndpoint:

@Test
public void testPortEndpoint() throws IOException, ClassNotFoundException {
    LogicalPlan dag = new LogicalPlan();

    String schemaIn = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
        + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
        + "{\"name\":\"id\",\"type\":\"Integer\"},"
        + "{\"name\":\"Product\",\"type\":\"String\"},"
        + "{\"name\":\"units\",\"type\":\"Integer\"}]}";
    String schemaOut = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
        + "{\"name\":\"RowTime1\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
        + "{\"name\":\"RowTime2\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
        + "{\"name\":\"Product\",\"type\":\"String\"}]}";

    // Kafka source
    KafkaSinglePortInputOperator kafkaInput = dag.addOperator("KafkaInput", KafkaSinglePortInputOperator.class);
    kafkaInput.setTopics("testdata0");
    kafkaInput.setInitialOffset("EARLIEST");
    Properties props = new Properties();
    props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, KafkaEndpoint.KEY_DESERIALIZER);
    props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaEndpoint.VALUE_DESERIALIZER);
    kafkaInput.setConsumerProps(props);
    kafkaInput.setClusters("localhost:9092");

    // Parse Kafka messages into POJOs
    CsvParser csvParser = dag.addOperator("CSVParser", CsvParser.class);
    csvParser.setSchema(schemaIn);
    dag.addStream("KafkaToCSV", kafkaInput.outputPort, csvParser.in);

    // Format result POJOs back into CSV
    CsvFormatter formatter = dag.addOperator("CSVFormatter", CsvFormatter.class);
    formatter.setSchema(schemaOut);

    // Kafka sink
    KafkaSinglePortOutputOperator kafkaOutput = dag.addOperator("KafkaOutput", KafkaSinglePortOutputOperator.class);
    kafkaOutput.setTopic("testresult");
    props = new Properties();
    props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaEndpoint.VALUE_SERIALIZER);
    props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, KafkaEndpoint.KEY_SERIALIZER);
    props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
    kafkaOutput.setProperties(props);
    dag.addStream("CSVToKafka", formatter.out, kafkaOutput.inputPort);

    // Register both ports as SQL tables and run the statement
    SQLExecEnvironment.getEnvironment()
        .registerTable("ORDERS", new StreamEndpoint(csvParser.out, InputPOJO.class))
        .registerTable("SALES", new StreamEndpoint(formatter.in, OutputPOJO.class))
        .registerFunction("APEXCONCAT", FileEndpointTest.class, "apex_concat_str")
        .executeSQL(dag, "INSERT INTO SALES "
            + "SELECT STREAM ROWTIME, "
            + "FLOOR(ROWTIME TO DAY), "
            + "APEXCONCAT('OILPAINT', SUBSTRING(PRODUCT, 6, 7)) "
            + "FROM ORDERS WHERE ID > 3 "
            + "AND PRODUCT LIKE 'paint%'");

    dag.validate();
}
Also used: StreamEndpoint (org.apache.apex.malhar.sql.table.StreamEndpoint), CsvFormatter (org.apache.apex.malhar.contrib.formatter.CsvFormatter), KafkaSinglePortInputOperator (org.apache.apex.malhar.kafka.KafkaSinglePortInputOperator), LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan), CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser), Properties (java.util.Properties), KafkaSinglePortOutputOperator (org.apache.apex.malhar.kafka.KafkaSinglePortOutputOperator), Test (org.junit.Test).
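
InputPOJO and OutputPOJO are defined elsewhere in the test sources. Given schemaIn, InputPOJO would plausibly have the following shape (a sketch only; the field names and types follow from the schema, while the accessor style is an assumption):

import java.util.Date;

// Hypothetical reconstruction of the test's InputPOJO, mirroring schemaIn.
public class InputPOJO {
    private Date RowTime;
    private int id;
    private String Product;
    private int units;

    public Date getRowTime() { return RowTime; }
    public void setRowTime(Date rowTime) { this.RowTime = rowTime; }
    public int getId() { return id; }
    public void setId(int id) { this.id = id; }
    public String getProduct() { return Product; }
    public void setProduct(String product) { this.Product = product; }
    public int getUnits() { return units; }
    public void setUnits(int units) { this.units = units; }
}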

Example 5 with CsvParser

Use of org.apache.apex.malhar.contrib.parser.CsvParser in project apex-malhar by Apache.

From class FileToJdbcCsvParser, method populateDAG:

@Override
public void populateDAG(DAG dag, Configuration configuration) {
    // create operators
    FileReader fileReader = dag.addOperator("FileReader", FileReader.class);
    CsvParser csvParser = dag.addOperator("CsvParser", CsvParser.class);
    JdbcPOJOInsertOutputOperator jdbcOutputOperator = dag.addOperator("JdbcOutput", JdbcPOJOInsertOutputOperator.class);
    // configure operators
    String pojoSchema = SchemaUtils.jarResourceFileToString("schema.json");
    csvParser.setSchema(pojoSchema);
    jdbcOutputOperator.setFieldInfos(addFieldInfos());
    JdbcTransactionalStore outputStore = new JdbcTransactionalStore();
    jdbcOutputOperator.setStore(outputStore);
    // add stream
    dag.addStream("Bytes", fileReader.byteOutput, csvParser.in);
    dag.addStream("POJOs", csvParser.out, jdbcOutputOperator.input);
}
Also used: JdbcPOJOInsertOutputOperator (org.apache.apex.malhar.lib.db.jdbc.JdbcPOJOInsertOutputOperator), JdbcTransactionalStore (org.apache.apex.malhar.lib.db.jdbc.JdbcTransactionalStore), CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser).
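
The schema.json packaged with the application jar is not shown in this listing. Following the schema format that Example 4 embeds inline, it would be a JSON document along these lines (the field names here are hypothetical):

{
  "separator": ",",
  "quoteChar": "\"",
  "fields": [
    {"name": "AccountNumber", "type": "Integer"},
    {"name": "Name", "type": "String"},
    {"name": "Amount", "type": "Integer"}
  ]
}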

Aggregations

CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser): 6
CsvFormatter (org.apache.apex.malhar.contrib.formatter.CsvFormatter): 3
KafkaSinglePortInputOperator (org.apache.apex.malhar.kafka.KafkaSinglePortInputOperator): 2
FSRecordReaderModule (org.apache.apex.malhar.lib.fs.FSRecordReaderModule): 2
StringFileOutputOperator (org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator): 2
StreamEndpoint (org.apache.apex.malhar.sql.table.StreamEndpoint): 2
Operator (com.datatorrent.api.Operator): 1
LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan): 1
Properties (java.util.Properties): 1
KafkaSinglePortOutputOperator (org.apache.apex.malhar.kafka.KafkaSinglePortOutputOperator): 1
JdbcPOJOInsertOutputOperator (org.apache.apex.malhar.lib.db.jdbc.JdbcPOJOInsertOutputOperator): 1
JdbcTransactionalStore (org.apache.apex.malhar.lib.db.jdbc.JdbcTransactionalStore): 1
FilterOperator (org.apache.apex.malhar.lib.filter.FilterOperator): 1
SQLExecEnvironment (org.apache.apex.malhar.sql.SQLExecEnvironment): 1
RelInfo (org.apache.apex.malhar.sql.planner.RelInfo): 1
CSVMessageFormat (org.apache.apex.malhar.sql.table.CSVMessageFormat): 1
FileEndpoint (org.apache.apex.malhar.sql.table.FileEndpoint): 1
Test (org.junit.Test): 1