Use of org.apache.apex.malhar.contrib.formatter.CsvFormatter in project apex-malhar by Apache.
Class Application, method populateDAG.
/**
 * Builds a linear four-stage topology on the given DAG:
 * {@code recordReader -> csvParser -> formatter -> fileOutput}.
 *
 * @param dag  the DAG that receives the module, the operators and the streams
 * @param conf configuration handed in by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Register the source module and the three downstream operators under their DAG names.
  FSRecordReaderModule reader = dag.addModule("recordReader", FSRecordReaderModule.class);
  CsvParser parser = dag.addOperator("csvParser", CsvParser.class);
  CsvFormatter csvFormatter = dag.addOperator("formatter", new CsvFormatter());
  StringFileOutputOperator writer = dag.addOperator("fileOutput", new StringFileOutputOperator());

  // Connect each stage's output port to the next stage's input port.
  dag.addStream("record", reader.records, parser.in);
  dag.addStream("pojo", parser.out, csvFormatter.in);
  dag.addStream("string", csvFormatter.out, writer.input);
}
Use of org.apache.apex.malhar.contrib.formatter.CsvFormatter in project apex-malhar by Apache.
Class Application, method populateDAG.
/**
 * Builds a topology that parses records, routes them through a filter, and
 * sends each of the filter's two output ports ({@code truePort} and
 * {@code falsePort}) through its own formatter into its own file output operator.
 *
 * @param dag  the DAG that receives the module, the operators and the streams
 * @param conf configuration handed in by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // Shared front of the pipeline: reader -> parser -> filter.
  FSRecordReaderModule reader = dag.addModule("recordReader", FSRecordReaderModule.class);
  CsvParser parser = dag.addOperator("csvParser", CsvParser.class);
  FilterOperator filter = dag.addOperator("filterOperator", new FilterOperator());

  // One formatter and one file writer per filter outcome.
  CsvFormatter acceptedFormatter = dag.addOperator("selectedFormatter", new CsvFormatter());
  CsvFormatter declinedFormatter = dag.addOperator("rejectedFormatter", new CsvFormatter());
  StringFileOutputOperator acceptedWriter =
      dag.addOperator("selectedOutput", new StringFileOutputOperator());
  StringFileOutputOperator declinedWriter =
      dag.addOperator("rejectedOutput", new StringFileOutputOperator());

  dag.addStream("record", reader.records, parser.in);
  dag.addStream("pojo", parser.out, filter.input);

  // Fan out on the filter's two ports.
  dag.addStream("pojoSelected", filter.truePort, acceptedFormatter.in);
  dag.addStream("pojoRejected", filter.falsePort, declinedFormatter.in);

  dag.addStream("csvSelected", acceptedFormatter.out, acceptedWriter.input);
  dag.addStream("csvRejected", declinedFormatter.out, declinedWriter.input);
}
Use of org.apache.apex.malhar.contrib.formatter.CsvFormatter in project apex-malhar by Apache.
Class CSVMessageFormat, method populateOutputDAG.
/**
 * Adds a {@code CsvFormatter} operator to the DAG under a unique name,
 * sets its schema from the {@code CSV_SCHEMA} operand, and describes it as a
 * {@code RelInfo} built from the formatter's {@code in} and {@code out} ports.
 *
 * @param dag         the DAG the formatter operator is added to
 * @param typeFactory type factory passed on to {@code getRowType}
 * @return a {@code RelInfo} named "CSVFormatter" wrapping the new operator
 */
@Override
public RelInfo populateOutputDAG(DAG dag, JavaTypeFactory typeFactory) {
  String operatorName = OperatorUtils.getUniqueOperatorName("CSVFormatter");
  CsvFormatter csvFormatter = dag.addOperator(operatorName, CsvFormatter.class);

  // The schema is looked up only after the operator has been registered.
  String schema = (String) operands.get(CSV_SCHEMA);
  csvFormatter.setSchema(schema);

  return new RelInfo(
      "CSVFormatter",
      Lists.<Operator.InputPort>newArrayList(csvFormatter.in),
      csvFormatter,
      csvFormatter.out,
      getRowType(typeFactory));
}
Use of org.apache.apex.malhar.contrib.formatter.CsvFormatter in project apex-malhar by Apache.
Class SerDeTest, method testPortEndpoint.
/**
 * Assembles a logical plan with a Kafka input operator feeding a CSV parser and
 * a CSV formatter feeding a Kafka output operator, registers the parser's output
 * port as table ORDERS and the formatter's input port as table SALES, runs an
 * INSERT ... SELECT STREAM statement against that plan, and finally calls
 * {@code dag.validate()}.
 *
 * @throws IOException            declared by the test signature
 * @throws ClassNotFoundException declared by the test signature
 */
@Test
public void testPortEndpoint() throws IOException, ClassNotFoundException {
  LogicalPlan dag = new LogicalPlan();

  // Schema handed to the parser (source side).
  String schemaIn = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
      + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
      + "{\"name\":\"id\",\"type\":\"Integer\"},"
      + "{\"name\":\"Product\",\"type\":\"String\"},"
      + "{\"name\":\"units\",\"type\":\"Integer\"}]}";
  // Schema handed to the formatter (sink side).
  String schemaOut = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
      + "{\"name\":\"RowTime1\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
      + "{\"name\":\"RowTime2\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
      + "{\"name\":\"Product\",\"type\":\"String\"}]}";
  // The same broker address is used for the consumer, the cluster list and the producer.
  String brokers = "localhost:9092";
  String sql = "INSERT INTO SALES "
      + "SELECT STREAM ROWTIME, "
      + "FLOOR(ROWTIME TO DAY), "
      + "APEXCONCAT('OILPAINT', SUBSTRING(PRODUCT, 6, 7)) "
      + "FROM ORDERS WHERE ID > 3 "
      + "AND "
      + "PRODUCT LIKE 'paint%'";

  // --- Source side: Kafka input -> CSV parser ---
  KafkaSinglePortInputOperator source = dag.addOperator("KafkaInput", KafkaSinglePortInputOperator.class);
  source.setTopics("testdata0");
  source.setInitialOffset("EARLIEST");
  Properties consumerProps = new Properties();
  consumerProps.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
  consumerProps.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, KafkaEndpoint.KEY_DESERIALIZER);
  consumerProps.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaEndpoint.VALUE_DESERIALIZER);
  source.setConsumerProps(consumerProps);
  source.setClusters(brokers);

  CsvParser parser = dag.addOperator("CSVParser", CsvParser.class);
  parser.setSchema(schemaIn);
  dag.addStream("KafkaToCSV", source.outputPort, parser.in);

  // --- Sink side: CSV formatter -> Kafka output ---
  CsvFormatter csvFormatter = dag.addOperator("CSVFormatter", CsvFormatter.class);
  csvFormatter.setSchema(schemaOut);

  KafkaSinglePortOutputOperator sink = dag.addOperator("KafkaOutput", KafkaSinglePortOutputOperator.class);
  sink.setTopic("testresult");
  Properties producerProps = new Properties();
  producerProps.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaEndpoint.VALUE_SERIALIZER);
  producerProps.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, KafkaEndpoint.KEY_SERIALIZER);
  producerProps.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokers);
  sink.setProperties(producerProps);
  dag.addStream("CSVToKafka", csvFormatter.out, sink.inputPort);

  // Expose the parser output and formatter input as tables, then run the SQL on the plan.
  SQLExecEnvironment.getEnvironment()
      .registerTable("ORDERS", new StreamEndpoint(parser.out, InputPOJO.class))
      .registerTable("SALES", new StreamEndpoint(csvFormatter.in, OutputPOJO.class))
      .registerFunction("APEXCONCAT", FileEndpointTest.class, "apex_concat_str")
      .executeSQL(dag, sql);

  dag.validate();
}
Use of org.apache.apex.malhar.contrib.formatter.CsvFormatter in project apex-malhar by Apache.
Class Application, method populateDAG.
/**
 * Builds the topology {@code JsonGenerator -> jsonParser -> formatter -> HDFSOutputOperator}.
 * The formatter's schema is loaded via {@code SchemaUtils.jarResourceFileToString(filename)}
 * and its input port is tagged with {@code PojoEvent} as the tuple class.
 *
 * @param dag  the DAG that receives the operators, the port attribute and the streams
 * @param conf configuration handed in by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
  JsonGenerator source = dag.addOperator("JsonGenerator", JsonGenerator.class);
  JsonParser parser = dag.addOperator("jsonParser", JsonParser.class);

  // Formatter: schema comes from the resource named by `filename` (declared outside this method).
  CsvFormatter csvFormatter = dag.addOperator("formatter", CsvFormatter.class);
  String schema = SchemaUtils.jarResourceFileToString(filename);
  csvFormatter.setSchema(schema);
  dag.setInputPortAttribute(csvFormatter.in, PortContext.TUPLE_CLASS, PojoEvent.class);

  // Output operator is configured with an empty line delimiter.
  HDFSOutputOperator<String> sink = dag.addOperator("HDFSOutputOperator", HDFSOutputOperator.class);
  sink.setLineDelimiter("");

  dag.addStream("parserStream", source.out, parser.in);
  dag.addStream("formatterStream", parser.out, csvFormatter.in);
  dag.addStream("outputStream", csvFormatter.out, sink.input);
}
Aggregations