Example usage of org.apache.apex.malhar.sql.table.CSVMessageFormat from the Apache apex-malhar project: method populateDAG of class PureStyleSQLApplication.
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // --- Source (Kafka) table configuration ---
  String inputSchemaName = conf.get("schemaInName");
  String inputSchemaDef = conf.get("schemaInDef");
  String kafkaBroker = conf.get("broker");
  String kafkaTopic = conf.get("topic");

  // --- Destination (file) table configuration ---
  String outputSchemaName = conf.get("schemaOutName");
  String outputSchemaDef = conf.get("schemaOutDef");
  String destFolder = conf.get("outputFolder");
  String destFile = conf.get("destFileName");

  // SQL statement to be translated into DAG operators.
  String statement = conf.get("sql");

  // Register both tables and the custom scalar function step by step,
  // then hand the statement to the SQL planner.
  SQLExecEnvironment env = SQLExecEnvironment.getEnvironment();
  env = env.registerTable(inputSchemaName,
      new KafkaEndpoint(kafkaBroker, kafkaTopic, new CSVMessageFormat(inputSchemaDef)));
  env = env.registerTable(outputSchemaName,
      new FileEndpoint(destFolder, destFile, new CSVMessageFormat(outputSchemaDef)));
  env = env.registerFunction("APEXCONCAT", this.getClass(), "apex_concat_str");
  env.executeSQL(dag, statement);
}
Example usage of org.apache.apex.malhar.sql.table.CSVMessageFormat from the Apache apex-malhar project: method populateDAG of class FusionStyleSQLApplication.
@Override
public void populateDAG(DAG dag, Configuration conf) {
  // SQL environment plus the custom string-concatenation scalar function.
  SQLExecEnvironment sqlEnv = SQLExecEnvironment.getEnvironment();
  sqlEnv.registerFunction("APEXCONCAT", PureStyleSQLApplication.class, "apex_concat_str");

  // Column-name to Java-type mapping for tuples emitted by the CSV parser.
  Map<String, Class> csvFieldTypes = ImmutableMap.<String, Class>of(
      "RowTime", Date.class,
      "id", Integer.class,
      "Product", String.class,
      "units", Integer.class);

  // Kafka source feeding raw CSV lines into the DAG.
  KafkaSinglePortInputOperator kafkaSource =
      dag.addOperator("KafkaInput", KafkaSinglePortInputOperator.class);
  kafkaSource.setInitialOffset("EARLIEST");

  // Parser converting raw lines to tuples; wired directly after the source.
  CsvParser parser = dag.addOperator("CSVParser", CsvParser.class);
  dag.addStream("KafkaToCSV", kafkaSource.outputPort, parser.in);

  // Parser output becomes the SQL input table.
  sqlEnv.registerTable(conf.get("sqlSchemaInputName"),
      new StreamEndpoint(parser.out, csvFieldTypes));

  // File endpoint becomes the SQL output table.
  sqlEnv.registerTable(conf.get("sqlSchemaOutputName"),
      new FileEndpoint(conf.get("folderPath"), conf.get("fileName"),
          new CSVMessageFormat(conf.get("sqlSchemaOutputDef"))));

  // Translate the configured SQL statement into DAG operators.
  sqlEnv.executeSQL(dag, conf.get("sql"));
}
Example usage of org.apache.apex.malhar.sql.table.CSVMessageFormat from the Apache apex-malhar project: method create of class ApexSQLTableFactory.
@SuppressWarnings("unchecked")
@Override
public Table create(SchemaPlus schemaPlus, String name, Map<String, Object> operands, RelDataType rowType) {
  // Resolve the endpoint (backing storage system) for this table.
  // Constant-first comparison avoids an NPE when the operand is missing:
  // an absent/unknown endpoint now falls through to the diagnostic below.
  String endpointSystemType = (String) operands.get(Endpoint.ENDPOINT);
  Endpoint endpoint;
  if (Endpoint.EndpointType.FILE.name().equalsIgnoreCase(endpointSystemType)) {
    endpoint = new FileEndpoint();
  } else if (Endpoint.EndpointType.KAFKA.name().equalsIgnoreCase(endpointSystemType)) {
    endpoint = new KafkaEndpoint();
  } else {
    // Include the offending value so misconfiguration is diagnosable.
    throw new RuntimeException("Cannot find endpoint: " + endpointSystemType);
  }
  endpoint.setEndpointOperands((Map<String, Object>) operands.get(Endpoint.SYSTEM_OPERANDS));

  // Resolve the row serialization format; same null-safe comparison.
  String messageFormat = (String) operands.get(MessageFormat.MESSAGE_FORMAT);
  MessageFormat mf;
  if (MessageFormat.MessageFormatType.CSV.name().equalsIgnoreCase(messageFormat)) {
    mf = new CSVMessageFormat();
  } else {
    throw new RuntimeException("Cannot find message format: " + messageFormat);
  }
  mf.setMessageFormatOperands((Map<String, Object>) operands.get(MessageFormat.MESSAGE_FORMAT_OPERANDS));
  endpoint.setMessageFormat(mf);

  return new ApexSQLTable(schemaPlus, name, operands, rowType, endpoint);
}
Example usage of org.apache.apex.malhar.sql.table.CSVMessageFormat from the Apache apex-malhar project: test method testSQLWithAPI of class SerDeTest.
@Test
public void testSQLWithAPI() throws ClassNotFoundException, IOException {
  LogicalPlan dag = new LogicalPlan();

  // CSV schema for the ORDERS input: RowTime, id, Product, units.
  String schema = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
      + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
      + "{\"name\":\"id\",\"type\":\"Integer\"},{\"name\":\"Product\",\"type\":\"String\"},"
      + "{\"name\":\"units\",\"type\":\"Integer\"}]}";
  Endpoint endpoint = new FileEndpoint("dummyFilePath", new CSVMessageFormat(schema));

  // Register the table, plan the SELECT into the DAG, and validate it.
  SQLExecEnvironment env = SQLExecEnvironment.getEnvironment();
  env = env.registerTable("ORDERS", endpoint);
  env.executeSQL(dag, "SELECT STREAM FLOOR(ROWTIME TO HOUR), SUBSTRING(PRODUCT, 0, 5) FROM ORDERS WHERE id > 3");

  dag.validate();
}
Example usage of org.apache.apex.malhar.sql.table.CSVMessageFormat from the Apache apex-malhar project: test method testSQLSelectInsertWithAPI of class SerDeTest.
@Test
public void testSQLSelectInsertWithAPI() throws IOException, ClassNotFoundException {
  LogicalPlan dag = new LogicalPlan();

  // Input table schema: RowTime, id, Product, units.
  String schemaIn = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
      + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss\"}},"
      + "{\"name\":\"id\",\"type\":\"Integer\"},"
      + "{\"name\":\"Product\",\"type\":\"String\"},"
      + "{\"name\":\"units\",\"type\":\"Integer\"}]}";

  // Output table schema: RowTime, Product.
  String schemaOut = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
      + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss\"}},"
      + "{\"name\":\"Product\",\"type\":\"String\"}]}";

  // Register source and sink tables, plan the INSERT..SELECT, validate.
  SQLExecEnvironment env = SQLExecEnvironment.getEnvironment();
  env = env.registerTable("ORDERS", new FileEndpoint("dummyFilePathInput", new CSVMessageFormat(schemaIn)));
  env = env.registerTable("SALES", new FileEndpoint("dummyFilePathOutput", "out.tmp", new CSVMessageFormat(schemaOut)));
  env.executeSQL(dag, "INSERT INTO SALES SELECT STREAM FLOOR(ROWTIME TO HOUR), SUBSTRING(PRODUCT, 0, 5) " + "FROM ORDERS WHERE id > 3");

  dag.validate();
}
Aggregations