Search in sources :

Example 1 with CSVMessageFormat

use of org.apache.apex.malhar.sql.table.CSVMessageFormat in project apex-malhar by apache.

the class PureStyleSQLApplication method populateDAG.

/**
 * Populates the DAG from one SQL statement read from configuration.
 *
 * <p>Registers the table named by {@code schemaInName} against a {@code KafkaEndpoint}
 * and the table named by {@code schemaOutName} against a {@code FileEndpoint}, each
 * wrapped with a {@code CSVMessageFormat} built from the matching schema definition,
 * registers the {@code APEXCONCAT} function, and then executes the configured SQL.
 *
 * @param dag  the DAG handed to {@code executeSQL}
 * @param conf configuration providing the keys {@code schemaInName}, {@code schemaInDef},
 *             {@code broker}, {@code topic}, {@code schemaOutName}, {@code schemaOutDef},
 *             {@code outputFolder}, {@code destFileName} and {@code sql}
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    // Input side: table name, CSV schema and Kafka coordinates.
    final String inputTable = conf.get("schemaInName");
    final String inputSchema = conf.get("schemaInDef");
    final String kafkaBroker = conf.get("broker");
    final String kafkaTopic = conf.get("topic");

    // Output side: table name, CSV schema and target file location.
    final String outputTable = conf.get("schemaOutName");
    final String outputSchema = conf.get("schemaOutDef");
    final String targetFolder = conf.get("outputFolder");
    final String targetFile = conf.get("destFileName");

    // The statement to run.
    final String statement = conf.get("sql");

    SQLExecEnvironment env = SQLExecEnvironment.getEnvironment();

    // Each step continues on the value returned by the previous call, exactly as a fluent chain would.
    KafkaEndpoint source = new KafkaEndpoint(kafkaBroker, kafkaTopic, new CSVMessageFormat(inputSchema));
    SQLExecEnvironment withInput = env.registerTable(inputTable, source);

    FileEndpoint sink = new FileEndpoint(targetFolder, targetFile, new CSVMessageFormat(outputSchema));
    SQLExecEnvironment withOutput = withInput.registerTable(outputTable, sink);

    SQLExecEnvironment withFunction = withOutput.registerFunction("APEXCONCAT", this.getClass(), "apex_concat_str");
    withFunction.executeSQL(dag, statement);
}
Also used : CSVMessageFormat(org.apache.apex.malhar.sql.table.CSVMessageFormat) KafkaEndpoint(org.apache.apex.malhar.sql.table.KafkaEndpoint) FileEndpoint(org.apache.apex.malhar.sql.table.FileEndpoint)

Example 2 with CSVMessageFormat

use of org.apache.apex.malhar.sql.table.CSVMessageFormat in project apex-malhar by apache.

the class FusionStyleSQLApplication method populateDAG.

/**
 * Populates the DAG by combining explicitly added operators with a SQL statement.
 *
 * <p>A {@code KafkaSinglePortInputOperator} is connected to a {@code CsvParser}; the parser's
 * output port is registered as a SQL table through a {@code StreamEndpoint}, a
 * {@code FileEndpoint} is registered as a second table, and the configured SQL is executed.
 *
 * @param dag  the DAG to which operators and streams are added and which is handed to
 *             {@code executeSQL}
 * @param conf configuration providing the keys {@code sqlSchemaInputName},
 *             {@code sqlSchemaOutputName}, {@code folderPath}, {@code fileName},
 *             {@code sqlSchemaOutputDef} and {@code sql}
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    SQLExecEnvironment sqlEnv = SQLExecEnvironment.getEnvironment();
    sqlEnv.registerFunction("APEXCONCAT", PureStyleSQLApplication.class, "apex_concat_str");

    // Field name -> Java type for the tuples coming out of the CSV parser.
    Map<String, Class> columnTypes = ImmutableMap.<String, Class>of(
        "RowTime", Date.class,
        "id", Integer.class,
        "Product", String.class,
        "units", Integer.class);

    // Kafka input operator, configured with initial offset "EARLIEST".
    KafkaSinglePortInputOperator kafkaSource = dag.addOperator("KafkaInput", KafkaSinglePortInputOperator.class);
    kafkaSource.setInitialOffset("EARLIEST");

    // CSV parser fed by the Kafka operator's output port.
    CsvParser parser = dag.addOperator("CSVParser", CsvParser.class);
    dag.addStream("KafkaToCSV", kafkaSource.outputPort, parser.in);

    // Register the parser's output port as a table.
    String inputTableName = conf.get("sqlSchemaInputName");
    StreamEndpoint parsedRows = new StreamEndpoint(parser.out, columnTypes);
    sqlEnv.registerTable(inputTableName, parsedRows);

    // Register a CSV-formatted file endpoint as a table.
    String outputTableName = conf.get("sqlSchemaOutputName");
    String outputFolder = conf.get("folderPath");
    String outputFile = conf.get("fileName");
    CSVMessageFormat outputFormat = new CSVMessageFormat(conf.get("sqlSchemaOutputDef"));
    FileEndpoint fileSink = new FileEndpoint(outputFolder, outputFile, outputFormat);
    sqlEnv.registerTable(outputTableName, fileSink);

    // Execute the configured SQL against the DAG.
    String statement = conf.get("sql");
    sqlEnv.executeSQL(dag, statement);
}
Also used : StreamEndpoint(org.apache.apex.malhar.sql.table.StreamEndpoint) CSVMessageFormat(org.apache.apex.malhar.sql.table.CSVMessageFormat) SQLExecEnvironment(org.apache.apex.malhar.sql.SQLExecEnvironment) KafkaSinglePortInputOperator(org.apache.apex.malhar.kafka.KafkaSinglePortInputOperator) CsvParser(org.apache.apex.malhar.contrib.parser.CsvParser) FileEndpoint(org.apache.apex.malhar.sql.table.FileEndpoint)

Example 3 with CSVMessageFormat

use of org.apache.apex.malhar.sql.table.CSVMessageFormat in project apex-malhar by apache.

the class ApexSQLTableFactory method create.

/**
 * Creates an {@code ApexSQLTable} whose endpoint and message format are selected by the
 * given operands.
 *
 * <p>The value under {@code Endpoint.ENDPOINT} selects a {@code FileEndpoint} or a
 * {@code KafkaEndpoint} (compared case-insensitively against the {@code EndpointType}
 * names); the value under {@code MessageFormat.MESSAGE_FORMAT} selects a
 * {@code CSVMessageFormat}. The maps under {@code Endpoint.SYSTEM_OPERANDS} and
 * {@code MessageFormat.MESSAGE_FORMAT_OPERANDS} are passed to the endpoint and the message
 * format respectively.
 *
 * @param schemaPlus schema passed through to the created table
 * @param name       table name passed through to the created table
 * @param operands   operand map read for the endpoint type, message format and their operands
 * @param rowType    row type passed through to the created table
 * @return a new {@code ApexSQLTable} wrapping the configured endpoint
 * @throws RuntimeException if the endpoint type or message format operand is missing or
 *                          does not match a supported value
 */
@SuppressWarnings("unchecked")
@Override
public Table create(SchemaPlus schemaPlus, String name, Map<String, Object> operands, RelDataType rowType) {
    Endpoint endpoint;
    String endpointSystemType = (String) operands.get(Endpoint.ENDPOINT);
    // Constant-on-the-left comparison: a missing (null) operand falls through to the
    // descriptive error below instead of raising a bare NullPointerException.
    if (Endpoint.EndpointType.FILE.name().equalsIgnoreCase(endpointSystemType)) {
        endpoint = new FileEndpoint();
    } else if (Endpoint.EndpointType.KAFKA.name().equalsIgnoreCase(endpointSystemType)) {
        endpoint = new KafkaEndpoint();
    } else {
        throw new RuntimeException("Cannot find endpoint: " + endpointSystemType);
    }
    endpoint.setEndpointOperands((Map<String, Object>) operands.get(Endpoint.SYSTEM_OPERANDS));

    MessageFormat mf;
    String messageFormat = (String) operands.get(MessageFormat.MESSAGE_FORMAT);
    // Same null-safe comparison for the message format operand.
    if (MessageFormat.MessageFormatType.CSV.name().equalsIgnoreCase(messageFormat)) {
        mf = new CSVMessageFormat();
    } else {
        throw new RuntimeException("Cannot find message format: " + messageFormat);
    }
    mf.setMessageFormatOperands((Map<String, Object>) operands.get(MessageFormat.MESSAGE_FORMAT_OPERANDS));

    endpoint.setMessageFormat(mf);
    return new ApexSQLTable(schemaPlus, name, operands, rowType, endpoint);
}
Also used : FileEndpoint(org.apache.apex.malhar.sql.table.FileEndpoint) KafkaEndpoint(org.apache.apex.malhar.sql.table.KafkaEndpoint) Endpoint(org.apache.apex.malhar.sql.table.Endpoint) MessageFormat(org.apache.apex.malhar.sql.table.MessageFormat) CSVMessageFormat(org.apache.apex.malhar.sql.table.CSVMessageFormat)

Example 4 with CSVMessageFormat

use of org.apache.apex.malhar.sql.table.CSVMessageFormat in project apex-malhar by apache.

the class SerDeTest method testSQLWithAPI.

/**
 * Registers a CSV-formatted {@code FileEndpoint} as table {@code ORDERS}, executes a
 * streaming SELECT against it on a fresh {@code LogicalPlan}, and validates the plan.
 */
@Test
public void testSQLWithAPI() throws ClassNotFoundException, IOException {
    // Schema for the ORDERS table: RowTime (Date), id, Product, units.
    final String ordersSchema = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
        + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss Z\"}},"
        + "{\"name\":\"id\",\"type\":\"Integer\"},{\"name\":\"Product\",\"type\":\"String\"},"
        + "{\"name\":\"units\",\"type\":\"Integer\"}]}";
    final String query =
        "SELECT STREAM FLOOR(ROWTIME TO HOUR), SUBSTRING(PRODUCT, 0, 5) FROM ORDERS WHERE id > 3";

    LogicalPlan plan = new LogicalPlan();

    CSVMessageFormat csvFormat = new CSVMessageFormat(ordersSchema);
    Endpoint ordersTable = new FileEndpoint("dummyFilePath", csvFormat);

    SQLExecEnvironment env = SQLExecEnvironment.getEnvironment();
    env.registerTable("ORDERS", ordersTable).executeSQL(plan, query);

    plan.validate();
}
Also used : FileEndpoint(org.apache.apex.malhar.sql.table.FileEndpoint) KafkaEndpoint(org.apache.apex.malhar.sql.table.KafkaEndpoint) Endpoint(org.apache.apex.malhar.sql.table.Endpoint) StreamEndpoint(org.apache.apex.malhar.sql.table.StreamEndpoint) CSVMessageFormat(org.apache.apex.malhar.sql.table.CSVMessageFormat) LogicalPlan(com.datatorrent.stram.plan.logical.LogicalPlan) Test(org.junit.Test)

Example 5 with CSVMessageFormat

use of org.apache.apex.malhar.sql.table.CSVMessageFormat in project apex-malhar by apache.

the class SerDeTest method testSQLSelectInsertWithAPI.

/**
 * Registers CSV-formatted {@code FileEndpoint}s as tables {@code ORDERS} and {@code SALES},
 * executes an INSERT ... SELECT STREAM statement from the former into the latter on a fresh
 * {@code LogicalPlan}, and validates the plan.
 */
@Test
public void testSQLSelectInsertWithAPI() throws IOException, ClassNotFoundException {
    // Schema for ORDERS: RowTime (Date), id, Product, units.
    final String ordersSchema = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
        + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss\"}},"
        + "{\"name\":\"id\",\"type\":\"Integer\"},"
        + "{\"name\":\"Product\",\"type\":\"String\"},"
        + "{\"name\":\"units\",\"type\":\"Integer\"}]}";
    // Schema for SALES: RowTime (Date), Product.
    final String salesSchema = "{\"separator\":\",\",\"quoteChar\":\"\\\"\",\"fields\":["
        + "{\"name\":\"RowTime\",\"type\":\"Date\",\"constraints\":{\"format\":\"dd/MM/yyyy hh:mm:ss\"}},"
        + "{\"name\":\"Product\",\"type\":\"String\"}]}";
    final String statement = "INSERT INTO SALES SELECT STREAM FLOOR(ROWTIME TO HOUR), SUBSTRING(PRODUCT, 0, 5) "
        + "FROM ORDERS WHERE id > 3";

    LogicalPlan plan = new LogicalPlan();

    SQLExecEnvironment env = SQLExecEnvironment.getEnvironment();

    // Register ORDERS first, then SALES, continuing on each call's return value.
    FileEndpoint ordersFile = new FileEndpoint("dummyFilePathInput", new CSVMessageFormat(ordersSchema));
    SQLExecEnvironment withOrders = env.registerTable("ORDERS", ordersFile);

    FileEndpoint salesFile = new FileEndpoint("dummyFilePathOutput", "out.tmp", new CSVMessageFormat(salesSchema));
    SQLExecEnvironment withSales = withOrders.registerTable("SALES", salesFile);

    withSales.executeSQL(plan, statement);

    plan.validate();
}
Also used : CSVMessageFormat(org.apache.apex.malhar.sql.table.CSVMessageFormat) LogicalPlan(com.datatorrent.stram.plan.logical.LogicalPlan) FileEndpoint(org.apache.apex.malhar.sql.table.FileEndpoint) Test(org.junit.Test)

Aggregations

CSVMessageFormat (org.apache.apex.malhar.sql.table.CSVMessageFormat): 8; FileEndpoint (org.apache.apex.malhar.sql.table.FileEndpoint): 6; KafkaEndpoint (org.apache.apex.malhar.sql.table.KafkaEndpoint): 5; LogicalPlan (com.datatorrent.stram.plan.logical.LogicalPlan): 4; Test (org.junit.Test): 4; Endpoint (org.apache.apex.malhar.sql.table.Endpoint): 2; StreamEndpoint (org.apache.apex.malhar.sql.table.StreamEndpoint): 2; CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser): 1; KafkaSinglePortInputOperator (org.apache.apex.malhar.kafka.KafkaSinglePortInputOperator): 1; SQLExecEnvironment (org.apache.apex.malhar.sql.SQLExecEnvironment): 1; MessageFormat (org.apache.apex.malhar.sql.table.MessageFormat): 1