Search in sources :

Example 1 with StringFileOutputOperator

use of org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator in project apex-malhar by apache.

the class Application method populateDAG.

/**
 * Builds the application DAG: an FTP string reader whose output port is
 * streamed into a string file writer.
 *
 * @param dag  the DAG to which the two operators and the connecting stream are added
 * @param conf configuration supplied by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    // ftp read operator. Configuration through resources/META-INF/properties.xml
    FTPStringInputOperator reader = dag.addOperator("Reader", new FTPStringInputOperator());
    // Set properties for the FTP input operator
    reader.setHost("localhost");
    reader.setUserName("ftp");
    reader.setDirectory("sourceDir");
    reader.setPartitionCount(2);
    // writer that writes strings to a file on hdfs
    StringFileOutputOperator writer = dag.addOperator("Writer", new StringFileOutputOperator());
    // Set properties for the output operator
    writer.setFilePath("malhar_examples/ftp");
    // This used to be a second setFilePath("destination") call, which overwrote the
    // directory set on the previous line and left the output file name unset.
    writer.setOutputFileName("destination");
    // Connect reader output to writer
    dag.addStream("data", reader.output, writer.input);
}
Also used : FTPStringInputOperator(org.apache.apex.malhar.lib.io.AbstractFTPInputOperator.FTPStringInputOperator) StringFileOutputOperator(org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator)

Example 2 with StringFileOutputOperator

use of org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator in project apex-malhar by apache.

the class Application method populateDAG.

/**
 * Wires a linear pipeline into the DAG:
 * record reader module -> CSV parser -> CSV formatter -> string file output.
 *
 * @param dag  the DAG that receives the module, the operators and the streams
 * @param conf configuration supplied by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    // Register the stages in pipeline order, source first.
    FSRecordReaderModule source = dag.addModule("recordReader", FSRecordReaderModule.class);
    CsvParser parser = dag.addOperator("csvParser", CsvParser.class);
    CsvFormatter csvFormatter = dag.addOperator("formatter", new CsvFormatter());
    StringFileOutputOperator sink = dag.addOperator("fileOutput", new StringFileOutputOperator());

    // Chain each stage's output port to the next stage's input port.
    dag.addStream("record", source.records, parser.in);
    dag.addStream("pojo", parser.out, csvFormatter.in);
    dag.addStream("string", csvFormatter.out, sink.input);
}
Also used : FSRecordReaderModule(org.apache.apex.malhar.lib.fs.FSRecordReaderModule) CsvFormatter(org.apache.apex.malhar.contrib.formatter.CsvFormatter) CsvParser(org.apache.apex.malhar.contrib.parser.CsvParser) StringFileOutputOperator(org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator)

Example 3 with StringFileOutputOperator

use of org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator in project apex-malhar by apache.

the class Application method populateDAG.

/**
 * Wires a branching pipeline into the DAG: records are read and parsed, then a
 * filter operator routes them through its {@code truePort} and {@code falsePort}
 * to two separate formatter/file-output chains.
 *
 * @param dag  the DAG that receives the module, the operators and the streams
 * @param conf configuration supplied by the caller; not read by this method
 */
@Override
public void populateDAG(DAG dag, Configuration conf) {
    // Shared front of the pipeline: reader -> parser -> filter.
    FSRecordReaderModule source = dag.addModule("recordReader", FSRecordReaderModule.class);
    CsvParser parser = dag.addOperator("csvParser", CsvParser.class);
    FilterOperator filter = dag.addOperator("filterOperator", new FilterOperator());

    // One formatter and one file writer per filter branch.
    CsvFormatter acceptedFormatter = dag.addOperator("selectedFormatter", new CsvFormatter());
    CsvFormatter declinedFormatter = dag.addOperator("rejectedFormatter", new CsvFormatter());
    StringFileOutputOperator acceptedSink = dag.addOperator("selectedOutput", new StringFileOutputOperator());
    StringFileOutputOperator declinedSink = dag.addOperator("rejectedOutput", new StringFileOutputOperator());

    // Front of the pipeline.
    dag.addStream("record", source.records, parser.in);
    dag.addStream("pojo", parser.out, filter.input);

    // Fan out on the filter's two output ports.
    dag.addStream("pojoSelected", filter.truePort, acceptedFormatter.in);
    dag.addStream("pojoRejected", filter.falsePort, declinedFormatter.in);

    // Each branch ends in its own file writer.
    dag.addStream("csvSelected", acceptedFormatter.out, acceptedSink.input);
    dag.addStream("csvRejected", declinedFormatter.out, declinedSink.input);
}
Also used : FilterOperator(org.apache.apex.malhar.lib.filter.FilterOperator) FSRecordReaderModule(org.apache.apex.malhar.lib.fs.FSRecordReaderModule) CsvFormatter(org.apache.apex.malhar.contrib.formatter.CsvFormatter) CsvParser(org.apache.apex.malhar.contrib.parser.CsvParser) StringFileOutputOperator(org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator)

Example 4 with StringFileOutputOperator

use of org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator in project apex-malhar by apache.

the class GenericFileOutputOperatorTest method testIdleWindowsFinalize.

/**
 * Test file rollover in case of idle windows.
 *
 * <p>Drives the operator through windows 0..26, some of which carry no tuples,
 * with {@code maxIdleWindows} set to 5, and then checks the contents of three
 * output parts.
 *
 * @throws IOException declared by the test signature; presumably raised by the
 *         output-checking helper (not visible here)
 */
@Test
public void testIdleWindowsFinalize() throws IOException {
    StringFileOutputOperator operator = new StringFileOutputOperator();
    operator.setOutputFileName("output.txt");
    operator.setFilePath(testMeta.getDir());
    operator.setAlwaysWriteToTmp(true);
    operator.setMaxIdleWindows(5);
    operator.setup(testMeta.testOperatorContext);

    // One entry per window id (0..26); an empty entry is a window with no tuples.
    String[][] windowTuples = {
        { "0a", "0b" },
        { "1a", "1b" },
        {},
        {},
        {},
        {},
        { "6a", "6b" },
        { "7a", "7b" },
        {},
        {},
        {},
        {},
        {},
        { "13a", "13b" },
        { "14a", "14b" },
        {},
        {},
        {},
        { "18a", "18b" },
        { "19a", "19b" },
        {},
        {},
        {},
        {},
        {},
        {},
        { "26a", "26b" }
    };

    // First batch: windows 0 through 12.
    for (int windowId = 0; windowId < 13; windowId++) {
        operator.beginWindow(windowId);
        for (String tuple : windowTuples[windowId]) {
            operator.input.put(tuple);
        }
        operator.endWindow();
    }
    checkpoint(operator, 10);
    operator.committed(10);

    // Second batch: windows 13 through 26 (the last index of windowTuples).
    for (int windowId = 13; windowId < windowTuples.length; windowId++) {
        operator.beginWindow(windowId);
        for (String tuple : windowTuples[windowId]) {
            operator.input.put(tuple);
        }
        operator.endWindow();
    }
    checkpoint(operator, 20);
    operator.committed(20);
    checkpoint(operator, 26);
    operator.committed(26);

    // Expected contents of each output part, in order.
    String[] expectedParts = {
        "0a\n0b\n1a\n1b\n6a\n6b\n7a\n7b\n",
        "13a\n13b\n14a\n14b\n18a\n18b\n19a\n19b\n",
        "26a\n26b\n"
    };
    for (int part = 0; part < expectedParts.length; part++) {
        checkOutput(part, testMeta.getDir() + "/output.txt_0", expectedParts[part], true);
    }
}
Also used : StringFileOutputOperator(org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator) Test(org.junit.Test) AbstractFileOutputOperatorTest(org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest)

Aggregations

StringFileOutputOperator (org.apache.apex.malhar.lib.fs.GenericFileOutputOperator.StringFileOutputOperator): 4; CsvFormatter (org.apache.apex.malhar.contrib.formatter.CsvFormatter): 2; CsvParser (org.apache.apex.malhar.contrib.parser.CsvParser): 2; FSRecordReaderModule (org.apache.apex.malhar.lib.fs.FSRecordReaderModule): 2; FilterOperator (org.apache.apex.malhar.lib.filter.FilterOperator): 1; FTPStringInputOperator (org.apache.apex.malhar.lib.io.AbstractFTPInputOperator.FTPStringInputOperator): 1; AbstractFileOutputOperatorTest (org.apache.apex.malhar.lib.io.fs.AbstractFileOutputOperatorTest): 1; Test (org.junit.Test): 1