Search in sources :

Example 61 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.

the class ExcelInputReaderTest method testWithReProcessedFalse.

/**
 * Runs the Excel source with {@code reprocess=false} after one of the test files has been recorded in the
 * tracking table, and checks that exactly three records reach the sink with matching A/B columns.
 */
@Test
public void testWithReProcessedFalse() throws Exception {
    final String trackingTableName = "trackMemoryTableWithReProcessedFalse";
    final String outputDatasetName = "output-testWithReProcessedFalse";

    // Plugin configuration; insertion order is kept identical to the property list of the other tests.
    ImmutableMap.Builder<String, String> properties = new ImmutableMap.Builder<String, String>();
    properties.put(Constants.Reference.REFERENCE_NAME, "TestCase");
    properties.put("filePath", sourceFolderUri);
    properties.put("filePattern", ".*");
    properties.put("sheet", "Sheet Name");
    properties.put("sheetValue", "Sheet1");
    properties.put("memoryTableName", trackingTableName);
    properties.put("tableExpiryPeriod", "30");
    properties.put("reprocess", "false");
    properties.put("columnList", "");
    properties.put("columnMapping", "");
    properties.put("skipFirstRow", "false");
    properties.put("terminateIfEmptyRow", "false");
    properties.put("rowsLimit", "10");
    properties.put("outputSchema", "A:string,B:string,C:String,D:String,E:String,F:String");
    properties.put("ifErrorRecord", "Ignore error and continue");
    properties.put("errorDatasetName", "");
    Map<String, String> sourceProperties = properties.build();

    ETLPlugin excelPlugin = new ETLPlugin("Excel", BatchSource.PLUGIN_TYPE, sourceProperties, null);
    ETLStage source = new ETLStage("ExcelInputtest", excelPlugin);
    ETLStage sink = new ETLStage("sink", MockSink.getPlugin(outputDatasetName));
    ApplicationManager appManager = deployApp(source, sink, "testWithReProcessedFalse");

    // Record the second test file in the tracking table before the run
    // (presumably so the source treats it as already processed -- confirm against the plugin).
    DataSetManager<KeyValueTable> trackingManager = getDataset(trackingTableName);
    KeyValueTable trackingTable = trackingManager.get();
    File alreadyTracked = new File(sourceFolder, excelTestFileTwo);
    String trackedKey = alreadyTracked.toURI().toString();
    String trackedAt = String.valueOf(System.currentTimeMillis());
    trackingTable.write(trackedKey, trackedAt);
    trackingManager.flush();

    startWorkflow(appManager, ProgramRunStatus.COMPLETED);

    DataSetManager<Table> outputManager = getDataset(outputDatasetName);
    List<StructuredRecord> output = MockSink.readOutput(outputManager);

    // Expected value of column A keyed by the value of column B.
    Map<String, String> expectedAByB = new HashMap<>();
    expectedAByB.put("john", "3.0");
    expectedAByB.put("romy", "1.0");
    expectedAByB.put("name", "id");

    Assert.assertEquals("Expected records", 3, output.size());
    for (StructuredRecord record : output) {
        Assert.assertEquals(expectedAByB.get(record.get("B")), record.get("A"));
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) ImmutableMap(com.google.common.collect.ImmutableMap) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) File(java.io.File) Test(org.junit.Test)

Example 62 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.

the class XMLReaderBatchSourceTest method createExpiredRecord.

/**
 * Pre-populates the file-tracking KeyValueTable with one entry for the small catalog XML file,
 * stamped with a time 40 days before now.
 *
 * @param processedFileTable name of the KeyValueTable dataset that receives the entry
 * @throws Exception propagated from the dataset lookup, write or flush
 */
private void createExpiredRecord(String processedFileTable) throws Exception {
    DataSetManager<KeyValueTable> trackingManager = getDataset(processedFileTable);
    KeyValueTable trackingTable = trackingManager.get();

    // Key: URI of the small catalog file. Value: epoch millis of "now minus 40 days".
    String fileUri = new File(sourceFolder, CATALOG_SMALL_XML_FILE_NAME).toURI().toString();
    Calendar fortyDaysAgo = Calendar.getInstance();
    fortyDaysAgo.add(Calendar.DATE, -40);
    long expiredMillis = fortyDaysAgo.getTimeInMillis();

    trackingTable.write(Bytes.toBytes(fileUri), Bytes.toBytes(expiredMillis));
    trackingManager.flush();
}
Also used : KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Calendar(java.util.Calendar) File(java.io.File) Date(java.util.Date)

Example 63 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.

the class ValueMapperTest method testWithNoDefaults.

/**
 * Runs the ValueMapper transform with an empty {@code defaults} property and verifies, per output record,
 * the mapped designation name and the untouched salary.
 */
@Test
public void testWithNoDefaults() throws Exception {
    final String inputTable = "input_table_without_defaults";
    final String sinkTable = "output_table_without_defaults";
    final String lookupTable = "designation_lookup_table_without_defaults";

    Map<String, String> transformProperties = new ImmutableMap.Builder<String, String>()
        .put("mapping", "designationid:designation_lookup_table_without_defaults:designationName")
        .put("defaults", "")
        .build();

    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputTable));
    ETLStage transform = new ETLStage("transform",
                                      new ETLPlugin("ValueMapper", Transform.PLUGIN_TYPE, transformProperties, null));
    ETLStage sink = new ETLStage("sink", MockSink.getPlugin(sinkTable));

    // source -> transform -> sink
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(transform)
        .addStage(sink)
        .addConnection(source.getName(), transform.getName())
        .addConnection(transform.getName(), sink.getName())
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(BATCH_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("valuemappertest_without_defaults");
    ApplicationManager appManager = deployApplication(appId, appRequest);

    // Fill the lookup table: designation id -> designation name.
    addDatasetInstance(KeyValueTable.class.getName(), lookupTable);
    DataSetManager<KeyValueTable> lookupManager = getDataset(lookupTable);
    KeyValueTable lookup = lookupManager.get();
    String[][] designations = {{"1", "SE"}, {"2", "SSE"}, {"3", "ML"}, {"4", "TL"}};
    for (String[] designation : designations) {
        lookup.write(designation[0].getBytes(Charsets.UTF_8), designation[1].getBytes(Charsets.UTF_8));
    }
    lookupManager.flush();

    // Input covers a null id, two ids present in the lookup table, and an empty id.
    DataSetManager<Table> inputManager = getDataset(inputTable);
    StructuredRecord john = StructuredRecord.builder(SOURCE_SCHEMA)
        .set(ID, "100").set(NAME, "John").set(SALARY, "1000").set(DESIGNATIONID, null).build();
    StructuredRecord kerry = StructuredRecord.builder(SOURCE_SCHEMA)
        .set(ID, "101").set(NAME, "Kerry").set(SALARY, "1030").set(DESIGNATIONID, "2").build();
    StructuredRecord mathew = StructuredRecord.builder(SOURCE_SCHEMA)
        .set(ID, "102").set(NAME, "Mathew").set(SALARY, "1230").set(DESIGNATIONID, "").build();
    StructuredRecord allie = StructuredRecord.builder(SOURCE_SCHEMA)
        .set(ID, "103").set(NAME, "Allie").set(SALARY, "2000").set(DESIGNATIONID, "4").build();
    List<StructuredRecord> input = ImmutableList.of(john, kerry, mathew, allie);
    MockSource.writeInput(inputManager, input);

    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 1, 5, TimeUnit.MINUTES);

    DataSetManager<Table> outputManager = getDataset(sinkTable);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);

    // HashMap (not an immutable map) because one expected designation is null.
    Map<String, String> expectedDesignationByName = new HashMap<>();
    expectedDesignationByName.put("John", null);
    expectedDesignationByName.put("Kerry", "SSE");
    expectedDesignationByName.put("Mathew", "");
    expectedDesignationByName.put("Allie", "TL");

    Map<String, String> expectedSalaryByName = new HashMap<>();
    expectedSalaryByName.put("John", "1000");
    expectedSalaryByName.put("Kerry", "1030");
    expectedSalaryByName.put("Mathew", "1230");
    expectedSalaryByName.put("Allie", "2000");

    Assert.assertEquals(4, outputRecords.size());
    // All designation checks run before any salary check.
    for (StructuredRecord record : outputRecords) {
        Assert.assertEquals(expectedDesignationByName.get(record.get(NAME)), record.get(DESIGNATIONNAME));
    }
    for (StructuredRecord record : outputRecords) {
        Assert.assertEquals(expectedSalaryByName.get(record.get(NAME)), record.get(SALARY));
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) ImmutableMap(com.google.common.collect.ImmutableMap) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 64 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testSuccess.

/**
 * Runs the AggregateTimeseriesByTag MapReduce over freshly written input and verifies the per-tag
 * aggregates, the markers in the "beforeSubmit" and "onFinish" tables, and that both counter tables
 * recorded mapper and reducer records.
 *
 * @param frequentFlushing forwarded unchanged to {@code runProgram}
 */
private void testSuccess(boolean frequentFlushing) throws Exception {
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);

    // Open a tx context and "get" every dataset up front so that they all end up in datasetCache
    datasetCache.newTransactionContext();
    final TimeseriesTable timeSeries = datasetCache.getDataset("timeSeries");
    final KeyValueTable beforeSubmit = datasetCache.getDataset("beforeSubmit");
    final KeyValueTable onFinish = datasetCache.getDataset("onFinish");
    final Table counterTable = datasetCache.getDataset("counters");
    final Table contextCounterTable = datasetCache.getDataset("countersFromContext");

    // 1) fill test data
    fillTestInputData(txExecutorFactory, timeSeries, false);

    // 2) run job, remembering the wall-clock window it ran in
    final long start = System.currentTimeMillis();
    runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
    final long stop = System.currentTimeMillis();

    // 3) verify results inside a transaction
    TransactionExecutor.Subroutine verification = new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // note: not all records add to the sum since filter by tag="tag1" and ts={1..3} is used
            Map<String, Long> expectedSums = Maps.newHashMap();
            expectedSums.put("tag1", 18L);
            expectedSums.put("tag2", 3L);
            expectedSums.put("tag3", 18L);

            int entriesSeen = 0;
            for (Iterator<TimeseriesTable.Entry> entries = timeSeries.read(AggregateMetricsByTag.BY_TAGS, start, stop);
                 entries.hasNext();) {
                TimeseriesTable.Entry entry = entries.next();
                String tag = Bytes.toString(entry.getTags()[0]);
                long expectedSum = expectedSums.get(tag);
                Assert.assertEquals(expectedSum, Bytes.toLong(entry.getValue()));
                entriesSeen++;
            }
            Assert.assertEquals(expectedSums.size(), entriesSeen);

            Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
                                     beforeSubmit.read(Bytes.toBytes("beforeSubmit")));
            Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
                                     onFinish.read(Bytes.toBytes("onFinish")));

            // Both counter tables must show a positive record count for the mapper row, then the reducer row.
            for (Table countersToCheck : new Table[] {counterTable, contextCounterTable}) {
                for (String taskRow : new String[] {"mapper", "reducer"}) {
                    Assert.assertTrue(countersToCheck.get(new Get(taskRow)).getLong("records", 0) > 0);
                }
            }
        }
    };
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
        .execute(verification);

    datasetCache.dismissTransactionContext();
    // todo: verify metrics. Will be possible after refactor for CDAP-765
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Get(io.cdap.cdap.api.dataset.table.Get) Iterator(java.util.Iterator) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap)

Example 65 with KeyValueTable

use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.

the class MapReduceProgramRunnerTest method testFailure.

// TODO: this tests failure in Map tasks. We also need to test: failure in Reduce task, kill of a job by user.
/**
 * Runs the AggregateTimeseriesByTag MapReduce over input that contains intentionally bad data and
 * verifies what is visible afterwards.
 *
 * @param frequentFlushing forwarded unchanged to {@code runProgram}
 */
private void testFailure(boolean frequentFlushing) throws Exception {
    // When the mapreduce job fails we want to verify that:
    // * things written in initialize() remain and are visible to others
    // * things written in tasks are not visible to others TODO AAA: do invalidate
    // * things written in onfinish() remain and are visible to others
    // NOTE: the code of this test is similar to testTimeSeriesRecordsCount() test. We intentionally put some
    // "bad data" here, which the map tasks recognize as a message to emulate failure
    final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);

    // Open a tx context and "get" every dataset up front so that they all end up in datasetCache
    datasetCache.newTransactionContext();
    final TimeseriesTable timeSeries = datasetCache.getDataset("timeSeries");
    final KeyValueTable beforeSubmit = datasetCache.getDataset("beforeSubmit");
    final KeyValueTable onFinish = datasetCache.getDataset("onFinish");
    final Table counterTable = datasetCache.getDataset("counters");
    final Table contextCounterTable = datasetCache.getDataset("countersFromContext");

    // 1) fill test data
    fillTestInputData(txExecutorFactory, timeSeries, true);

    // 2) run job, remembering the wall-clock window it ran in
    final long start = System.currentTimeMillis();
    runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, false);
    final long stop = System.currentTimeMillis();

    // 3) verify results inside a transaction
    TransactionExecutor.Subroutine verification = new TransactionExecutor.Subroutine() {

        @Override
        public void apply() {
            // data should be rolled back todo: test that partially written is rolled back too
            Assert.assertFalse(timeSeries.read(AggregateMetricsByTag.BY_TAGS, start, stop).hasNext());

            // but what was written in beforeSubmit and onFinish is available to others
            Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
                                     beforeSubmit.read(Bytes.toBytes("beforeSubmit")));
            Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
                                     onFinish.read(Bytes.toBytes("onFinish")));

            // Neither counter table may show any records for the mapper row or the reducer row.
            for (Table countersToCheck : new Table[] {counterTable, contextCounterTable}) {
                for (String taskRow : new String[] {"mapper", "reducer"}) {
                    Assert.assertEquals(0, countersToCheck.get(new Get(taskRow)).getLong("records", 0));
                }
            }
        }
    };
    Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
        .execute(verification);

    datasetCache.dismissTransactionContext();
}
Also used : Table(io.cdap.cdap.api.dataset.table.Table) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable) ApplicationWithPrograms(io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Get(io.cdap.cdap.api.dataset.table.Get) TransactionExecutor(org.apache.tephra.TransactionExecutor) TimeseriesTable(io.cdap.cdap.api.dataset.lib.TimeseriesTable)

Aggregations

KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable): 122; Test (org.junit.Test): 65; ApplicationManager (io.cdap.cdap.test.ApplicationManager): 59; HashMap (java.util.HashMap): 27; SparkManager (io.cdap.cdap.test.SparkManager): 26; Table (io.cdap.cdap.api.dataset.table.Table): 21; TransactionExecutor (org.apache.tephra.TransactionExecutor): 20; WorkflowManager (io.cdap.cdap.test.WorkflowManager): 19; FileSet (io.cdap.cdap.api.dataset.lib.FileSet): 18; ApplicationWithPrograms (io.cdap.cdap.internal.app.deploy.pipeline.ApplicationWithPrograms): 18; KeyValue (io.cdap.cdap.api.dataset.lib.KeyValue): 14; ServiceManager (io.cdap.cdap.test.ServiceManager): 14; IOException (java.io.IOException): 14; ArrayList (java.util.ArrayList): 14; Location (org.apache.twill.filesystem.Location): 14; ImmutableMap (com.google.common.collect.ImmutableMap): 13; StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord): 13; ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage): 13; File (java.io.File): 13; URL (java.net.URL): 12