Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.
From the class ExcelInputReaderTest, method testWithReProcessedFalse.
@Test
public void testWithReProcessedFalse() throws Exception {
  Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
    .put(Constants.Reference.REFERENCE_NAME, "TestCase")
    .put("filePath", sourceFolderUri)
    .put("filePattern", ".*")
    .put("sheet", "Sheet Name")
    .put("sheetValue", "Sheet1")
    .put("memoryTableName", "trackMemoryTableWithReProcessedFalse")
    .put("tableExpiryPeriod", "30")
    .put("reprocess", "false")
    .put("columnList", "")
    .put("columnMapping", "")
    .put("skipFirstRow", "false")
    .put("terminateIfEmptyRow", "false")
    .put("rowsLimit", "10")
    .put("outputSchema", "A:string,B:string,C:String,D:String,E:String,F:String")
    .put("ifErrorRecord", "Ignore error and continue")
    .put("errorDatasetName", "")
    .build();
  ETLStage source = new ETLStage("ExcelInputtest",
                                 new ETLPlugin("Excel", BatchSource.PLUGIN_TYPE, sourceProperties, null));
  String outputDatasetName = "output-testWithReProcessedFalse";
  ETLStage sink = new ETLStage("sink", MockSink.getPlugin(outputDatasetName));
  ApplicationManager appManager = deployApp(source, sink, "testWithReProcessedFalse");
  // Seed the file-tracking KeyValueTable with the second test file's URI so that,
  // with reprocess=false, the run treats that file as already processed and skips it.
  DataSetManager<KeyValueTable> dataSetManager = getDataset("trackMemoryTableWithReProcessedFalse");
  KeyValueTable keyValueTable = dataSetManager.get();
  File testFile = new File(sourceFolder, excelTestFileTwo);
  keyValueTable.write(testFile.toURI().toString(), String.valueOf(System.currentTimeMillis()));
  dataSetManager.flush();
  startWorkflow(appManager, ProgramRunStatus.COMPLETED);
  DataSetManager<Table> outputManager = getDataset(outputDatasetName);
  List<StructuredRecord> output = MockSink.readOutput(outputManager);
  // Expected rows from the remaining file: column B holds the name, column A the id.
  Map<String, String> nameIdMap = new HashMap<>();
  nameIdMap.put("john", "3.0");
  nameIdMap.put("romy", "1.0");
  nameIdMap.put("name", "id");
  Assert.assertEquals("Expected records", 3, output.size());
  Assert.assertEquals(nameIdMap.get(output.get(0).get("B")), output.get(0).get("A"));
  Assert.assertEquals(nameIdMap.get(output.get(1).get("B")), output.get(1).get("A"));
  Assert.assertEquals(nameIdMap.get(output.get(2).get("B")), output.get(2).get("A"));
}
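The test above keys the tracking table by the file URI string. A minimal sketch of a hypothetical helper (not part of the test class) that checks whether a file has already been recorded, assuming the same URI-keyed convention:

private static boolean isTracked(KeyValueTable trackingTable, File file) {
  // A file counts as processed when its URI appears as a key in the tracking table.
  return trackingTable.read(file.toURI().toString()) != null;
}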
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.
From the class XMLReaderBatchSourceTest, method createExpiredRecord.
/**
 * Pre-populates the file-tracking KeyValueTable with an expired record that is 40 days old.
 */
private void createExpiredRecord(String processedFileTable) throws Exception {
  DataSetManager<KeyValueTable> dataSetManager = getDataset(processedFileTable);
  KeyValueTable keyValueTable = dataSetManager.get();
  // Put an expired record that is 40 days old.
  File catalogSmall = new File(sourceFolder, CATALOG_SMALL_XML_FILE_NAME);
  Calendar cal = Calendar.getInstance();
  cal.add(Calendar.DATE, -40);
  Date expiryDate = cal.getTime();
  keyValueTable.write(Bytes.toBytes(catalogSmall.toURI().toString()), Bytes.toBytes(expiryDate.getTime()));
  dataSetManager.flush();
}
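The record seeded above stores a processing timestamp in milliseconds under the file URI. A minimal sketch of an expiry check built on that convention (illustrative only, not the plugin's actual expiry logic; the helper name and the day-based retention parameter are assumptions):

private static boolean isExpired(KeyValueTable trackingTable, String fileUri, int expiryPeriodDays) {
  // Returns true when the stored processing timestamp is older than the retention period.
  byte[] storedMillis = trackingTable.read(Bytes.toBytes(fileUri));
  if (storedMillis == null) {
    return false;
  }
  long ageMillis = System.currentTimeMillis() - Bytes.toLong(storedMillis);
  return ageMillis > TimeUnit.DAYS.toMillis(expiryPeriodDays);
}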
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.
From the class ValueMapperTest, method testWithNoDefaults.
@Test
public void testWithNoDefaults() throws Exception {
  String inputTable = "input_table_without_defaults";
  ETLStage source = new ETLStage("source", MockSource.getPlugin(inputTable));
  Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
    .put("mapping", "designationid:designation_lookup_table_without_defaults:designationName")
    .put("defaults", "")
    .build();
  ETLStage transform = new ETLStage("transform",
                                    new ETLPlugin("ValueMapper", Transform.PLUGIN_TYPE, sourceProperties, null));
  String sinkTable = "output_table_without_defaults";
  ETLStage sink = new ETLStage("sink", MockSink.getPlugin(sinkTable));
  ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
    .addStage(source)
    .addStage(transform)
    .addStage(sink)
    .addConnection(source.getName(), transform.getName())
    .addConnection(transform.getName(), sink.getName())
    .build();
  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(BATCH_ARTIFACT, etlConfig);
  ApplicationId appId = NamespaceId.DEFAULT.app("valuemappertest_without_defaults");
  ApplicationManager appManager = deployApplication(appId, appRequest);
  addDatasetInstance(KeyValueTable.class.getName(), "designation_lookup_table_without_defaults");
  DataSetManager<KeyValueTable> dataSetManager = getDataset("designation_lookup_table_without_defaults");
  KeyValueTable keyValueTable = dataSetManager.get();
  keyValueTable.write("1".getBytes(Charsets.UTF_8), "SE".getBytes(Charsets.UTF_8));
  keyValueTable.write("2".getBytes(Charsets.UTF_8), "SSE".getBytes(Charsets.UTF_8));
  keyValueTable.write("3".getBytes(Charsets.UTF_8), "ML".getBytes(Charsets.UTF_8));
  keyValueTable.write("4".getBytes(Charsets.UTF_8), "TL".getBytes(Charsets.UTF_8));
  dataSetManager.flush();
  DataSetManager<Table> inputManager = getDataset(inputTable);
  List<StructuredRecord> input = ImmutableList.of(
    StructuredRecord.builder(SOURCE_SCHEMA).set(ID, "100").set(NAME, "John").set(SALARY, "1000")
      .set(DESIGNATIONID, null).build(),
    StructuredRecord.builder(SOURCE_SCHEMA).set(ID, "101").set(NAME, "Kerry").set(SALARY, "1030")
      .set(DESIGNATIONID, "2").build(),
    StructuredRecord.builder(SOURCE_SCHEMA).set(ID, "102").set(NAME, "Mathew").set(SALARY, "1230")
      .set(DESIGNATIONID, "").build(),
    StructuredRecord.builder(SOURCE_SCHEMA).set(ID, "103").set(NAME, "Allie").set(SALARY, "2000")
      .set(DESIGNATIONID, "4").build());
  MockSource.writeInput(inputManager, input);
  WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  workflowManager.start();
  workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 1, 5, TimeUnit.MINUTES);
  DataSetManager<Table> outputManager = getDataset(sinkTable);
  List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
  Map<String, String> nameDesignationMap = new HashMap<>();
  nameDesignationMap.put("John", null);
  nameDesignationMap.put("Kerry", "SSE");
  nameDesignationMap.put("Mathew", "");
  nameDesignationMap.put("Allie", "TL");
  Map<String, String> nameSalaryMap = new HashMap<>();
  nameSalaryMap.put("John", "1000");
  nameSalaryMap.put("Kerry", "1030");
  nameSalaryMap.put("Mathew", "1230");
  nameSalaryMap.put("Allie", "2000");
  Assert.assertEquals(4, outputRecords.size());
  Assert.assertEquals(nameDesignationMap.get(outputRecords.get(0).get(NAME)), outputRecords.get(0).get(DESIGNATIONNAME));
  Assert.assertEquals(nameDesignationMap.get(outputRecords.get(1).get(NAME)), outputRecords.get(1).get(DESIGNATIONNAME));
  Assert.assertEquals(nameDesignationMap.get(outputRecords.get(2).get(NAME)), outputRecords.get(2).get(DESIGNATIONNAME));
  Assert.assertEquals(nameDesignationMap.get(outputRecords.get(3).get(NAME)), outputRecords.get(3).get(DESIGNATIONNAME));
  Assert.assertEquals(nameSalaryMap.get(outputRecords.get(0).get(NAME)), outputRecords.get(0).get(SALARY));
  Assert.assertEquals(nameSalaryMap.get(outputRecords.get(1).get(NAME)), outputRecords.get(1).get(SALARY));
  Assert.assertEquals(nameSalaryMap.get(outputRecords.get(2).get(NAME)), outputRecords.get(2).get(SALARY));
  Assert.assertEquals(nameSalaryMap.get(outputRecords.get(3).get(NAME)), outputRecords.get(3).get(SALARY));
}
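The four write(...) calls above seed the lookup table one entry at a time. A minimal sketch of a hypothetical helper (the method name is an assumption) that seeds a lookup KeyValueTable from a Map using the same DataSetManager and Charsets types as the test:

private static void seedLookupTable(DataSetManager<KeyValueTable> manager, Map<String, String> entries) {
  KeyValueTable table = manager.get();
  for (Map.Entry<String, String> entry : entries.entrySet()) {
    // Keys and values are stored as UTF-8 bytes, matching the writes in the test above.
    table.write(entry.getKey().getBytes(Charsets.UTF_8), entry.getValue().getBytes(Charsets.UTF_8));
  }
  // Make the writes visible to the pipeline run.
  manager.flush();
}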
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
From the class MapReduceProgramRunnerTest, method testSuccess.
private void testSuccess(boolean frequentFlushing) throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  // we need to start a tx context and do a "get" on all datasets so that they are in datasetCache
  datasetCache.newTransactionContext();
  final TimeseriesTable table = datasetCache.getDataset("timeSeries");
  final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
  final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
  final Table counters = datasetCache.getDataset("counters");
  final Table countersFromContext = datasetCache.getDataset("countersFromContext");
  // 1) fill test data
  fillTestInputData(txExecutorFactory, table, false);
  // 2) run job
  final long start = System.currentTimeMillis();
  runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
  final long stop = System.currentTimeMillis();
  // 3) verify results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Map<String, Long> expected = Maps.newHashMap();
        // note: not all records add to the sum since filter by tag="tag1" and ts={1..3} is used
        expected.put("tag1", 18L);
        expected.put("tag2", 3L);
        expected.put("tag3", 18L);
        Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
        int count = 0;
        while (agg.hasNext()) {
          TimeseriesTable.Entry entry = agg.next();
          String tag = Bytes.toString(entry.getTags()[0]);
          Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
          count++;
        }
        Assert.assertEquals(expected.size(), count);
        Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
                                 beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
        Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
                                 onFinishTable.read(Bytes.toBytes("onFinish")));
        Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
      }
    });
  datasetCache.dismissTransactionContext();
  // todo: verify metrics. Will be possible after refactor for CDAP-765
}
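The verification block runs inside a transaction built from the same txExecutorFactory and the transaction-aware datasets held by datasetCache. A minimal sketch of a hypothetical wrapper for this pattern (assuming those same test fields; the helper name is an assumption):

private void verifyInTx(TransactionExecutor.Subroutine verification) throws Exception {
  // Execute a read-only verification block transactionally against the cached datasets.
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(verification);
}

Since TransactionExecutor.Subroutine has a single apply() method, the verification can be passed as a lambda on Java 8, e.g. verifyInTx(() -> Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")))).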
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by caskdata.
From the class MapReduceProgramRunnerTest, method testFailure.
// TODO: this tests failure in Map tasks. We also need to test: failure in Reduce task, kill of a job by user.
private void testFailure(boolean frequentFlushing) throws Exception {
  // We want to verify that when a mapreduce job fails:
  // * things written in initialize() remain and are visible to others
  // * things written in tasks are not visible to others TODO AAA: do invalidate
  // * things written in onfinish() remain and are visible to others
  // NOTE: the code of this test is similar to the testTimeSeriesRecordsCount() test. We put some "bad data"
  // intentionally here to be recognized by map tasks as a message to emulate failure
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);
  // we need to start a tx context and do a "get" on all datasets so that they are in datasetCache
  datasetCache.newTransactionContext();
  final TimeseriesTable table = datasetCache.getDataset("timeSeries");
  final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
  final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
  final Table counters = datasetCache.getDataset("counters");
  final Table countersFromContext = datasetCache.getDataset("countersFromContext");
  // 1) fill test data
  fillTestInputData(txExecutorFactory, table, true);
  // 2) run job
  final long start = System.currentTimeMillis();
  runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, false);
  final long stop = System.currentTimeMillis();
  // 3) verify results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares())
    .execute(new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        // data should be rolled back todo: test that partially written is rolled back too
        Assert.assertFalse(table.read(AggregateMetricsByTag.BY_TAGS, start, stop).hasNext());
        // but data written in beforeSubmit and onFinish is available to others
        Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"),
                                 beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
        Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"),
                                 onFinishTable.read(Bytes.toBytes("onFinish")));
        Assert.assertEquals(0, counters.get(new Get("mapper")).getLong("records", 0));
        Assert.assertEquals(0, counters.get(new Get("reducer")).getLong("records", 0));
        Assert.assertEquals(0, countersFromContext.get(new Get("mapper")).getLong("records", 0));
        Assert.assertEquals(0, countersFromContext.get(new Get("reducer")).getLong("records", 0));
      }
    });
  datasetCache.dismissTransactionContext();
}
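Both tests check the lifecycle markers with the same assertArrayEquals pattern on a KeyValueTable. A minimal sketch of a hypothetical assertion helper (the method name is an assumption; call it inside the transactional verification block as above):

private static void assertMarker(KeyValueTable markerTable, String key, String expectedValue) {
  // Markers are written as UTF-8 string values keyed by the lifecycle hook name.
  Assert.assertArrayEquals(Bytes.toBytes(expectedValue), markerTable.read(Bytes.toBytes(key)));
}

For example, assertMarker(beforeSubmitTable, "beforeSubmit", "beforeSubmit:done") replaces the first assertArrayEquals call in each verification block.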