Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
In the class MapReduceProgramRunnerTest, method testSuccess:
private void testSuccess(boolean frequentFlushing) throws Exception {
  final ApplicationWithPrograms app = deployApp(AppWithMapReduce.class);

  // we need to start a tx context and do a "get" on all datasets so that they are in datasetCache
  datasetCache.newTransactionContext();
  final TimeseriesTable table = datasetCache.getDataset("timeSeries");
  final KeyValueTable beforeSubmitTable = datasetCache.getDataset("beforeSubmit");
  final KeyValueTable onFinishTable = datasetCache.getDataset("onFinish");
  final Table counters = datasetCache.getDataset("counters");
  final Table countersFromContext = datasetCache.getDataset("countersFromContext");

  // 1) fill test data
  fillTestInputData(txExecutorFactory, table, false);

  // 2) run job
  final long start = System.currentTimeMillis();
  runProgram(app, AppWithMapReduce.AggregateTimeseriesByTag.class, frequentFlushing, true);
  final long stop = System.currentTimeMillis();

  // 3) verify results
  Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(
    new TransactionExecutor.Subroutine() {
      @Override
      public void apply() {
        Map<String, Long> expected = Maps.newHashMap();
        // note: not all records add to the sum since filter by tag="tag1" and ts={1..3} is used
        expected.put("tag1", 18L);
        expected.put("tag2", 3L);
        expected.put("tag3", 18L);
        Iterator<TimeseriesTable.Entry> agg = table.read(AggregateMetricsByTag.BY_TAGS, start, stop);
        int count = 0;
        while (agg.hasNext()) {
          TimeseriesTable.Entry entry = agg.next();
          String tag = Bytes.toString(entry.getTags()[0]);
          Assert.assertEquals((long) expected.get(tag), Bytes.toLong(entry.getValue()));
          count++;
        }
        Assert.assertEquals(expected.size(), count);
        Assert.assertArrayEquals(Bytes.toBytes("beforeSubmit:done"), beforeSubmitTable.read(Bytes.toBytes("beforeSubmit")));
        Assert.assertArrayEquals(Bytes.toBytes("onFinish:done"), onFinishTable.read(Bytes.toBytes("onFinish")));
        Assert.assertTrue(counters.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(counters.get(new Get("reducer")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("mapper")).getLong("records", 0) > 0);
        Assert.assertTrue(countersFromContext.get(new Get("reducer")).getLong("records", 0) > 0);
      }
    });
  datasetCache.dismissTransactionContext();
  // todo: verify metrics. Will be possible after refactor for CDAP-765
}
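This test only touches its KeyValueTable instances with single-key reads inside an explicit transaction. The core read/write pattern it relies on can be reduced to the following sketch, which assumes the same txExecutorFactory and datasetCache fields used by the test; the dataset name "exampleKV" is a hypothetical placeholder:

// Minimal sketch: transactional read/write against a KeyValueTable obtained from the
// dataset cache, mirroring the pattern in testSuccess above.
datasetCache.newTransactionContext();
final KeyValueTable kv = datasetCache.getDataset("exampleKV");
Transactions.createTransactionExecutor(txExecutorFactory, datasetCache.getTransactionAwares()).execute(
  new TransactionExecutor.Subroutine() {
    @Override
    public void apply() {
      kv.write(Bytes.toBytes("status"), Bytes.toBytes("done"));  // byte[] key -> byte[] value
      byte[] value = kv.read(Bytes.toBytes("status"));           // returns null if the key is absent
      Assert.assertArrayEquals(Bytes.toBytes("done"), value);
    }
  });
datasetCache.dismissTransactionContext();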
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
In the class FakeDatasetDefinition, method getDataset:
@Override
public FakeDataset getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                              Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  DatasetSpecification kvTableSpec = spec.getSpecification("objects");
  KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
  return new FakeDataset(spec.getName(), table);
}
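FakeDataset itself is a test-only dataset built on top of the embedded KeyValueTable. The usual shape of such a composite dataset is an AbstractDataset that registers the table as its embedded dataset and delegates reads and writes to it; the following is a minimal sketch with hypothetical names, not the actual FakeDataset source:

// Minimal sketch of a composite dataset wrapping a KeyValueTable (hypothetical class).
public class SimpleWrappedDataset extends AbstractDataset {
  private final KeyValueTable table;

  public SimpleWrappedDataset(String instanceName, KeyValueTable table) {
    // Registering the table as the embedded dataset wires it into the transaction lifecycle.
    super(instanceName, table);
    this.table = table;
  }

  public void put(String key, byte[] value) {
    table.write(Bytes.toBytes(key), value);
  }

  public byte[] get(String key) {
    return table.read(Bytes.toBytes(key));
  }
}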
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project cdap by cdapio.
In the class ObjectStoreDefinition, method getDataset:
@Override
public ObjectStoreDataset<?> getDataset(DatasetContext datasetContext, DatasetSpecification spec,
                                        Map<String, String> arguments, ClassLoader classLoader) throws IOException {
  DatasetSpecification kvTableSpec = spec.getSpecification("objects");
  KeyValueTable table = tableDef.getDataset(datasetContext, kvTableSpec, arguments, classLoader);
  TypeRepresentation typeRep = GSON.fromJson(spec.getProperty("type"), TypeRepresentation.class);
  Schema schema = GSON.fromJson(spec.getProperty("schema"), Schema.class);
  return new ObjectStoreDataset(spec.getName(), table, typeRep, schema, classLoader);
}
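The lookup spec.getSpecification("objects") only succeeds because the embedded KeyValueTable was registered under that name when the dataset instance was configured. For context, the configure() side of such a definition typically looks like the sketch below; this illustrates the general DatasetDefinition pattern, not the verbatim ObjectStoreDefinition source:

@Override
public DatasetSpecification configure(String instanceName, DatasetProperties properties) {
  // Register the embedded KeyValueTable under the name "objects", the same name
  // that getDataset() later uses to look up its specification.
  return DatasetSpecification.builder(instanceName, getName())
    .properties(properties.getProperties())
    .datasets(tableDef.configure("objects", properties))
    .build();
}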
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.
In the class ExcelInputReader, method transform:
@Override
public void transform(KeyValue<LongWritable, Object> input, Emitter<StructuredRecord> emitter) throws Exception {
  getOutputSchema();
  StructuredRecord.Builder builder = StructuredRecord.builder(outputSchema);
  String inputValue = input.getValue().toString();
  String[] excelRecord = inputValue.split(CELL_SEPERATION);
  String fileName = excelRecord[1];
  String sheetName = excelRecord[2];
  String ifEndRow = excelRecord[3];
  int currentRowNum = Integer.parseInt(excelRecord[0]);
  if (currentRowNum - prevRowNum > 1 && excelInputreaderConfig.terminateIfEmptyRow.equalsIgnoreCase("true")) {
    throw new ExecutionException("Encountered empty row while reading Excel file: " + fileName
                                   + ". Terminating processing", new Throwable());
  }
  prevRowNum = currentRowNum;
  Map<String, String> excelColumnValueMap = new HashMap<>();
  for (String column : excelRecord) {
    String[] columnValue = column.split(COLUMN_SEPERATION);
    if (columnValue.length > 1) {
      String name = columnValue[0];
      String value = columnValue[1];
      if (columnMapping.containsKey(name)) {
        excelColumnValueMap.put(columnMapping.get(name), value);
      } else {
        excelColumnValueMap.put(name, value);
      }
    }
  }
  try {
    for (Schema.Field field : outputSchema.getFields()) {
      String fieldName = field.getName();
      if (excelColumnValueMap.containsKey(fieldName)) {
        builder.convertAndSet(fieldName, excelColumnValueMap.get(fieldName));
      } else {
        builder.set(fieldName, NULL);
      }
    }
    builder.set(FILE, new Path(fileName).getName());
    builder.set(SHEET, sheetName);
    emitter.emit(builder.build());
    if (ifEndRow.equalsIgnoreCase(END) && !Strings.isNullOrEmpty(excelInputreaderConfig.memoryTableName)) {
      KeyValueTable processedFileMemoryTable = batchRuntimeContext.getDataset(excelInputreaderConfig.memoryTableName);
      processedFileMemoryTable.write(Bytes.toBytes(fileName), Bytes.toBytes(new Date().getTime()));
    }
  } catch (Exception e) {
    switch (excelInputreaderConfig.ifErrorRecord) {
      case EXIT_ON_ERROR:
        throw new IllegalStateException("Terminating processing on error: " + e.getMessage());
      case WRITE_ERROR_DATASET:
        StructuredRecord.Builder errorRecordBuilder = StructuredRecord.builder(errorRecordSchema);
        errorRecordBuilder.set(KEY, fileName + "_" + sheetName + "_" + excelRecord[0]);
        errorRecordBuilder.set(FILE, fileName);
        errorRecordBuilder.set(SHEET, sheetName);
        errorRecordBuilder.set(RECORD, inputValue);
        Table errorTable = batchRuntimeContext.getDataset(excelInputreaderConfig.errorDatasetName);
        errorTable.write(errorRecordBuilder.build());
        break;
      default:
        // ignore on error
        LOG.error("Error while reading excel input: ", e);
        break;
    }
  }
}
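Here the KeyValueTable acts as a processed-files registry: once the last row of a file is reached, the file name is stored against the epoch time at which it was processed. The plugin can later consult that table to skip files already handled within the configured expiry window; a hedged sketch of such a check, with hypothetical method and parameter names rather than the actual plugin code:

// Hypothetical sketch: decide whether a file was already processed within the
// configured expiry period, using the same KeyValueTable written in transform().
private boolean alreadyProcessed(KeyValueTable processedFiles, String fileName, long expiryDays) {
  byte[] stored = processedFiles.read(Bytes.toBytes(fileName));
  if (stored == null) {
    return false;  // never seen before
  }
  long processedAt = Bytes.toLong(stored);
  long expiryMillis = expiryDays * 24L * 60 * 60 * 1000;
  return System.currentTimeMillis() - processedAt < expiryMillis;
}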
Use of io.cdap.cdap.api.dataset.lib.KeyValueTable in project hydrator-plugins by cdapio.
In the class ExcelInputReaderTest, method testWithTTL:
@Test
public void testWithTTL() throws Exception {
  Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
    .put(Constants.Reference.REFERENCE_NAME, "TestCase")
    .put("filePath", sourceFolderUri)
    .put("filePattern", ".*")
    .put("sheet", "Sheet Number")
    .put("sheetValue", "0")
    .put("memoryTableName", "trackMemoryTableWithTTL")
    .put("tableExpiryPeriod", "15")
    .put("reprocess", "false")
    .put("columnMapping", "A:FirstColumn")
    .put("skipFirstRow", "false")
    .put("terminateIfEmptyRow", "false")
    .put("rowsLimit", "10")
    .put("outputSchema", "A:string")
    .put("ifErrorRecord", "Ignore error and continue")
    .put("errorDatasetName", "")
    .build();
  ETLStage source = new ETLStage("ExcelInputtest", new ETLPlugin("Excel", BatchSource.PLUGIN_TYPE, sourceProperties, null));
  String outputDatasetName = "output-WithTTL";
  ETLStage sink = new ETLStage("sink", MockSink.getPlugin(outputDatasetName));
  ApplicationManager appManager = deployApp(source, sink, "testWithTTL");

  DataSetManager<KeyValueTable> dataSetManager = getDataset("trackMemoryTableWithTTL");
  KeyValueTable keyValueTable = dataSetManager.get();
  File testFile = new File(sourceFolder, excelTestFileTwo);
  Calendar cal = Calendar.getInstance();
  cal.add(Calendar.DATE, -20);
  keyValueTable.write(Bytes.toBytes(testFile.toURI().toString()), Bytes.toBytes(cal.getTimeInMillis()));
  dataSetManager.flush();

  startWorkflow(appManager, ProgramRunStatus.COMPLETED);

  DataSetManager<Table> outputManager = getDataset(outputDatasetName);
  List<StructuredRecord> output = MockSink.readOutput(outputManager);
  Assert.assertEquals("Expected records", 9, output.size());
  Assert.assertNotNull(output.get(1).getSchema().getField("FirstColumn"));
}
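The test seeds the tracking KeyValueTable through a DataSetManager and flushes it before the run, so the 20-day-old entry is already past the 15-day tableExpiryPeriod and the file is read again. If the plugin refreshes the entry after reprocessing (an assumption, not asserted by the original test), the same manager could be used for a follow-up check along these lines:

// Hypothetical follow-up check: after the workflow completes, the memory table
// should hold a fresh timestamp for the reprocessed file.
dataSetManager.flush();  // refresh the manager's view of the dataset
byte[] storedTs = keyValueTable.read(Bytes.toBytes(testFile.toURI().toString()));
Assert.assertNotNull(storedTs);
Assert.assertTrue(Bytes.toLong(storedTs) > cal.getTimeInMillis());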