use of io.cdap.cdap.etl.proto.ArtifactSelectorConfig in project cdap by caskdata.
the class DataPipelineConnectionTest method testBrowseSample.
@Test
public void testBrowseSample() throws Exception {
  File directory = TEMP_FOLDER.newFolder();
  List<BrowseEntity> entities = addFilesInDirectory(directory);
  String conn = "BrowseSample";
  addConnection(conn, new ConnectionCreationRequest(
    "", new PluginInfo(FileConnector.NAME, Connector.PLUGIN_TYPE, null, Collections.emptyMap(),
                       // in setup we add "-mocks" as the suffix for the artifact id
                       new ArtifactSelectorConfig("system", APP_ARTIFACT_ID.getArtifact() + "-mocks",
                                                  APP_ARTIFACT_ID.getVersion()))));

  // get all 10 results back
  BrowseDetail browseDetail = browseConnection(conn, directory.getCanonicalPath(), 10);
  BrowseDetail expected = BrowseDetail.builder().setTotalCount(10).setEntities(entities).build();
  Assert.assertEquals(expected, browseDetail);

  // only retrieve 5 back; the total count should still be 10
  browseDetail = browseConnection(conn, directory.getCanonicalPath(), 5);
  expected = BrowseDetail.builder().setTotalCount(10).setEntities(entities.subList(0, 5)).build();
  Assert.assertEquals(expected, browseDetail);

  // browse the created directory; it should give an empty result
  browseDetail = browseConnection(conn, entities.get(0).getPath(), 10);
  expected = BrowseDetail.builder().setTotalCount(0).build();
  Assert.assertEquals(expected, browseDetail);

  // browse the file; since it is not browsable, it should return itself
  browseDetail = browseConnection(conn, entities.get(1).getPath(), 10);
  expected = BrowseDetail.builder().setTotalCount(1).addEntity(entities.get(1)).build();
  Assert.assertEquals(expected, browseDetail);

  List<StructuredRecord> records = new ArrayList<>();
  Schema schema = Schema.recordOf("schema",
                                  Schema.Field.of("offset", Schema.of(Schema.Type.LONG)),
                                  Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
  for (int i = 0; i < 100; i++) {
    records.add(StructuredRecord.builder(schema).set("offset", i * 2L).set("body", "1").build());
  }
  ArtifactSelectorConfig artifact = new ArtifactSelectorConfig(
    "SYSTEM", APP_ARTIFACT_ID.getArtifact() + "-mocks", APP_ARTIFACT_ID.getVersion());
  Map<String, String> properties = ImmutableMap.of(
    "path", entities.get(1).getPath(),
    "useConnection", "true",
    "connection", String.format("${conn(%s)}", conn));
  ConnectorDetail detail = new ConnectorDetail(ImmutableSet.of(
    new PluginDetail("file", "batchsource", properties, artifact, schema),
    new PluginDetail("file", "streamingsource", properties, artifact, schema)));
  SampleResponse expectedSample = new SampleResponse(detail, schema, records);

  // sample the file; it has 100 lines, so a limit of 200 should retrieve all of them
  SampleResponse sampleResponse = sampleConnection(conn, entities.get(1).getPath(), 200);
  Assert.assertEquals(expectedSample, sampleResponse);

  // sample 100, should also get all lines
  sampleResponse = sampleConnection(conn, entities.get(1).getPath(), 100);
  Assert.assertEquals(expectedSample, sampleResponse);

  // sample 50, should only get the first 50 lines
  sampleResponse = sampleConnection(conn, entities.get(1).getPath(), 50);
  expectedSample = new SampleResponse(detail, schema, records.subList(0, 50));
  Assert.assertEquals(expectedSample, sampleResponse);

  deleteConnection(conn);
}
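Note on the selector above: the test passes the scope as a plain string ("system" in the connection request, "SYSTEM" for the expected sample artifact), which suggests the scope is parsed case-insensitively, though the test itself does not assert that. A minimal sketch of just the selector construction, with illustrative values standing in for the test's APP_ARTIFACT_ID:

// Sketch: pin a connection's connector plugin to a specific artifact.
// The artifact name and version here are hypothetical; in the test they come
// from APP_ARTIFACT_ID plus the "-mocks" suffix added during setup.
ArtifactSelectorConfig selector = new ArtifactSelectorConfig(
  "system",              // artifact scope as a string
  "app-artifact-mocks",  // hypothetical artifact name
  "1.0.0");              // hypothetical artifact version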
use of io.cdap.cdap.etl.proto.ArtifactSelectorConfig in project cdap by caskdata.
the class DataPipelineConnectionTest method testUsingConnections.
private void testUsingConnections(Engine engine) throws Exception {
  String sourceConnName = "sourceConn " + engine;
  String sinkConnName = "sinkConn " + engine;
  String srcTableName = "src" + engine;
  String sinkTableName = "sink" + engine;

  // add a macro to the properties that will later evaluate to a json object
  addConnection(sourceConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       ImmutableMap.of("tableName", srcTableName, "key1", "${badval}"),
                       new ArtifactSelectorConfig())));
  addConnection(sinkConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       ImmutableMap.of("tableName", sinkTableName, "key1", "${badval}"),
                       new ArtifactSelectorConfig())));

  // add a json string to the runtime arguments to ensure the plugin can still be
  // instantiated under such conditions
  Map<String, String> runtimeArguments = Collections.singletonMap("badval", "{\"a\" : 1}");

  // source -> sink
  ETLBatchConfig config = ETLBatchConfig.builder()
    .setEngine(engine)
    .addStage(new ETLStage("source", MockSource.getPluginUsingConnection(sourceConnName)))
    .addStage(new ETLStage("sink", MockSink.getPluginUsingConnection(sinkConnName)))
    .addConnection("source", "sink")
    .build();

  Schema schema = Schema.recordOf("x", Schema.Field.of("name", Schema.of(Schema.Type.STRING)));
  StructuredRecord samuel = StructuredRecord.builder(schema).set("name", "samuel").build();
  StructuredRecord dwayne = StructuredRecord.builder(schema).set("name", "dwayne").build();

  // the test adds the dataset itself; the source won't create it since the table name
  // is macro-enabled
  addDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName), Table.class.getName());
  DataSetManager<Table> sourceTable = getDataset(srcTableName);
  MockSource.writeInput(sourceTable, ImmutableList.of(samuel, dwayne));

  // verify preview can run successfully using connections
  PreviewManager previewManager = getPreviewManager();
  PreviewConfig previewConfig = new PreviewConfig(SmartWorkflow.NAME, ProgramType.WORKFLOW,
                                                  runtimeArguments, 10);

  // start the preview and wait for its status to reach COMPLETED
  ApplicationId previewId = previewManager.start(NamespaceId.DEFAULT,
                                                 new AppRequest<>(APP_ARTIFACT, config, previewConfig));
  Tasks.waitFor(PreviewStatus.Status.COMPLETED, () -> {
    PreviewStatus status = previewManager.getStatus(previewId);
    return status == null ? null : status.getStatus();
  }, 5, TimeUnit.MINUTES);

  AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(APP_ARTIFACT, config);
  ApplicationId appId = NamespaceId.DEFAULT.app("testApp" + engine);
  ApplicationManager appManager = deployApplication(appId, appRequest);

  // start the actual pipeline run
  WorkflowManager manager = appManager.getWorkflowManager(SmartWorkflow.NAME);
  manager.startAndWaitForGoodRun(runtimeArguments, ProgramRunStatus.COMPLETED, 3, TimeUnit.MINUTES);

  DataSetManager<Table> sinkTable = getDataset(sinkTableName);
  List<StructuredRecord> outputRecords = MockSink.readOutput(sinkTable);
  Assert.assertEquals(ImmutableSet.of(dwayne, samuel), new HashSet<>(outputRecords));

  // modify the connections to use new table names for source and sink
  String newSrcTableName = "new" + srcTableName;
  String newSinkTableName = "new" + sinkTableName;
  addConnection(sourceConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       Collections.singletonMap("tableName", newSrcTableName),
                       new ArtifactSelectorConfig())));
  addConnection(sinkConnName, new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null,
                       Collections.singletonMap("tableName", newSinkTableName),
                       new ArtifactSelectorConfig())));

  addDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName), Table.class.getName());
  StructuredRecord newRecord1 = StructuredRecord.builder(schema).set("name", "john").build();
  StructuredRecord newRecord2 = StructuredRecord.builder(schema).set("name", "tom").build();
  sourceTable = getDataset(newSrcTableName);
  MockSource.writeInput(sourceTable, ImmutableList.of(newRecord1, newRecord2));

  // run the program again; it should use the new tables to read and write
  manager.start(runtimeArguments);
  manager.waitForRuns(ProgramRunStatus.COMPLETED, 2, 3, TimeUnit.MINUTES);
  sinkTable = getDataset(newSinkTableName);
  outputRecords = MockSink.readOutput(sinkTable);
  Assert.assertEquals(ImmutableSet.of(newRecord1, newRecord2), new HashSet<>(outputRecords));

  deleteConnection(sourceConnName);
  deleteConnection(sinkConnName);
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(srcTableName));
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(sinkTableName));
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSrcTableName));
  deleteDatasetInstance(NamespaceId.DEFAULT.dataset(newSinkTableName));
}
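The stages above obtain their plugins through MockSource.getPluginUsingConnection and MockSink.getPluginUsingConnection, so the table name lives in the connection rather than in the pipeline config; that is what lets the later addConnection calls repoint the pipeline at new tables without redeploying it. A sketch of the likely shape of that helper, inferred from the "connectionConfig" property used in testConnectionMacroSubstitution below (treat the exact property key as an assumption):

// Hypothetical helper: return an ETLPlugin whose configuration is deferred to a
// named connection via the ${conn(...)} macro instead of inline properties.
public static ETLPlugin getPluginUsingConnection(String connectionName) {
  return new ETLPlugin(NAME, BatchSource.PLUGIN_TYPE,
                       Collections.singletonMap("connectionConfig",
                                                String.format("${conn(%s)}", connectionName)));
}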
use of io.cdap.cdap.etl.proto.ArtifactSelectorConfig in project cdap by caskdata.
the class DataPipelineServiceTest method testConnectionMacroSubstitution.
@Test
public void testConnectionMacroSubstitution() throws Exception {
  String stageName = "tx";

  // with no connection macro, the plugin should behave as a normal plugin
  Map<String, String> properties = Collections.singletonMap("tableName", "test");
  ETLStage stage = new ETLStage(stageName,
                                new ETLPlugin(MockSource.NAME, BatchSource.PLUGIN_TYPE, properties));
  StageValidationRequest requestBody = new StageValidationRequest(stage, Collections.emptyList(), false);
  StageValidationResponse actual = sendRequest(requestBody);
  Assert.assertTrue(actual.getFailures().isEmpty());

  // with a connection macro, the validation should also pass
  properties = Collections.singletonMap("connectionConfig", "${conn(testconn)}");
  addConnection("testconn", new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null, Collections.singletonMap("tableName", "newtest"),
                       new ArtifactSelectorConfig())));
  stage = new ETLStage(stageName + "conn",
                       new ETLPlugin(MockSource.NAME, BatchSource.PLUGIN_TYPE, properties));
  requestBody = new StageValidationRequest(stage, Collections.emptyList(), false);
  actual = sendRequest(requestBody);
  Assert.assertTrue(actual.getFailures().isEmpty());

  // the properties can still be correctly set, and validation still passes, when a
  // connection property gets evaluated to a json object
  addConnection("testconn", new ConnectionCreationRequest(
    "", new PluginInfo("test", "dummy", null, ImmutableMap.of("tableName", "aaa", "key1", "${badval}"),
                       new ArtifactSelectorConfig())));
  getPreferencesService().setProperties(NamespaceId.DEFAULT,
                                        Collections.singletonMap("badval", "{\"a\" : 1}"));
  actual = sendRequest(new StageValidationRequest(stage, Collections.emptyList(), true));
  Assert.assertTrue(actual.getFailures().isEmpty());
}
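The test issues three validation requests: one without any connection macro, one where the macro expands to plain properties, and one where an expanded property is itself a JSON object supplied through preferences. A small helper capturing the repeated request-and-assert pattern (the boolean mirrors the third StageValidationRequest argument, which this test sets to true only for the final, macro-evaluating request; that reading of the flag is an inference):

// Sketch: send a stage validation request and assert it produced no failures.
private void assertValidates(ETLStage stage, boolean resolveMacros) throws Exception {
  StageValidationResponse response =
    sendRequest(new StageValidationRequest(stage, Collections.emptyList(), resolveMacros));
  Assert.assertTrue(response.getFailures().isEmpty());
}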
use of io.cdap.cdap.etl.proto.ArtifactSelectorConfig in project cdap by caskdata.
the class DataPipelineServiceTest method testValidateStagePluginNotFound.
@Test
public void testValidateStagePluginNotFound() throws Exception {
  String name = MockSource.NAME;
  String type = BatchSource.PLUGIN_TYPE;
  ArtifactSelectorConfig requestedArtifact = new ArtifactSelectorConfig(
    ArtifactScope.USER.name(), batchMocksArtifactId.getArtifact() + "-ghost",
    batchMocksArtifactId.getVersion());
  String stageName = "src";
  ETLStage stage = new ETLStage(stageName,
                                new ETLPlugin(name, type, Collections.emptyMap(), requestedArtifact));
  StageValidationResponse actual =
    sendRequest(new StageValidationRequest(stage, Collections.emptyList(), false));

  Assert.assertEquals(1, actual.getFailures().size());
  ValidationFailure failure = actual.getFailures().iterator().next();
  Assert.assertEquals(stageName, failure.getCauses().get(0).getAttribute(CauseAttributes.PLUGIN_ID));
  Assert.assertEquals(type, failure.getCauses().get(0).getAttribute(CauseAttributes.PLUGIN_TYPE));
  Assert.assertEquals(name, failure.getCauses().get(0).getAttribute(CauseAttributes.PLUGIN_NAME));
  Assert.assertEquals(requestedArtifact.getName(),
                      failure.getCauses().get(0).getAttribute(CauseAttributes.REQUESTED_ARTIFACT_NAME));
  Assert.assertEquals(requestedArtifact.getScope(),
                      failure.getCauses().get(0).getAttribute(CauseAttributes.REQUESTED_ARTIFACT_SCOPE));
  Assert.assertEquals(requestedArtifact.getVersion(),
                      failure.getCauses().get(0).getAttribute(CauseAttributes.REQUESTED_ARTIFACT_VERSION));
  Assert.assertEquals(batchMocksArtifactId.getArtifact(),
                      failure.getCauses().get(0).getAttribute(CauseAttributes.SUGGESTED_ARTIFACT_NAME));
  Assert.assertEquals(ArtifactScope.SYSTEM.name(),
                      failure.getCauses().get(0).getAttribute(CauseAttributes.SUGGESTED_ARTIFACT_SCOPE));
  Assert.assertEquals(batchMocksArtifactId.getVersion(),
                      failure.getCauses().get(0).getAttribute(CauseAttributes.SUGGESTED_ARTIFACT_VERSION));
}
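Every assertion above reads an attribute from the first cause of the single failure, so hoisting that lookup makes the pattern easier to scan; a sketch, assuming Cause is the nested type returned by getCauses():

// Sketch: hoist the repeated first-cause lookup before asserting on its attributes.
ValidationFailure.Cause cause = failure.getCauses().get(0);
Assert.assertEquals(stageName, cause.getAttribute(CauseAttributes.PLUGIN_ID));
Assert.assertEquals(type, cause.getAttribute(CauseAttributes.PLUGIN_TYPE));
Assert.assertEquals(name, cause.getAttribute(CauseAttributes.PLUGIN_NAME));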
use of io.cdap.cdap.etl.proto.ArtifactSelectorConfig in project cdap by caskdata.
the class DataPipelineConnectionTest method testConnectionMetrics.
@Test
public void testConnectionMetrics() throws Exception {
  File directory = TEMP_FOLDER.newFolder();
  List<BrowseEntity> entities = addFilesInDirectory(directory);
  ConnectionCreationRequest connRequest = new ConnectionCreationRequest(
    "", new PluginInfo(FileConnector.NAME, Connector.PLUGIN_TYPE, null, Collections.emptyMap(),
                       // in setup we add "-mocks" as the suffix for the artifact id
                       new ArtifactSelectorConfig("system", APP_ARTIFACT_ID.getArtifact() + "-mocks",
                                                  APP_ARTIFACT_ID.getVersion())));
  ConnectionCreationRequest dummyRequest = new ConnectionCreationRequest(
    "", new PluginInfo("dummy", Connector.PLUGIN_TYPE, null, Collections.emptyMap(),
                       new ArtifactSelectorConfig("system", APP_ARTIFACT_ID.getArtifact() + "-mocks",
                                                  APP_ARTIFACT_ID.getVersion())));

  Map<String, String> tagsFile = new HashMap<>(SERVICE_TAGS);
  tagsFile.put(Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME);
  tagsFile.put(Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, FileConnector.NAME);
  Map<String, String> tagsDummy = new HashMap<>(SERVICE_TAGS);
  tagsDummy.put(Constants.Metrics.Tag.APP_ENTITY_TYPE, Constants.CONNECTION_SERVICE_NAME);
  tagsDummy.put(Constants.Metrics.Tag.APP_ENTITY_TYPE_NAME, "dummy");

  // the studio service runs for the entire test suite, so capture the existing totals
  // to ensure metrics emitted by other tests do not affect this one
  long existingMetricsTotal = getMetricsManager().getTotalMetric(
    SERVICE_TAGS, "user." + Constants.Metrics.Connection.CONNECTION_COUNT);
  long existingMetricsFile = getMetricsManager().getTotalMetric(
    tagsFile, "user." + Constants.Metrics.Connection.CONNECTION_COUNT);
  long existingMetricsDummy = getMetricsManager().getTotalMetric(
    tagsDummy, "user." + Constants.Metrics.Connection.CONNECTION_COUNT);

  // add 5 file connections and 5 dummy connections
  for (int i = 0; i < 5; i++) {
    addConnection("conn" + i, connRequest);
  }
  for (int i = 5; i < 10; i++) {
    addConnection("conn" + i, dummyRequest);
  }

  // validate 10 connections added in total: 5 for file, 5 for dummy
  validateMetrics(SERVICE_TAGS, Constants.Metrics.Connection.CONNECTION_COUNT, existingMetricsTotal, 10L);
  validateMetrics(tagsFile, Constants.Metrics.Connection.CONNECTION_COUNT, existingMetricsFile, 5L);
  validateMetrics(tagsDummy, Constants.Metrics.Connection.CONNECTION_COUNT, existingMetricsDummy, 5L);

  // add 5 more dummy connections
  for (int i = 10; i < 15; i++) {
    addConnection("conn" + i, dummyRequest);
  }

  // validate 15 connections added in total: 5 for file, 10 for dummy
  validateMetrics(SERVICE_TAGS, Constants.Metrics.Connection.CONNECTION_COUNT, existingMetricsTotal, 15L);
  validateMetrics(tagsFile, Constants.Metrics.Connection.CONNECTION_COUNT, existingMetricsFile, 5L);
  validateMetrics(tagsDummy, Constants.Metrics.Connection.CONNECTION_COUNT, existingMetricsDummy, 10L);

  // capture the existing get counts
  existingMetricsTotal = getMetricsManager().getTotalMetric(
    SERVICE_TAGS, "user." + Constants.Metrics.Connection.CONNECTION_GET_COUNT);
  existingMetricsFile = getMetricsManager().getTotalMetric(
    tagsFile, "user." + Constants.Metrics.Connection.CONNECTION_GET_COUNT);
  existingMetricsDummy = getMetricsManager().getTotalMetric(
    tagsDummy, "user." + Constants.Metrics.Connection.CONNECTION_GET_COUNT);

  // get all 15 connections
  for (int i = 0; i < 15; i++) {
    getConnection("conn" + i);
  }

  // validate 15 get metrics for these connections: 5 for file, 10 for dummy
  validateMetrics(SERVICE_TAGS, Constants.Metrics.Connection.CONNECTION_GET_COUNT, existingMetricsTotal, 15L);
  validateMetrics(tagsFile, Constants.Metrics.Connection.CONNECTION_GET_COUNT, existingMetricsFile, 5L);
  validateMetrics(tagsDummy, Constants.Metrics.Connection.CONNECTION_GET_COUNT, existingMetricsDummy, 10L);

  // capture the existing browse counts
  existingMetricsTotal = getMetricsManager().getTotalMetric(
    SERVICE_TAGS, "user." + Constants.Metrics.Connection.CONNECTION_BROWSE_COUNT);
  existingMetricsFile = getMetricsManager().getTotalMetric(
    tagsFile, "user." + Constants.Metrics.Connection.CONNECTION_BROWSE_COUNT);

  // browse each file connection twice
  for (int i = 0; i < 5; i++) {
    browseConnection("conn" + i, directory.getCanonicalPath(), 10);
    browseConnection("conn" + i, directory.getCanonicalPath(), 10);
  }

  // validate 10 browse metrics are emitted for file
  validateMetrics(SERVICE_TAGS, Constants.Metrics.Connection.CONNECTION_BROWSE_COUNT, existingMetricsTotal, 10L);
  validateMetrics(tagsFile, Constants.Metrics.Connection.CONNECTION_BROWSE_COUNT, existingMetricsFile, 10L);

  // capture the existing sample and spec counts
  existingMetricsTotal = getMetricsManager().getTotalMetric(
    SERVICE_TAGS, "user." + Constants.Metrics.Connection.CONNECTION_SAMPLE_COUNT);
  existingMetricsFile = getMetricsManager().getTotalMetric(
    tagsFile, "user." + Constants.Metrics.Connection.CONNECTION_SAMPLE_COUNT);
  long existingMetricsSpecTotal = getMetricsManager().getTotalMetric(
    SERVICE_TAGS, "user." + Constants.Metrics.Connection.CONNECTION_SPEC_COUNT);
  long existingMetricsSpecFile = getMetricsManager().getTotalMetric(
    tagsFile, "user." + Constants.Metrics.Connection.CONNECTION_SPEC_COUNT);

  // sample each file connection
  for (int i = 0; i < 5; i++) {
    sampleConnection("conn" + i, entities.get(1).getPath(), 10);
  }

  // validate 5 sample and 5 spec metrics are emitted for file
  validateMetrics(SERVICE_TAGS, Constants.Metrics.Connection.CONNECTION_SAMPLE_COUNT, existingMetricsTotal, 5L);
  validateMetrics(tagsFile, Constants.Metrics.Connection.CONNECTION_SAMPLE_COUNT, existingMetricsFile, 5L);
  validateMetrics(SERVICE_TAGS, Constants.Metrics.Connection.CONNECTION_SPEC_COUNT, existingMetricsSpecTotal, 5L);
  validateMetrics(tagsFile, Constants.Metrics.Connection.CONNECTION_SPEC_COUNT, existingMetricsSpecFile, 5L);

  // capture the existing delete counts
  existingMetricsTotal = getMetricsManager().getTotalMetric(
    SERVICE_TAGS, "user." + Constants.Metrics.Connection.CONNECTION_DELETED_COUNT);
  existingMetricsFile = getMetricsManager().getTotalMetric(
    tagsFile, "user." + Constants.Metrics.Connection.CONNECTION_DELETED_COUNT);
  existingMetricsDummy = getMetricsManager().getTotalMetric(
    tagsDummy, "user." + Constants.Metrics.Connection.CONNECTION_DELETED_COUNT);

  // delete all connections
  for (int i = 0; i < 15; i++) {
    deleteConnection("conn" + i);
  }

  // validate 15 delete metrics for these connections: 5 for file, 10 for dummy
  validateMetrics(SERVICE_TAGS, Constants.Metrics.Connection.CONNECTION_DELETED_COUNT, existingMetricsTotal, 15L);
  validateMetrics(tagsFile, Constants.Metrics.Connection.CONNECTION_DELETED_COUNT, existingMetricsFile, 5L);
  validateMetrics(tagsDummy, Constants.Metrics.Connection.CONNECTION_DELETED_COUNT, existingMetricsDummy, 10L);
}
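The test always samples a metric's existing total before acting and then validates only the delta, which keeps it robust against metrics left behind by earlier tests sharing the studio service. One plausible shape for the validateMetrics helper, assuming it polls until the expected total is reached (Tasks.waitFor is used the same way in testUsingConnections above):

// Hypothetical helper: wait until the metric under the given tags has grown by
// exactly expectedDelta beyond the previously captured total.
private void validateMetrics(Map<String, String> tags, String metric,
                             long existing, long expectedDelta) throws Exception {
  Tasks.waitFor(existing + expectedDelta,
                () -> getMetricsManager().getTotalMetric(tags, "user." + metric),
                20, TimeUnit.SECONDS);
}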