Search in sources :

Example 1 with BrowseDetail

use of io.cdap.cdap.etl.api.connector.BrowseDetail in project cdap by caskdata.

the class DataPipelineConnectionTest method testBrowseSample.

/**
 * End-to-end check of browsing and sampling through a file connection.
 *
 * <p>Creates a connection backed by the mock file connector, browses a freshly
 * populated temp folder with different limits, browses a single sub-directory and
 * a single file, and finally samples the file with limits above, equal to and
 * below its line count. The connection is deleted at the end.
 */
@Test
public void testBrowseSample() throws Exception {
    File root = TEMP_FOLDER.newFolder();
    List<BrowseEntity> entities = addFilesInDirectory(root);
    String connectionName = "BrowseSample";

    // in set up we add "-mocks" as the suffix for the artifact id
    ArtifactSelectorConfig creationArtifact =
        new ArtifactSelectorConfig("system", APP_ARTIFACT_ID.getArtifact() + "-mocks", APP_ARTIFACT_ID.getVersion());
    PluginInfo pluginInfo =
        new PluginInfo(FileConnector.NAME, Connector.PLUGIN_TYPE, null, Collections.emptyMap(), creationArtifact);
    addConnection(connectionName, new ConnectionCreationRequest("", pluginInfo));

    String rootPath = root.getCanonicalPath();

    // limit 10: every entity is returned
    BrowseDetail actualBrowse = browseConnection(connectionName, rootPath, 10);
    BrowseDetail expectedBrowse = BrowseDetail.builder().setTotalCount(10).setEntities(entities).build();
    Assert.assertEquals(expectedBrowse, actualBrowse);

    // limit 5: only the first five entities, but the total count is still 10
    actualBrowse = browseConnection(connectionName, rootPath, 5);
    expectedBrowse = BrowseDetail.builder().setTotalCount(10).setEntities(entities.subList(0, 5)).build();
    Assert.assertEquals(expectedBrowse, actualBrowse);

    // browsing the created directory should give an empty result
    actualBrowse = browseConnection(connectionName, entities.get(0).getPath(), 10);
    expectedBrowse = BrowseDetail.builder().setTotalCount(0).build();
    Assert.assertEquals(expectedBrowse, actualBrowse);

    // a file is not browsable, so browsing it should return the file itself
    BrowseEntity fileEntity = entities.get(1);
    String filePath = fileEntity.getPath();
    actualBrowse = browseConnection(connectionName, filePath, 10);
    expectedBrowse = BrowseDetail.builder().setTotalCount(1).addEntity(fileEntity).build();
    Assert.assertEquals(expectedBrowse, actualBrowse);

    // build the 100 records the sample is expected to contain: offsets 0, 2, ..., 198
    List<StructuredRecord> allRecords = new ArrayList<>();
    Schema schema = Schema.recordOf(
        "schema",
        Schema.Field.of("offset", Schema.of(Schema.Type.LONG)),
        Schema.Field.of("body", Schema.of(Schema.Type.STRING)));
    for (long offset = 0L; offset < 200L; offset += 2L) {
        allRecords.add(StructuredRecord.builder(schema).set("offset", offset).set("body", "1").build());
    }

    ArtifactSelectorConfig sampleArtifact =
        new ArtifactSelectorConfig("SYSTEM", APP_ARTIFACT_ID.getArtifact() + "-mocks", APP_ARTIFACT_ID.getVersion());
    Map<String, String> pluginProperties = ImmutableMap.of(
        "path", filePath,
        "useConnection", "true",
        "connection", String.format("${conn(%s)}", connectionName));
    PluginDetail batchSource = new PluginDetail("file", "batchsource", pluginProperties, sampleArtifact, schema);
    PluginDetail streamingSource = new PluginDetail("file", "streamingsource", pluginProperties, sampleArtifact, schema);
    ConnectorDetail connectorDetail = new ConnectorDetail(ImmutableSet.of(batchSource, streamingSource));
    SampleResponse expectedSample = new SampleResponse(connectorDetail, schema, allRecords);

    // the file has 100 lines, so a limit of 200 should retrieve all of them
    SampleResponse actualSample = sampleConnection(connectionName, filePath, 200);
    Assert.assertEquals(expectedSample, actualSample);

    // a limit of exactly 100 should also retrieve everything
    actualSample = sampleConnection(connectionName, filePath, 100);
    Assert.assertEquals(expectedSample, actualSample);

    // a limit of 50 should only retrieve the first 50 records
    actualSample = sampleConnection(connectionName, filePath, 50);
    expectedSample = new SampleResponse(connectorDetail, schema, allRecords.subList(0, 50));
    Assert.assertEquals(expectedSample, actualSample);

    deleteConnection(connectionName);
}
Also used : ArtifactSelectorConfig(io.cdap.cdap.etl.proto.ArtifactSelectorConfig) BrowseEntity(io.cdap.cdap.etl.api.connector.BrowseEntity) Schema(io.cdap.cdap.api.data.schema.Schema) SampleResponse(io.cdap.cdap.etl.proto.connection.SampleResponse) ArrayList(java.util.ArrayList) ConnectorDetail(io.cdap.cdap.etl.proto.connection.ConnectorDetail) BrowseDetail(io.cdap.cdap.etl.api.connector.BrowseDetail) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) PluginDetail(io.cdap.cdap.etl.proto.connection.PluginDetail) ConnectionCreationRequest(io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest) PluginInfo(io.cdap.cdap.etl.proto.connection.PluginInfo) File(java.io.File) Test(org.junit.Test)

Example 2 with BrowseDetail

use of io.cdap.cdap.etl.api.connector.BrowseDetail in project cdap by caskdata.

the class DataPipelineConnectionTest method browseConnection.

/**
 * Issues a browse request against the given connection and returns the parsed result.
 *
 * <p>The path and limit are sent both as query parameters and as a JSON-serialized
 * {@code BrowseRequest} body. The response code is asserted against {@code expectedCode}.
 * NOTE(review): {@code expectedCode} is not declared in this method; presumably it is a
 * field or constant of the enclosing test class -- confirm.
 *
 * @param connection name of the connection to browse
 * @param path path to browse within the connection
 * @param limit maximum number of entities requested
 * @return the deserialized browse result, or {@code null} when {@code expectedCode} is not HTTP 200
 * @throws IOException if the URL cannot be built or the request fails
 */
private BrowseDetail browseConnection(String connection, String path, int limit) throws IOException {
    String utf8 = StandardCharsets.UTF_8.name();
    String url = URLEncoder.encode(String.format("v1/contexts/%s/connections/%s/browse", NamespaceId.DEFAULT.getNamespace(), connection), utf8);
    // The path is arbitrary file-system text: spaces, backslashes, '&', '#' or '%' would either make
    // URI parsing in resolve() throw or corrupt the query string, so it must be encoded as a query value.
    // String.valueOf keeps the previous rendering ("null") for a null path instead of throwing here.
    String encodedPath = URLEncoder.encode(String.valueOf(path), utf8);
    URL validatePipelineURL = serviceURI.resolve(String.format("%s?path=%s&limit=%s", url, encodedPath, limit)).toURL();
    HttpRequest.Builder request = HttpRequest.builder(HttpMethod.POST, validatePipelineURL).withBody(GSON.toJson(BrowseRequest.builder(path).setLimit(limit).build()));
    HttpResponse response = executeRequest(request);
    Assert.assertEquals("Wrong answer: " + response.getResponseBodyAsString(), expectedCode, response.getResponseCode());
    // Only a successful response carries a body that can be parsed as a BrowseDetail.
    return expectedCode != HttpURLConnection.HTTP_OK ? null : GSON.fromJson(response.getResponseBodyAsString(), BrowseDetail.class);
}
Also used : HttpRequest(io.cdap.common.http.HttpRequest) HttpResponse(io.cdap.common.http.HttpResponse) BrowseDetail(io.cdap.cdap.etl.api.connector.BrowseDetail) URL(java.net.URL)

Example 3 with BrowseDetail

use of io.cdap.cdap.etl.api.connector.BrowseDetail in project cdap by caskdata.

the class FileConnector method browse.

/**
 * Browses the local file system at the path carried by the request.
 *
 * <p>A path that is not a directory is returned as a single, sampleable "file" entity.
 * A directory is listed, sorted, and truncated to the request's limit (when one is set);
 * the total count always reflects the full number of children.
 *
 * @param context connector context (not used by this implementation)
 * @param request browse request supplying the path and an optional limit
 * @return the browse result for the path
 * @throws IOException if the path does not exist or the directory cannot be listed
 */
@Override
public BrowseDetail browse(ConnectorContext context, BrowseRequest request) throws IOException {
    File file = new File(request.getPath());
    // if it does not exist, error out
    if (!file.exists()) {
        throw new IOException(String.format("The given path %s does not exist", request.getPath()));
    }
    // if it is not a directory, then it is not browsable, return the path itself
    if (!file.isDirectory()) {
        return BrowseDetail.builder().setTotalCount(1).addEntity(BrowseEntity.builder(file.getName(), request.getPath(), "file").canSample(true).build()).build();
    }
    // list the files and classify them with file and directory
    File[] files = file.listFiles();
    // listFiles() returns null on an I/O error (e.g. missing read permission, or the directory
    // disappearing after the check above); report that instead of failing with a NullPointerException
    if (files == null) {
        throw new IOException(String.format("Unable to list the contents of the given path %s", request.getPath()));
    }
    // sort the files by the name
    Arrays.sort(files);
    int limit = request.getLimit() == null ? files.length : Math.min(request.getLimit(), files.length);
    BrowseDetail.Builder builder = BrowseDetail.builder().setTotalCount(files.length);
    for (int i = 0; i < limit; i++) {
        File listedFile = files[i];
        if (listedFile.isDirectory()) {
            // directories can be browsed further
            builder.addEntity(BrowseEntity.builder(listedFile.getName(), listedFile.getCanonicalPath(), "directory").canSample(true).canBrowse(true).build());
            continue;
        }
        builder.addEntity(BrowseEntity.builder(listedFile.getName(), listedFile.getCanonicalPath(), "file").canSample(true).build());
    }
    return builder.build();
}
Also used : IOException(java.io.IOException) BrowseDetail(io.cdap.cdap.etl.api.connector.BrowseDetail) File(java.io.File)

Example 4 with BrowseDetail

use of io.cdap.cdap.etl.api.connector.BrowseDetail in project cdap by caskdata.

the class RemoteConnectionBrowseTask method execute.

/**
 * Runs a browse against the connector described by the remote request and returns
 * the result as JSON.
 *
 * <p>The browse request is deserialized from the payload of {@code request}; the
 * connector is obtained via {@code getConnector}, configured, asked to browse, and
 * closed when the try-with-resources block exits.
 *
 * @param systemAppContext context used to create the service plugin configurer
 * @param request remote request carrying the namespace, connection and serialized browse request
 * @return the JSON form of the {@code BrowseDetail} produced by the connector
 * @throws Exception if obtaining, configuring, browsing or closing the connector fails
 */
@Override
public String execute(SystemAppTaskContext systemAppContext, RemoteConnectionRequest request) throws Exception {
    String ns = request.getNamespace();
    Connection conn = request.getConnection();

    // Plugin selector and configurer
    ArtifactSelectorProvider selectorProvider = new ArtifactSelectorProvider();
    TrackedPluginSelector trackedSelector =
        new TrackedPluginSelector(selectorProvider.getPluginSelector(conn.getPlugin().getArtifact()));
    ServicePluginConfigurer configurer = systemAppContext.createServicePluginConfigurer(ns);

    BrowseRequest parsedRequest = GSON.fromJson(request.getRequest(), BrowseRequest.class);

    try (Connector connector = getConnector(systemAppContext, configurer, conn.getPlugin(), ns, trackedSelector)) {
        // configure first, then browse with a fresh context
        connector.configure(new DefaultConnectorConfigurer(configurer));
        SimpleFailureCollector failureCollector = new SimpleFailureCollector();
        ConnectorContext browseContext = new DefaultConnectorContext(failureCollector, configurer);
        return GSON.toJson(connector.browse(browseContext, parsedRequest));
    }
}
Also used : Connector(io.cdap.cdap.etl.api.connector.Connector) ArtifactSelectorProvider(io.cdap.cdap.etl.common.ArtifactSelectorProvider) SimpleFailureCollector(io.cdap.cdap.etl.proto.validation.SimpleFailureCollector) TrackedPluginSelector(io.cdap.cdap.etl.spec.TrackedPluginSelector) DefaultConnectorContext(io.cdap.cdap.datapipeline.connection.DefaultConnectorContext) Connection(io.cdap.cdap.etl.proto.connection.Connection) ConnectorContext(io.cdap.cdap.etl.api.connector.ConnectorContext) DefaultConnectorContext(io.cdap.cdap.datapipeline.connection.DefaultConnectorContext) BrowseRequest(io.cdap.cdap.etl.api.connector.BrowseRequest) DefaultConnectorConfigurer(io.cdap.cdap.datapipeline.connection.DefaultConnectorConfigurer) BrowseDetail(io.cdap.cdap.etl.api.connector.BrowseDetail) ServicePluginConfigurer(io.cdap.cdap.api.service.http.ServicePluginConfigurer)

Aggregations

BrowseDetail (io.cdap.cdap.etl.api.connector.BrowseDetail)4 File (java.io.File)2 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)1 Schema (io.cdap.cdap.api.data.schema.Schema)1 ServicePluginConfigurer (io.cdap.cdap.api.service.http.ServicePluginConfigurer)1 DefaultConnectorConfigurer (io.cdap.cdap.datapipeline.connection.DefaultConnectorConfigurer)1 DefaultConnectorContext (io.cdap.cdap.datapipeline.connection.DefaultConnectorContext)1 BrowseEntity (io.cdap.cdap.etl.api.connector.BrowseEntity)1 BrowseRequest (io.cdap.cdap.etl.api.connector.BrowseRequest)1 Connector (io.cdap.cdap.etl.api.connector.Connector)1 ConnectorContext (io.cdap.cdap.etl.api.connector.ConnectorContext)1 ArtifactSelectorProvider (io.cdap.cdap.etl.common.ArtifactSelectorProvider)1 ArtifactSelectorConfig (io.cdap.cdap.etl.proto.ArtifactSelectorConfig)1 Connection (io.cdap.cdap.etl.proto.connection.Connection)1 ConnectionCreationRequest (io.cdap.cdap.etl.proto.connection.ConnectionCreationRequest)1 ConnectorDetail (io.cdap.cdap.etl.proto.connection.ConnectorDetail)1 PluginDetail (io.cdap.cdap.etl.proto.connection.PluginDetail)1 PluginInfo (io.cdap.cdap.etl.proto.connection.PluginInfo)1 SampleResponse (io.cdap.cdap.etl.proto.connection.SampleResponse)1 SimpleFailureCollector (io.cdap.cdap.etl.proto.validation.SimpleFailureCollector)1