Search in sources:

Example 21 with ETLPlugin

Use of io.cdap.cdap.etl.proto.v2.ETLPlugin in the project hydrator-plugins by cdapio.

From the class SolrSearchSinkTest, method testSolrConnectionWithWrongCollection.

@Test
public void testSolrConnectionWithWrongCollection() throws Exception {
    // Build a pipeline whose Solr sink points at a collection that does not exist;
    // the workflow run is expected to end in FAILED.
    String inputDatasetName = "input-source-with-wrong-collection";
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputDatasetName));
    Map<String, String> sinkProperties = new ImmutableMap.Builder<String, String>()
        .put(Constants.Reference.REFERENCE_NAME, "BatchSolrSink")
        .put("solrMode", SolrSearchSinkConfig.SINGLE_NODE_MODE)
        .put("solrHost", "localhost:8983")
        .put("collectionName", "wrong_collection")
        .put("keyField", "id")
        .put("batchSize", "1000")
        .put("outputFieldMappings", "office address:address")
        .build();
    ETLStage sink =
        new ETLStage("SolrSink", new ETLPlugin("SolrSearch", BatchSink.PLUGIN_TYPE, sinkProperties, null));
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(sink)
        .addConnection(source.getName(), sink.getName())
        .build();
    ApplicationId appId = NamespaceId.DEFAULT.app("testBatchSolrSink");
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // Seed two records into the mock source dataset.
    DataSetManager<Table> inputManager = getDataset(inputDatasetName);
    List<StructuredRecord> input = ImmutableList.of(
        StructuredRecord.builder(inputSchema)
            .set("id", "1").set("firstname", "Brett").set("lastname", "Lee")
            .set("office address", "NE lake side").set("pincode", 480001).build(),
        StructuredRecord.builder(inputSchema)
            .set("id", "2").set("firstname", "John").set("lastname", "Ray")
            .set("office address", "SE lake side").set("pincode", 480002).build());
    MockSource.writeInput(inputManager, input);
    // Run the pipeline and wait for the expected failure.
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRuns(ProgramRunStatus.FAILED, 1, 5, TimeUnit.MINUTES);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 22 with ETLPlugin

Use of io.cdap.cdap.etl.proto.v2.ETLPlugin in the project hydrator-plugins by cdapio.

From the class SolrSearchSinkTest, method testBatchSolrSearchSink.

// NOTE(review): annotated only with @Ignore and not @Test — confirm whether this is
// intentional; under JUnit 4 the method is not executed as a test either way.
@Ignore
public void testBatchSolrSearchSink() throws Exception {
    // End-to-end check: run the pipeline into a live local Solr and verify the
    // indexed documents, including the "office address" -> "address" field mapping.
    client = new HttpSolrClient("http://localhost:8983/solr/collection1");
    try {
        String inputDatasetName = "solr-batch-input-source";
        ETLStage source = new ETLStage("source", MockSource.getPlugin(inputDatasetName));
        Map<String, String> sinkConfigproperties = new ImmutableMap.Builder<String, String>()
            .put(Constants.Reference.REFERENCE_NAME, "BatchSolrSink")
            .put("solrMode", SolrSearchSinkConfig.SINGLE_NODE_MODE)
            .put("solrHost", "localhost:8983")
            .put("collectionName", "collection1")
            .put("keyField", "id")
            .put("batchSize", "1000")
            .put("outputFieldMappings", "office address:address")
            .build();
        ETLStage sink = new ETLStage("SolrSink",
            new ETLPlugin("SolrSearch", BatchSink.PLUGIN_TYPE, sinkConfigproperties, null));
        ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
            .addStage(source)
            .addStage(sink)
            .addConnection(source.getName(), sink.getName())
            .build();
        ApplicationId appId = NamespaceId.DEFAULT.app("testBatchSolrSink");
        AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
        ApplicationManager appManager = deployApplication(appId, appRequest);
        DataSetManager<Table> inputManager = getDataset(inputDatasetName);
        List<StructuredRecord> input = ImmutableList.of(
            StructuredRecord.builder(inputSchema)
                .set("id", "1").set("firstname", "Brett").set("lastname", "Lee")
                .set("office address", "NE lake side").set("pincode", 480001).build(),
            StructuredRecord.builder(inputSchema)
                .set("id", "2").set("firstname", "John").set("lastname", "Ray")
                .set("office address", "SE lake side").set("pincode", 480002).build());
        MockSource.writeInput(inputManager, input);
        WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
        workflowManager.start();
        workflowManager.waitForRun(ProgramRunStatus.COMPLETED, 5, TimeUnit.MINUTES);
        QueryResponse queryResponse = client.query(new SolrQuery("*:*"));
        SolrDocumentList resultList = queryResponse.getResults();
        Assert.assertEquals(2, resultList.size());
        for (SolrDocument document : resultList) {
            if (document.get("id").equals("1")) {
                Assert.assertEquals("Brett", document.get("firstname"));
                Assert.assertEquals("Lee", document.get("lastname"));
                Assert.assertEquals("NE lake side", document.get("address"));
                Assert.assertEquals(480001, document.get("pincode"));
            } else {
                Assert.assertEquals("John", document.get("firstname"));
                Assert.assertEquals("Ray", document.get("lastname"));
                Assert.assertEquals("SE lake side", document.get("address"));
                Assert.assertEquals(480002, document.get("pincode"));
            }
        }
    } finally {
        // Clean the indexes and release the client even when the run or an assertion
        // fails, so no documents or open connections leak into subsequent tests.
        client.deleteByQuery("*:*");
        client.commit();
        client.shutdown();
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) SolrDocumentList(org.apache.solr.common.SolrDocumentList) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) SolrQuery(org.apache.solr.client.solrj.SolrQuery) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) SolrDocument(org.apache.solr.common.SolrDocument) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Ignore(org.junit.Ignore)

Example 23 with ETLPlugin

Use of io.cdap.cdap.etl.proto.v2.ETLPlugin in the project hydrator-plugins by cdapio.

From the class HBaseTest, method testHBaseSource.

@Test
public void testHBaseSource() throws Exception {
    // Pipeline reading two rows from an HBase table into a mock sink, then verifying
    // both rows arrive with their row key ("ticker") and column values intact.
    Map<String, String> hBaseProps = new HashMap<>();
    hBaseProps.put("tableName", HBASE_TABLE_NAME);
    hBaseProps.put("columnFamily", HBASE_FAMILY_COLUMN);
    hBaseProps.put("zkClientPort", Integer.toString(testUtil.getZkCluster().getClientPort()));
    hBaseProps.put("schema", BODY_SCHEMA.toString());
    hBaseProps.put("rowField", "ticker");
    hBaseProps.put(Constants.Reference.REFERENCE_NAME, "HBaseSourceTest");
    ETLStage source =
        new ETLStage("HBase", new ETLPlugin("HBase", BatchSource.PLUGIN_TYPE, hBaseProps, null));
    String outputDatasetName = "output-hbasesourcetest";
    ETLStage sink = new ETLStage("sink", MockSink.getPlugin(outputDatasetName));
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(sink)
        .addConnection(source.getName(), sink.getName())
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(BATCH_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("HBaseSourceTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 1, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(outputDatasetName);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Assert.assertEquals(2, outputRecords.size());
    // The sink preserves no ordering, so map each output record to its expected row
    // by inspecting the first record's row key.
    String rowkey = outputRecords.get(0).get("ticker");
    StructuredRecord row1;
    StructuredRecord row2;
    if (ROW1.equals(rowkey)) {
        row1 = outputRecords.get(0);
        row2 = outputRecords.get(1);
    } else {
        row1 = outputRecords.get(1);
        row2 = outputRecords.get(0);
    }
    Assert.assertEquals(ROW1, row1.get("ticker"));
    Assert.assertEquals(VAL1, row1.get(COL1));
    Assert.assertEquals(VAL2, row1.get(COL2));
    Assert.assertEquals(ROW2, row2.get("ticker"));
    Assert.assertEquals(VAL1, row2.get(COL1));
    Assert.assertEquals(VAL2, row2.get(COL2));
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) HTable(org.apache.hadoop.hbase.client.HTable) HashMap(java.util.HashMap) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 24 with ETLPlugin

Use of io.cdap.cdap.etl.proto.v2.ETLPlugin in the project hydrator-plugins by cdapio.

From the class MongoDBTest, method testMongoDBSource.

@SuppressWarnings("ConstantConditions")
@Test
public void testMongoDBSource() throws Exception {
    // Pipeline reading two stock documents from a local MongoDB into a mock sink,
    // then verifying both records round-trip with their field values intact.
    Map<String, String> sourceProperties = new ImmutableMap.Builder<String, String>()
        .put(MongoDBBatchSource.Properties.CONNECTION_STRING,
             String.format("mongodb://localhost:%d/%s.%s", mongoPort, MONGO_DB, MONGO_SOURCE_COLLECTIONS))
        .put(MongoDBBatchSource.Properties.SCHEMA, SOURCE_BODY_SCHEMA.toString())
        .put(MongoDBBatchSource.Properties.SPLITTER_CLASS, StandaloneMongoSplitter.class.getSimpleName())
        .put(Constants.Reference.REFERENCE_NAME, "SimpleMongoTest")
        .build();
    ETLStage source = new ETLStage("MongoDB",
        new ETLPlugin("MongoDB", BatchSource.PLUGIN_TYPE, sourceProperties, null));
    String outputDatasetName = "output-batchsourcetest";
    ETLStage sink = new ETLStage("sink", MockSink.getPlugin(outputDatasetName));
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(sink)
        .addConnection(source.getName(), sink.getName())
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("MongoSourceTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 1, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(outputDatasetName);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    Assert.assertEquals(2, outputRecords.size());
    // Output order is unspecified; pick rows by the first record's ticker symbol.
    String ticker = outputRecords.get(0).get("ticker");
    boolean firstIsApple = "AAPL".equals(ticker);
    StructuredRecord row1 = firstIsApple ? outputRecords.get(0) : outputRecords.get(1);
    StructuredRecord row2 = firstIsApple ? outputRecords.get(1) : outputRecords.get(0);
    Assert.assertEquals("AAPL", row1.get("ticker"));
    Assert.assertEquals(10, (int) row1.get("num"));
    Assert.assertEquals(23.23, (double) row1.get("price"), 0.00001);
    Assert.assertEquals("ORCL", row2.get("ticker"));
    Assert.assertEquals(12, (int) row2.get("num"));
    Assert.assertEquals(10.10, (double) row2.get("price"), 0.00001);
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) Table(io.cdap.cdap.api.dataset.table.Table) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) ImmutableMap(com.google.common.collect.ImmutableMap) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) StandaloneMongoSplitter(com.mongodb.hadoop.splitter.StandaloneMongoSplitter) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Example 25 with ETLPlugin

Use of io.cdap.cdap.etl.proto.v2.ETLPlugin in the project hydrator-plugins by cdapio.

From the class ValueMapperTest, method testWithNoDefaults.

@Test
public void testWithNoDefaults() throws Exception {
    // ValueMapper transform with an empty "defaults" property: records whose
    // designationid has no lookup entry (null / "") must pass through unmapped.
    String inputTable = "input_table_without_defaults";
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputTable));
    Map<String, String> sourceproperties = new ImmutableMap.Builder<String, String>()
        .put("mapping", "designationid:designation_lookup_table_without_defaults:designationName")
        .put("defaults", "")
        .build();
    ETLStage transform = new ETLStage("transform",
        new ETLPlugin("ValueMapper", Transform.PLUGIN_TYPE, sourceproperties, null));
    String sinkTable = "output_table_without_defaults";
    ETLStage sink = new ETLStage("sink", MockSink.getPlugin(sinkTable));
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(transform)
        .addStage(sink)
        .addConnection(source.getName(), transform.getName())
        .addConnection(transform.getName(), sink.getName())
        .build();
    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(BATCH_ARTIFACT, etlConfig);
    ApplicationId appId = NamespaceId.DEFAULT.app("valuemappertest_without_defaults");
    ApplicationManager appManager = deployApplication(appId, appRequest);
    // Seed the designation lookup table: id -> designation name.
    addDatasetInstance(KeyValueTable.class.getName(), "designation_lookup_table_without_defaults");
    DataSetManager<KeyValueTable> dataSetManager = getDataset("designation_lookup_table_without_defaults");
    KeyValueTable keyValueTable = dataSetManager.get();
    String[][] designations = { { "1", "SE" }, { "2", "SSE" }, { "3", "ML" }, { "4", "TL" } };
    for (String[] entry : designations) {
        keyValueTable.write(entry[0].getBytes(Charsets.UTF_8), entry[1].getBytes(Charsets.UTF_8));
    }
    dataSetManager.flush();
    DataSetManager<Table> inputManager = getDataset(inputTable);
    List<StructuredRecord> input = ImmutableList.of(
        StructuredRecord.builder(SOURCE_SCHEMA)
            .set(ID, "100").set(NAME, "John").set(SALARY, "1000").set(DESIGNATIONID, null).build(),
        StructuredRecord.builder(SOURCE_SCHEMA)
            .set(ID, "101").set(NAME, "Kerry").set(SALARY, "1030").set(DESIGNATIONID, "2").build(),
        StructuredRecord.builder(SOURCE_SCHEMA)
            .set(ID, "102").set(NAME, "Mathew").set(SALARY, "1230").set(DESIGNATIONID, "").build(),
        StructuredRecord.builder(SOURCE_SCHEMA)
            .set(ID, "103").set(NAME, "Allie").set(SALARY, "2000").set(DESIGNATIONID, "4").build());
    MockSource.writeInput(inputManager, input);
    WorkflowManager workflowManager = appManager.getWorkflowManager(SmartWorkflow.NAME);
    workflowManager.start();
    workflowManager.waitForRuns(ProgramRunStatus.COMPLETED, 1, 5, TimeUnit.MINUTES);
    DataSetManager<Table> outputManager = getDataset(sinkTable);
    List<StructuredRecord> outputRecords = MockSink.readOutput(outputManager);
    // Expected results keyed by employee name; null/"" designation ids stay unmapped.
    Map<String, String> nameDesignationMap = new HashMap<>();
    nameDesignationMap.put("John", null);
    nameDesignationMap.put("Kerry", "SSE");
    nameDesignationMap.put("Mathew", "");
    nameDesignationMap.put("Allie", "TL");
    Map<String, String> nameSalaryMap = new HashMap<>();
    nameSalaryMap.put("John", "1000");
    nameSalaryMap.put("Kerry", "1030");
    nameSalaryMap.put("Mathew", "1230");
    nameSalaryMap.put("Allie", "2000");
    Assert.assertEquals(4, outputRecords.size());
    for (int i = 0; i < 4; i++) {
        Assert.assertEquals(nameDesignationMap.get(outputRecords.get(i).get(NAME)),
                            outputRecords.get(i).get(DESIGNATIONNAME));
    }
    for (int i = 0; i < 4; i++) {
        Assert.assertEquals(nameSalaryMap.get(outputRecords.get(i).get(NAME)),
                            outputRecords.get(i).get(SALARY));
    }
}
Also used : ApplicationManager(io.cdap.cdap.test.ApplicationManager) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) Table(io.cdap.cdap.api.dataset.table.Table) HashMap(java.util.HashMap) WorkflowManager(io.cdap.cdap.test.WorkflowManager) ETLPlugin(io.cdap.cdap.etl.proto.v2.ETLPlugin) ImmutableMap(com.google.common.collect.ImmutableMap) StructuredRecord(io.cdap.cdap.api.data.format.StructuredRecord) AppRequest(io.cdap.cdap.proto.artifact.AppRequest) ETLBatchConfig(io.cdap.cdap.etl.proto.v2.ETLBatchConfig) ETLStage(io.cdap.cdap.etl.proto.v2.ETLStage) KeyValueTable(io.cdap.cdap.api.dataset.lib.KeyValueTable) ApplicationId(io.cdap.cdap.proto.id.ApplicationId) Test(org.junit.Test)

Aggregations

ETLPlugin (io.cdap.cdap.etl.proto.v2.ETLPlugin)154 ETLStage (io.cdap.cdap.etl.proto.v2.ETLStage)144 Test (org.junit.Test)125 ApplicationManager (io.cdap.cdap.test.ApplicationManager)102 ETLBatchConfig (io.cdap.cdap.etl.proto.v2.ETLBatchConfig)101 Table (io.cdap.cdap.api.dataset.table.Table)79 StructuredRecord (io.cdap.cdap.api.data.format.StructuredRecord)77 ApplicationId (io.cdap.cdap.proto.id.ApplicationId)70 AppRequest (io.cdap.cdap.proto.artifact.AppRequest)59 Schema (io.cdap.cdap.api.data.schema.Schema)55 HashMap (java.util.HashMap)55 WorkflowManager (io.cdap.cdap.test.WorkflowManager)53 ImmutableMap (com.google.common.collect.ImmutableMap)36 KeyValueTable (io.cdap.cdap.api.dataset.lib.KeyValueTable)35 HashSet (java.util.HashSet)35 ArrayList (java.util.ArrayList)34 StageValidationResponse (io.cdap.cdap.etl.proto.v2.validation.StageValidationResponse)20 StageValidationRequest (io.cdap.cdap.etl.proto.v2.validation.StageValidationRequest)18 File (java.io.File)17 FileSet (io.cdap.cdap.api.dataset.lib.FileSet)14