
Example 6 with DistributedMapCacheClient

Use of org.apache.nifi.distributed.cache.client.DistributedMapCacheClient in project nifi by apache.

From the class TestHBase_1_1_2_ClientMapCacheService, method testGet.

@Test
public void testGet() throws InitializationException, IOException {
    final String row = "row1";
    final String content = "content1";
    final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
    // Mock an HBase Table so we can verify the put operations later
    final Table table = Mockito.mock(Table.class);
    when(table.getName()).thenReturn(TableName.valueOf(tableName));
    // create the controller service and link it to the test processor
    final MockHBaseClientService service = configureHBaseClientService(runner, table);
    runner.assertValid(service);
    final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
    runner.assertValid(cacheService);
    final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE).asControllerService(DistributedMapCacheClient.class);
    hBaseCacheService.put(row, content, stringSerializer, stringSerializer);
    final String result = hBaseCacheService.get(row, stringSerializer, stringDeserializer);
    assertEquals(content, result);
}
Also used: Table (org.apache.hadoop.hbase.client.Table), DistributedMapCacheClient (org.apache.nifi.distributed.cache.client.DistributedMapCacheClient), TestRunner (org.apache.nifi.util.TestRunner), Test (org.junit.Test)
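
The stringSerializer and stringDeserializer fields referenced by the test are defined elsewhere in the test class and are not reproduced on this page. A minimal sketch of what they might look like, implemented against NiFi's Serializer and Deserializer interfaces (the bodies below are assumptions for illustration, not the project's actual fields; StandardCharsets and the org.apache.nifi.distributed.cache.client classes are assumed imported):

private final Serializer<String> stringSerializer = new Serializer<String>() {
    @Override
    public void serialize(final String value, final OutputStream output) throws SerializationException, IOException {
        // Write the string as raw UTF-8 bytes
        output.write(value.getBytes(StandardCharsets.UTF_8));
    }
};

private final Deserializer<String> stringDeserializer = new Deserializer<String>() {
    @Override
    public String deserialize(final byte[] input) throws DeserializationException, IOException {
        // A null or empty payload means the cache had no entry for the key
        return input == null || input.length == 0 ? null : new String(input, StandardCharsets.UTF_8);
    }
};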

Example 7 with DistributedMapCacheClient

Use of org.apache.nifi.distributed.cache.client.DistributedMapCacheClient in project nifi by apache.

From the class TestHBase_1_1_2_ClientMapCacheService, method testContainsKey.

@Test
public void testContainsKey() throws InitializationException, IOException {
    final String row = "row1";
    final String content = "content1";
    final TestRunner runner = TestRunners.newTestRunner(TestProcessor.class);
    // Mock an HBase Table so we can verify the put operations later
    final Table table = Mockito.mock(Table.class);
    when(table.getName()).thenReturn(TableName.valueOf(tableName));
    // create the controller service and link it to the test processor
    final MockHBaseClientService service = configureHBaseClientService(runner, table);
    runner.assertValid(service);
    final HBaseClientService hBaseClientService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CLIENT_SERVICE).asControllerService(HBaseClientService.class);
    final DistributedMapCacheClient cacheService = configureHBaseCacheService(runner, hBaseClientService);
    runner.assertValid(cacheService);
    final DistributedMapCacheClient hBaseCacheService = runner.getProcessContext().getProperty(TestProcessor.HBASE_CACHE_SERVICE).asControllerService(DistributedMapCacheClient.class);
    assertFalse(hBaseCacheService.containsKey(row, stringSerializer));
    hBaseCacheService.put(row, content, stringSerializer, stringSerializer);
    assertTrue(hBaseCacheService.containsKey(row, stringSerializer));
}
Also used: Table (org.apache.hadoop.hbase.client.Table), DistributedMapCacheClient (org.apache.nifi.distributed.cache.client.DistributedMapCacheClient), TestRunner (org.apache.nifi.util.TestRunner), Test (org.junit.Test)
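
The configureHBaseCacheService helper is private to the test class and not shown here. A plausible sketch of the wiring it performs, assuming the HBase_1_1_2_ClientMapCacheService controller service and its HBASE_CLIENT_SERVICE and HBASE_CACHE_TABLE_NAME property descriptors from the nifi codebase; the service identifiers "cacheService" and "hbaseClient" are invented for illustration:

private DistributedMapCacheClient configureHBaseCacheService(final TestRunner runner, final HBaseClientService hBaseClientService) throws InitializationException {
    // Register the HBase-backed map cache service with the test runner
    final HBase_1_1_2_ClientMapCacheService cacheService = new HBase_1_1_2_ClientMapCacheService();
    runner.addControllerService("cacheService", cacheService);
    // Point the cache at the HBase client service configured earlier and at the test table
    runner.setProperty(cacheService, HBase_1_1_2_ClientMapCacheService.HBASE_CLIENT_SERVICE, "hbaseClient");
    runner.setProperty(cacheService, HBase_1_1_2_ClientMapCacheService.HBASE_CACHE_TABLE_NAME, tableName);
    runner.enableControllerService(cacheService);
    // Expose the cache service to the test processor so asControllerService can resolve it
    runner.setProperty(TestProcessor.HBASE_CACHE_SERVICE, "cacheService");
    return cacheService;
}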

Example 8 with DistributedMapCacheClient

Use of org.apache.nifi.distributed.cache.client.DistributedMapCacheClient in project nifi by apache.

From the class GetHBase, method parseColumns.

@OnScheduled
public void parseColumns(final ProcessContext context) throws IOException {
    final StateMap stateMap = context.getStateManager().getState(Scope.CLUSTER);
    if (stateMap.getVersion() < 0) {
        // no state has been stored in the State Manager - check if we have state stored in the
        // DistributedMapCacheClient service and migrate it if so
        final DistributedMapCacheClient client = context.getProperty(DISTRIBUTED_CACHE_SERVICE).asControllerService(DistributedMapCacheClient.class);
        final ScanResult scanResult = getState(client);
        if (scanResult != null) {
            storeState(scanResult, context.getStateManager());
        }
        clearState(client);
    }
    final String columnsValue = context.getProperty(COLUMNS).getValue();
    final String[] columns = (columnsValue == null || columnsValue.isEmpty() ? new String[0] : columnsValue.split(","));
    this.columns.clear();
    for (final String column : columns) {
        if (column.contains(":")) {
            final String[] parts = column.split(":");
            final byte[] cf = parts[0].getBytes(Charset.forName("UTF-8"));
            final byte[] cq = parts[1].getBytes(Charset.forName("UTF-8"));
            this.columns.add(new Column(cf, cq));
        } else {
            final byte[] cf = column.getBytes(Charset.forName("UTF-8"));
            this.columns.add(new Column(cf, null));
        }
    }
}
Also used: DistributedMapCacheClient (org.apache.nifi.distributed.cache.client.DistributedMapCacheClient), Column (org.apache.nifi.hbase.scan.Column), StateMap (org.apache.nifi.components.state.StateMap), OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled)
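
To make the parsing rules concrete: a comma-separated Columns value such as "f1:c1,f2" yields one Column with family f1 and qualifier c1, plus one family-only Column whose null qualifier the processor can treat as the whole column family. A standalone sketch with hypothetical values:

class ColumnSpecDemo {
    public static void main(final String[] args) {
        final String columnsValue = "f1:c1,f2";
        for (final String column : columnsValue.split(",")) {
            if (column.contains(":")) {
                // "family:qualifier" selects a single cell per row
                final String[] parts = column.split(":");
                System.out.println("family=" + parts[0] + " qualifier=" + parts[1]);
            } else {
                // a bare family name selects every qualifier in that family
                System.out.println("family=" + column + " qualifier=<entire family>");
            }
        }
    }
}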

Example 9 with DistributedMapCacheClient

Use of org.apache.nifi.distributed.cache.client.DistributedMapCacheClient in project kylo by Teradata.

From the class SetSavepointTest, method setup.

@Before
public void setup() throws InitializationException {
    runner = TestRunners.newTestRunner(SetSavepoint.class);
    final SpringContextService springService = new MockSpringContextService();
    DistributedMapCacheClient client = new MockDistributedMapCacheClient();
    final Map<String, String> clientProperties = new HashMap<>();
    runner.addControllerService("client", client, clientProperties);
    runner.enableControllerService(client);
    DistributedSavepointController service = new DistributedSavepointController();
    final Map<String, String> serviceProperties = new HashMap<>();
    serviceProperties.put("distributed-cache-service", "client");
    runner.addControllerService("service", service, serviceProperties);
    runner.addControllerService(SPRING_SERVICE_IDENTIFIER, springService);
    runner.setProperty(service, DistributedSavepointController.SPRING_SERVICE, SPRING_SERVICE_IDENTIFIER);
    runner.enableControllerService(springService);
    runner.enableControllerService(service);
    runner.setProperty(SetSavepoint.SAVEPOINT_SERVICE, "service");
    runner.setProperty(SetSavepoint.EXPIRATION_DURATION, "24h");
    runner.setProperty(SetSavepoint.SAVEPOINT_ID, "${savepointid}");
    this.savepointId = "sp1";
    runner.setThreadCount(1);
    // Setup existing cache entry.
    provider = service.getProvider();
}
Also used: DistributedMapCacheClient (org.apache.nifi.distributed.cache.client.DistributedMapCacheClient), HashMap (java.util.HashMap), SpringContextService (com.thinkbiganalytics.nifi.core.api.spring.SpringContextService), DistributedSavepointController (com.thinkbiganalytics.nifi.v2.core.savepoint.DistributedSavepointController), Before (org.junit.Before)
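
MockDistributedMapCacheClient is Kylo's in-memory test double and its source is not shown on this page. A minimal HashMap-backed sketch of such a mock, assuming the NiFi 1.x DistributedMapCacheClient interface; note that it bypasses the serializers entirely and compares keys by their Java equals, simplifications a production-faithful mock might avoid:

public class MockDistributedMapCacheClient extends AbstractControllerService implements DistributedMapCacheClient {

    private final ConcurrentMap<Object, Object> values = new ConcurrentHashMap<>();

    @Override
    public <K, V> boolean putIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) {
        return values.putIfAbsent(key, value) == null;
    }

    @Override
    @SuppressWarnings("unchecked")
    public <K, V> V getAndPutIfAbsent(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer, final Deserializer<V> valueDeserializer) {
        // Returns the previous value, or null if the key was absent and the new value was stored
        return (V) values.putIfAbsent(key, value);
    }

    @Override
    public <K> boolean containsKey(final K key, final Serializer<K> keySerializer) {
        return values.containsKey(key);
    }

    @Override
    public <K, V> void put(final K key, final V value, final Serializer<K> keySerializer, final Serializer<V> valueSerializer) {
        values.put(key, value);
    }

    @Override
    @SuppressWarnings("unchecked")
    public <K, V> V get(final K key, final Serializer<K> keySerializer, final Deserializer<V> valueDeserializer) {
        return (V) values.get(key);
    }

    @Override
    public <K> boolean remove(final K key, final Serializer<K> serializer) {
        return values.remove(key) != null;
    }

    @Override
    public long removeByPattern(final String regex) {
        // Not exercised by this test; a faithful mock would match serialized keys against the regex
        return 0;
    }

    @Override
    public void close() {
        values.clear();
    }
}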

Example 10 with DistributedMapCacheClient

Use of org.apache.nifi.distributed.cache.client.DistributedMapCacheClient in project nifi by apache.

From the class DetectDuplicate, method onTrigger.

@Override
public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
    FlowFile flowFile = session.get();
    if (flowFile == null) {
        return;
    }
    final ComponentLog logger = getLogger();
    final String cacheKey = context.getProperty(CACHE_ENTRY_IDENTIFIER).evaluateAttributeExpressions(flowFile).getValue();
    if (StringUtils.isBlank(cacheKey)) {
        logger.error("FlowFile {} has no attribute for given Cache Entry Identifier", new Object[] { flowFile });
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        return;
    }
    final DistributedMapCacheClient cache = context.getProperty(DISTRIBUTED_CACHE_SERVICE).asControllerService(DistributedMapCacheClient.class);
    final Long durationMS = context.getProperty(AGE_OFF_DURATION).asTimePeriod(TimeUnit.MILLISECONDS);
    final long now = System.currentTimeMillis();
    try {
        final String flowFileDescription = context.getProperty(FLOWFILE_DESCRIPTION).evaluateAttributeExpressions(flowFile).getValue();
        final CacheValue cacheValue = new CacheValue(flowFileDescription, now);
        final CacheValue originalCacheValue;
        final boolean shouldCacheIdentifier = context.getProperty(CACHE_IDENTIFIER).asBoolean();
        if (shouldCacheIdentifier) {
            originalCacheValue = cache.getAndPutIfAbsent(cacheKey, cacheValue, keySerializer, valueSerializer, valueDeserializer);
        } else {
            originalCacheValue = cache.get(cacheKey, keySerializer, valueDeserializer);
        }
        boolean duplicate = originalCacheValue != null;
        if (duplicate && durationMS != null && (now >= originalCacheValue.getEntryTimeMS() + durationMS)) {
            boolean status = cache.remove(cacheKey, keySerializer);
            logger.debug("Removal of expired cached entry with key {} returned {}", new Object[] { cacheKey, status });
            // both should typically result in duplicate being false...but, better safe than sorry
            if (shouldCacheIdentifier) {
                duplicate = !cache.putIfAbsent(cacheKey, cacheValue, keySerializer, valueSerializer);
            } else {
                duplicate = cache.containsKey(cacheKey, keySerializer);
            }
        }
        if (duplicate) {
            session.getProvenanceReporter().route(flowFile, REL_DUPLICATE, "Duplicate of: " + ORIGINAL_DESCRIPTION_ATTRIBUTE_NAME);
            String originalFlowFileDescription = originalCacheValue.getDescription();
            flowFile = session.putAttribute(flowFile, ORIGINAL_DESCRIPTION_ATTRIBUTE_NAME, originalFlowFileDescription);
            session.transfer(flowFile, REL_DUPLICATE);
            logger.info("Found {} to be a duplicate of FlowFile with description {}", new Object[] { flowFile, originalFlowFileDescription });
            session.adjustCounter("Duplicates Detected", 1L, false);
        } else {
            session.getProvenanceReporter().route(flowFile, REL_NON_DUPLICATE);
            session.transfer(flowFile, REL_NON_DUPLICATE);
            logger.info("Could not find a duplicate entry in cache for {}; routing to non-duplicate", new Object[] { flowFile });
            session.adjustCounter("Non-Duplicate Files Processed", 1L, false);
        }
    } catch (final IOException e) {
        flowFile = session.penalize(flowFile);
        session.transfer(flowFile, REL_FAILURE);
        logger.error("Unable to communicate with cache when processing {} due to {}", new Object[] { flowFile, e });
    }
}
Also used: FlowFile (org.apache.nifi.flowfile.FlowFile), DistributedMapCacheClient (org.apache.nifi.distributed.cache.client.DistributedMapCacheClient), IOException (java.io.IOException), ComponentLog (org.apache.nifi.logging.ComponentLog)
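
The keySerializer, valueSerializer, and valueDeserializer fields are private helpers of DetectDuplicate and are not shown above. One plausible encoding for CacheValue, an 8-byte big-endian entry timestamp followed by the UTF-8 description (a sketch only; the processor's actual wire format may differ):

private static class CacheValueSerializer implements Serializer<CacheValue> {
    @Override
    public void serialize(final CacheValue entry, final OutputStream out) throws SerializationException, IOException {
        // Fixed-width timestamp first so the deserializer knows where the description starts
        out.write(ByteBuffer.allocate(8).putLong(entry.getEntryTimeMS()).array());
        out.write(entry.getDescription().getBytes(StandardCharsets.UTF_8));
    }
}

private static class CacheValueDeserializer implements Deserializer<CacheValue> {
    @Override
    public CacheValue deserialize(final byte[] input) throws DeserializationException, IOException {
        if (input == null || input.length == 0) {
            // No entry in the cache for this key
            return null;
        }
        final long entryTimeMS = ByteBuffer.wrap(input, 0, 8).getLong();
        final String description = new String(input, 8, input.length - 8, StandardCharsets.UTF_8);
        return new CacheValue(description, entryTimeMS);
    }
}

With this shape, getAndPutIfAbsent returns null the first time a key is seen (the new value is stored) and the previously stored CacheValue afterwards, which is exactly what drives the duplicate flag in onTrigger.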

Aggregations

DistributedMapCacheClient (org.apache.nifi.distributed.cache.client.DistributedMapCacheClient): 13 uses
Table (org.apache.hadoop.hbase.client.Table): 5 uses
TestRunner (org.apache.nifi.util.TestRunner): 5 uses
Test (org.junit.Test): 5 uses
IOException (java.io.IOException): 4 uses
SpringContextService (com.thinkbiganalytics.nifi.core.api.spring.SpringContextService): 3 uses
HashMap (java.util.HashMap): 3 uses
Put (org.apache.hadoop.hbase.client.Put): 3 uses
FlowFile (org.apache.nifi.flowfile.FlowFile): 3 uses
ComponentLog (org.apache.nifi.logging.ComponentLog): 3 uses
DistributedSavepointController (com.thinkbiganalytics.nifi.v2.core.savepoint.DistributedSavepointController): 2 uses
OnScheduled (org.apache.nifi.annotation.lifecycle.OnScheduled): 2 uses
StateMap (org.apache.nifi.components.state.StateMap): 2 uses
Before (org.junit.Before): 2 uses
ByteArrayOutputStream (java.io.ByteArrayOutputStream): 1 use
HashSet (java.util.HashSet): 1 use
Map (java.util.Map): 1 use
OnEnabled (org.apache.nifi.annotation.lifecycle.OnEnabled): 1 use
Column (org.apache.nifi.hbase.scan.Column): 1 use