
Example 16 with Event

Use of org.apache.hadoop.hdfs.inotify.Event in project SSM by Intel-bigdata.

Class TestInotifyFetcher, method testFetcher:

@Test(timeout = 60000)
public void testFetcher() throws IOException, InterruptedException {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
    // so that we can get an atime change
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    builder.numDataNodes(2);
    MiniDFSCluster cluster = builder.build();
    try {
        cluster.waitActive();
        DFSClient client = new DFSClient(cluster.getNameNode(0).getNameNodeAddress(), conf);
        FileSystem fs = cluster.getFileSystem(0);
        DFSTestUtil.createFile(fs, new Path("/file"), BLOCK_SIZE, (short) 1, 0L);
        DFSTestUtil.createFile(fs, new Path("/file3"), BLOCK_SIZE, (short) 1, 0L);
        DFSTestUtil.createFile(fs, new Path("/file5"), BLOCK_SIZE, (short) 1, 0L);
        DFSTestUtil.createFile(fs, new Path("/truncate_file"), BLOCK_SIZE * 2, (short) 1, 0L);
        fs.mkdirs(new Path("/tmp"), new FsPermission("777"));
        DBAdapter adapter = mock(DBAdapter.class);
        EventApplierForTest applierForTest = new EventApplierForTest(adapter, client);
        final InotifyEventFetcher fetcher = new InotifyEventFetcher(client, adapter, Executors.newScheduledThreadPool(2), applierForTest);
        Thread thread = new Thread() {

            public void run() {
                try {
                    fetcher.start();
                } catch (IOException | InterruptedException e) {
                    e.printStackTrace();
                }
            }
        };
        thread.start();
        Thread.sleep(2000);
        /*
         * Code copied from {@link org.apache.hadoop.hdfs.TestDFSInotifyEventInputStream}.
         */
        // RenameOp -> RenameEvent
        client.rename("/file", "/file4", null);
        // RenameOldOp -> RenameEvent
        client.rename("/file4", "/file2");
        // DeleteOp, AddOp -> UnlinkEvent, CreateEvent
        OutputStream os = client.create("/file2", true, (short) 2, BLOCK_SIZE);
        os.write(new byte[BLOCK_SIZE]);
        // CloseOp -> CloseEvent
        os.close();
        // AddOp -> AppendEvent
        os = client.append("/file2", BLOCK_SIZE, EnumSet.of(CreateFlag.APPEND), null, null);
        os.write(new byte[BLOCK_SIZE]);
        // CloseOp -> CloseEvent
        os.close();
        // so that the atime will get updated on the next line
        Thread.sleep(10);
        // TimesOp -> MetadataUpdateEvent
        client.open("/file2").read(new byte[1]);
        // SetReplicationOp -> MetadataUpdateEvent
        client.setReplication("/file2", (short) 1);
        // ConcatDeleteOp -> AppendEvent, UnlinkEvent, CloseEvent
        client.concat("/file2", new String[] { "/file3" });
        // DeleteOp -> UnlinkEvent
        client.delete("/file2", false);
        // MkdirOp -> CreateEvent
        client.mkdirs("/dir", null, false);
        // SetPermissionsOp -> MetadataUpdateEvent
        client.setPermission("/dir", FsPermission.valueOf("-rw-rw-rw-"));
        // SetOwnerOp -> MetadataUpdateEvent
        Thread.sleep(2000);
        client.setOwner("/dir", "username", "groupname");
        // SymlinkOp -> CreateEvent
        client.createSymlink("/dir", "/dir2", false);
        client.setXAttr("/file5", "user.field", "value".getBytes(), EnumSet.of(// SetXAttrOp -> MetadataUpdateEvent
        XAttrSetFlag.CREATE));
        // RemoveXAttrOp -> MetadataUpdateEvent
        client.removeXAttr("/file5", "user.field");
        // SetAclOp -> MetadataUpdateEvent
        client.setAcl("/file5", AclEntry.parseAclSpec("user::rwx,user:foo:rw-,group::r--,other::---", true));
        // SetAclOp -> MetadataUpdateEvent
        client.removeAcl("/file5");
        // RenameOldOp -> RenameEvent
        client.rename("/file5", "/dir");
        //TruncateOp -> TruncateEvent
        client.truncate("/truncate_file", BLOCK_SIZE);
        while (applierForTest.getEvents().size() != 21) {
            Thread.sleep(100);
        }
        /*
         * See {@link org.apache.hadoop.hdfs.TestDFSInotifyEventInputStream} for more detail.
         */
        List<Event> events = applierForTest.getEvents();
        Assert.assertTrue(events.get(0).getEventType() == Event.EventType.RENAME);
        Assert.assertTrue(events.get(1).getEventType() == Event.EventType.RENAME);
        Assert.assertTrue(events.get(2).getEventType() == Event.EventType.CREATE);
        Assert.assertTrue(events.get(3).getEventType() == Event.EventType.CLOSE);
        Assert.assertTrue(events.get(4).getEventType() == Event.EventType.APPEND);
        Assert.assertTrue(events.get(5).getEventType() == Event.EventType.CLOSE);
        Assert.assertTrue(events.get(6).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(7).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(8).getEventType() == Event.EventType.APPEND);
        Assert.assertTrue(events.get(9).getEventType() == Event.EventType.UNLINK);
        Assert.assertTrue(events.get(10).getEventType() == Event.EventType.CLOSE);
        Assert.assertTrue(events.get(11).getEventType() == Event.EventType.UNLINK);
        Assert.assertTrue(events.get(12).getEventType() == Event.EventType.CREATE);
        Assert.assertTrue(events.get(13).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(14).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(15).getEventType() == Event.EventType.CREATE);
        Assert.assertTrue(events.get(16).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(17).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(18).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(19).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(20).getEventType() == Event.EventType.RENAME);
        //      Assert.assertTrue(events.get(21).getEventType() == Event.EventType.TRUNCATE);
        fetcher.stop();
    } finally {
        cluster.shutdown();
    }
}
Also used: DFSClient(org.apache.hadoop.hdfs.DFSClient) Path(org.apache.hadoop.fs.Path) MiniDFSCluster(org.apache.hadoop.hdfs.MiniDFSCluster) HdfsConfiguration(org.apache.hadoop.hdfs.HdfsConfiguration) Configuration(org.apache.hadoop.conf.Configuration) OutputStream(java.io.OutputStream) IOException(java.io.IOException) DBAdapter(org.smartdata.server.metastore.DBAdapter) FileSystem(org.apache.hadoop.fs.FileSystem) Event(org.apache.hadoop.hdfs.inotify.Event) FsPermission(org.apache.hadoop.fs.permission.FsPermission) Test(org.junit.Test)
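Both SSM's InotifyEventFetcher and the NiFi processor in the next example are built on the same underlying Hadoop API: HdfsAdmin#getInotifyEventStream, which returns a DFSInotifyEventInputStream of EventBatch objects. As a point of reference, here is a minimal, self-contained sketch of tailing that stream directly; the NameNode URI is an assumption, and error handling is omitted.

import java.net.URI;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventBatch;

// Minimal sketch: tail HDFS inotify events directly via HdfsAdmin.
// The NameNode URI below is a placeholder; point it at your own cluster.
public class InotifyTailer {

    public static void main(String[] args) throws Exception {
        HdfsAdmin admin = new HdfsAdmin(URI.create("hdfs://localhost:9000"), new Configuration());
        DFSInotifyEventInputStream stream = admin.getInotifyEventStream();
        while (true) {
            // poll returns null when no batch arrives within the timeout
            EventBatch batch = stream.poll(1, TimeUnit.SECONDS);
            if (batch == null) {
                continue;
            }
            for (Event event : batch.getEvents()) {
                System.out.println(batch.getTxid() + " " + event.getEventType());
            }
        }
    }
}

Note that the inotify stream can only be read by an HDFS superuser, which is also why the test above goes through a DFSClient connected to the MiniDFSCluster's NameNode.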

Example 17 with Event

Use of org.apache.hadoop.hdfs.inotify.Event in project NiFi by Apache.

Class GetHDFSEvents, method onTrigger:

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();
    try {
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);
        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }
    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L ? getHdfsAdmin().getInotifyEventStream() : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);
        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);
                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });
                        flowFiles.add(flowFile);
                    }
                }
                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }
            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception is thrown is described in the
        // org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API. It suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. " + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }
    updateClusterStateForTxId(stateManager);
}
Also used: FlowFile(org.apache.nifi.flowfile.FlowFile) StateMap(org.apache.nifi.components.state.StateMap) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MissingEventsException(org.apache.hadoop.hdfs.inotify.MissingEventsException) StateManager(org.apache.nifi.components.state.StateManager) TimeUnit(java.util.concurrent.TimeUnit) Event(org.apache.hadoop.hdfs.inotify.Event) DFSInotifyEventInputStream(org.apache.hadoop.hdfs.DFSInotifyEventInputStream) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch)
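The key pattern in onTrigger above is resuming from a persisted transaction id and recovering from MissingEventsException when the NameNode has already purged the requested edits. A minimal sketch of that pattern outside NiFi follows; admin is an HdfsAdmin instance, loadLastTxId, saveLastTxId, and handle are hypothetical helpers standing in for the processor's state management and event processing, and IOException/InterruptedException handling is omitted.

// Resume from a stored transaction id, or from the current state when none exists.
long lastTxId = loadLastTxId();  // hypothetical: returns the persisted txid, or -1L if none
DFSInotifyEventInputStream stream = (lastTxId == -1L)
        ? admin.getInotifyEventStream()
        : admin.getInotifyEventStream(lastTxId);
try {
    EventBatch batch = stream.poll(5, TimeUnit.SECONDS);
    if (batch != null) {
        for (Event event : batch.getEvents()) {
            handle(event);  // hypothetical application-specific handler
        }
        // Persist the batch's txid so the next run resumes where this one stopped.
        lastTxId = batch.getTxid();
        saveLastTxId(lastTxId);
    }
} catch (MissingEventsException e) {
    // The requested edits are no longer available; restart from the live state and
    // accept that some events were missed, as the processor above does.
    lastTxId = -1L;
    saveLastTxId(lastTxId);
}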

Example 18 with Event

Use of org.apache.hadoop.hdfs.inotify.Event in project NiFi by Apache.

Class TestGetHDFSEvents, method makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship:

@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);
    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
    runner.run();
    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(3, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
Also used: MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) Event(org.apache.hadoop.hdfs.inotify.Event) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch) Test(org.junit.Test)
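Both this test and the next one call a getEvents() helper that is not included in this snippet; its real contents live in the NiFi test class. A plausible reconstruction is sketched below, assuming it returns three events of which exactly one has a path under /some/path/create, which is what the assertions in these two tests require. The paths and field values are illustrative, and FsPermission comes from org.apache.hadoop.fs.permission.

// Hypothetical reconstruction of the getEvents() helper used by these tests.
// The real NiFi test may construct different events; only builders and constructors
// shown elsewhere in these examples are used here.
private Event[] getEvents() {
    Event create = new Event.CreateEvent.Builder()
        .iNodeType(Event.CreateEvent.INodeType.FILE).ctime(1)
        .ownerName("user1").groupName("group1")
        .path("/some/path/create")
        .perms(new FsPermission("777")).replication(3).build();
    Event close = new Event.CloseEvent("/some/path/close", 1024, 1);
    Event meta = new Event.MetadataUpdateEvent.Builder()
        .path("/some/path/metadata")
        .metadataType(Event.MetadataUpdateEvent.MetadataType.TIMES)
        .mtime(2).atime(3).build();
    return new Event[] { create, close, meta };
}

With events shaped like this, the broad watch pattern /some/path(/)?.* matches all three and yields three flow files, while the narrower /some/path/create(/)? pattern in the next example matches only the create event.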

Example 19 with Event

Use of org.apache.hadoop.hdfs.inotify.Event in project NiFi by Apache.

Class TestGetHDFSEvents, method onTriggerShouldOnlyProcessEventsWithSpecificPath:

@Test
public void onTriggerShouldOnlyProcessEventsWithSpecificPath() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);
    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/create(/)?");
    runner.run();
    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
Also used: MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) Event(org.apache.hadoop.hdfs.inotify.Event) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch) Test(org.junit.Test)

Example 20 with Event

Use of org.apache.hadoop.hdfs.inotify.Event in project SSM by Intel-bigdata.

Class TestInotifyEventApplier, method testApplier:

@Test
public void testApplier() throws Exception {
    DFSClient client = Mockito.mock(DFSClient.class);
    FileInfo root = HadoopUtil.convertFileStatus(getDummyDirStatus("/", 1000), "/");
    metaStore.insertFile(root);
    BackUpInfo backUpInfo = new BackUpInfo(1L, "/file", "remote/dest/", 10);
    metaStore.insertBackUpInfo(backUpInfo);
    InotifyEventApplier applier = new InotifyEventApplier(metaStore, client);
    Event.CreateEvent createEvent = new Event.CreateEvent.Builder()
        .iNodeType(Event.CreateEvent.INodeType.FILE).ctime(1).defaultBlockSize(1024)
        .groupName("cg1").overwrite(true).ownerName("user1").path("/file")
        .perms(new FsPermission("777")).replication(3).build();
    HdfsFileStatus status1 = CompatibilityHelperLoader.getHelper().createHdfsFileStatus(
        0, false, 1, 1024, 0, 0, new FsPermission("777"), "owner", "group",
        new byte[0], new byte[0], 1010, 0, null, (byte) 0);
    Mockito.when(client.getFileInfo(Matchers.startsWith("/file"))).thenReturn(status1);
    Mockito.when(client.getFileInfo(Matchers.startsWith("/dir"))).thenReturn(getDummyDirStatus("", 1010));
    applier.apply(new Event[] { createEvent });
    FileInfo result1 = metaStore.getFile().get(1);
    Assert.assertEquals(result1.getPath(), "/file");
    Assert.assertEquals(result1.getFileId(), 1010L);
    Assert.assertEquals(result1.getPermission(), 511);
    Event close = new Event.CloseEvent("/file", 1024, 0);
    applier.apply(new Event[] { close });
    FileInfo result2 = metaStore.getFile().get(1);
    Assert.assertEquals(result2.getLength(), 1024);
    Assert.assertEquals(result2.getModificationTime(), 0L);
    // Event truncate = new Event.TruncateEvent("/file", 512, 16);
    // applier.apply(new Event[] {truncate});
    // ResultSet result3 = metaStore.executeQuery("SELECT * FROM files");
    // Assert.assertEquals(result3.getLong("length"), 512);
    // Assert.assertEquals(result3.getLong("modification_time"), 16L);
    Event meta = new Event.MetadataUpdateEvent.Builder()
        .path("/file").metadataType(Event.MetadataUpdateEvent.MetadataType.TIMES)
        .mtime(2).atime(3).replication(4).ownerName("user2").groupName("cg2").build();
    applier.apply(new Event[] { meta });
    FileInfo result4 = metaStore.getFile().get(1);
    Assert.assertEquals(result4.getAccessTime(), 3);
    Assert.assertEquals(result4.getModificationTime(), 2);
    Event meta1 = new Event.MetadataUpdateEvent.Builder()
        .path("/file").metadataType(Event.MetadataUpdateEvent.MetadataType.OWNER)
        .ownerName("user1").groupName("cg1").build();
    applier.apply(new Event[] { meta1 });
    result4 = metaStore.getFile().get(1);
    Assert.assertEquals(result4.getOwner(), "user1");
    Assert.assertEquals(result4.getGroup(), "cg1");
    Event.CreateEvent createEvent2 = new Event.CreateEvent.Builder()
        .iNodeType(Event.CreateEvent.INodeType.DIRECTORY).ctime(1).groupName("cg1")
        .overwrite(true).ownerName("user1").path("/dir")
        .perms(new FsPermission("777")).replication(3).build();
    Event.CreateEvent createEvent3 = new Event.CreateEvent.Builder()
        .iNodeType(Event.CreateEvent.INodeType.FILE).ctime(1).groupName("cg1")
        .overwrite(true).ownerName("user1").path("/dir/file")
        .perms(new FsPermission("777")).replication(3).build();
    Event rename = new Event.RenameEvent.Builder()
        .dstPath("/dir2").srcPath("/dir").timestamp(5).build();
    applier.apply(new Event[] { createEvent2, createEvent3, rename });
    List<FileInfo> result5 = metaStore.getFile();
    List<String> expectedPaths = Arrays.asList("/dir2", "/dir2/file", "/file");
    List<String> actualPaths = new ArrayList<>();
    for (FileInfo s : result5) {
        actualPaths.add(s.getPath());
    }
    Collections.sort(actualPaths);
    Assert.assertTrue(actualPaths.size() == 4);
    Assert.assertTrue(actualPaths.containsAll(expectedPaths));
    Event unlink = new Event.UnlinkEvent.Builder().path("/").timestamp(6).build();
    applier.apply(new Event[] { unlink });
    Thread.sleep(1200);
    Assert.assertEquals(metaStore.getFile().size(), 0);
    System.out.println("Files in table " + metaStore.getFile().size());
    List<FileDiff> fileDiffList = metaStore.getPendingDiff();
    Assert.assertTrue(fileDiffList.size() == 4);
}
Also used: DFSClient(org.apache.hadoop.hdfs.DFSClient) ArrayList(java.util.ArrayList) FileInfo(org.smartdata.model.FileInfo) BackUpInfo(org.smartdata.model.BackUpInfo) HdfsFileStatus(org.apache.hadoop.hdfs.protocol.HdfsFileStatus) Event(org.apache.hadoop.hdfs.inotify.Event) FileDiff(org.smartdata.model.FileDiff) FsPermission(org.apache.hadoop.fs.permission.FsPermission) Test(org.junit.Test)
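The applier exercised here maps each inotify event onto metastore updates. As a general illustration of working with the Event hierarchy (not the SSM implementation itself), handling code typically switches on Event.getEventType() and downcasts to the concrete subclass to read its fields; the bodies below are placeholder comments.

// Illustrative dispatch over the inotify event types; bodies are placeholders.
void apply(Event event) {
    switch (event.getEventType()) {
        case CREATE: {
            Event.CreateEvent create = (Event.CreateEvent) event;
            // insert a file or directory record for create.getPath()
            break;
        }
        case CLOSE: {
            Event.CloseEvent close = (Event.CloseEvent) event;
            // update length and modification time for close.getPath()
            break;
        }
        case RENAME: {
            Event.RenameEvent rename = (Event.RenameEvent) event;
            // move records from rename.getSrcPath() to rename.getDstPath()
            break;
        }
        case METADATA: {
            Event.MetadataUpdateEvent meta = (Event.MetadataUpdateEvent) event;
            // apply the change described by meta.getMetadataType() (times, owner, perms, ...)
            break;
        }
        case UNLINK: {
            Event.UnlinkEvent unlink = (Event.UnlinkEvent) event;
            // remove records under unlink.getPath()
            break;
        }
        case APPEND:
        case TRUNCATE:
        default:
            // remaining event types handled as the application requires
            break;
    }
}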

Aggregations

Event (org.apache.hadoop.hdfs.inotify.Event): 21
Test (org.junit.Test): 12
EventBatch (org.apache.hadoop.hdfs.inotify.EventBatch): 10
ArrayList (java.util.ArrayList): 9
FsPermission (org.apache.hadoop.fs.permission.FsPermission): 7
DFSClient (org.apache.hadoop.hdfs.DFSClient): 6
IOException (java.io.IOException): 5
MockFlowFile (org.apache.nifi.util.MockFlowFile): 4
TestRunner (org.apache.nifi.util.TestRunner): 4
OutputStream (java.io.OutputStream): 3
HdfsFileStatus (org.apache.hadoop.hdfs.protocol.HdfsFileStatus): 3
AlluxioURI (alluxio.AlluxioURI): 2
InvalidPathException (alluxio.exception.InvalidPathException): 2
LockResource (alluxio.resource.LockResource): 2
Callable (java.util.concurrent.Callable): 2
AclEntryProto (org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEntryProto): 2
FileInfo (org.smartdata.model.FileInfo): 2
Constants (alluxio.Constants): 1
SyncInfo (alluxio.SyncInfo): 1
ConcurrentHashSet (alluxio.collections.ConcurrentHashSet): 1