Use of org.apache.hadoop.hdfs.inotify.Event in project SSM by Intel-bigdata.
The class TestInotifyFetcher, method testFetcher:
@Test(timeout = 60000)
public void testFetcher() throws IOException, InterruptedException {
    Configuration conf = new HdfsConfiguration();
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_ACLS_ENABLED_KEY, true);
    // so that we can get an atime change
    conf.setLong(DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY, 1);
    MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf);
    builder.numDataNodes(2);
    MiniDFSCluster cluster = builder.build();
    try {
        cluster.waitActive();
        DFSClient client = new DFSClient(cluster.getNameNode(0).getNameNodeAddress(), conf);
        FileSystem fs = cluster.getFileSystem(0);
        DFSTestUtil.createFile(fs, new Path("/file"), BLOCK_SIZE, (short) 1, 0L);
        DFSTestUtil.createFile(fs, new Path("/file3"), BLOCK_SIZE, (short) 1, 0L);
        DFSTestUtil.createFile(fs, new Path("/file5"), BLOCK_SIZE, (short) 1, 0L);
        DFSTestUtil.createFile(fs, new Path("/truncate_file"), BLOCK_SIZE * 2, (short) 1, 0L);
        fs.mkdirs(new Path("/tmp"), new FsPermission("777"));
        DBAdapter adapter = mock(DBAdapter.class);
        EventApplierForTest applierForTest = new EventApplierForTest(adapter, client);
        final InotifyEventFetcher fetcher = new InotifyEventFetcher(client, adapter,
                Executors.newScheduledThreadPool(2), applierForTest);
        Thread thread = new Thread() {
            public void run() {
                try {
                    fetcher.start();
                } catch (IOException | InterruptedException e) {
                    e.printStackTrace();
                }
            }
        };
        thread.start();
        Thread.sleep(2000);
        /**
         * Code copied from {@link org.apache.hadoop.hdfs.TestDFSInotifyEventInputStream}.
         */
        // RenameOp -> RenameEvent
        client.rename("/file", "/file4", null);
        // RenameOldOp -> RenameEvent
        client.rename("/file4", "/file2");
        // DeleteOp, AddOp -> UnlinkEvent, CreateEvent
        OutputStream os = client.create("/file2", true, (short) 2, BLOCK_SIZE);
        os.write(new byte[BLOCK_SIZE]);
        // CloseOp -> CloseEvent
        os.close();
        // AddOp -> AppendEvent
        os = client.append("/file2", BLOCK_SIZE, EnumSet.of(CreateFlag.APPEND), null, null);
        os.write(new byte[BLOCK_SIZE]);
        // CloseOp -> CloseEvent
        os.close();
        // so that the atime will get updated on the next line
        Thread.sleep(10);
        // TimesOp -> MetadataUpdateEvent
        client.open("/file2").read(new byte[1]);
        // SetReplicationOp -> MetadataUpdateEvent
        client.setReplication("/file2", (short) 1);
        // ConcatDeleteOp -> AppendEvent, UnlinkEvent, CloseEvent
        client.concat("/file2", new String[] { "/file3" });
        // DeleteOp -> UnlinkEvent
        client.delete("/file2", false);
        // MkdirOp -> CreateEvent
        client.mkdirs("/dir", null, false);
        // SetPermissionsOp -> MetadataUpdateEvent
        client.setPermission("/dir", FsPermission.valueOf("-rw-rw-rw-"));
        Thread.sleep(2000);
        // SetOwnerOp -> MetadataUpdateEvent
        client.setOwner("/dir", "username", "groupname");
        // SymlinkOp -> CreateEvent
        client.createSymlink("/dir", "/dir2", false);
        // SetXAttrOp -> MetadataUpdateEvent
        client.setXAttr("/file5", "user.field", "value".getBytes(), EnumSet.of(XAttrSetFlag.CREATE));
        // RemoveXAttrOp -> MetadataUpdateEvent
        client.removeXAttr("/file5", "user.field");
        // SetAclOp -> MetadataUpdateEvent
        client.setAcl("/file5", AclEntry.parseAclSpec("user::rwx,user:foo:rw-,group::r--,other::---", true));
        // SetAclOp -> MetadataUpdateEvent
        client.removeAcl("/file5");
        // RenameOldOp -> RenameEvent
        client.rename("/file5", "/dir");
        // TruncateOp -> TruncateEvent
        client.truncate("/truncate_file", BLOCK_SIZE);
        while (applierForTest.getEvents().size() != 21) {
            Thread.sleep(100);
        }
        /**
         * Refer to {@link org.apache.hadoop.hdfs.TestDFSInotifyEventInputStream} for more detail.
         */
        List<Event> events = applierForTest.getEvents();
        Assert.assertTrue(events.get(0).getEventType() == Event.EventType.RENAME);
        Assert.assertTrue(events.get(1).getEventType() == Event.EventType.RENAME);
        Assert.assertTrue(events.get(2).getEventType() == Event.EventType.CREATE);
        Assert.assertTrue(events.get(3).getEventType() == Event.EventType.CLOSE);
        Assert.assertTrue(events.get(4).getEventType() == Event.EventType.APPEND);
        Assert.assertTrue(events.get(5).getEventType() == Event.EventType.CLOSE);
        Assert.assertTrue(events.get(6).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(7).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(8).getEventType() == Event.EventType.APPEND);
        Assert.assertTrue(events.get(9).getEventType() == Event.EventType.UNLINK);
        Assert.assertTrue(events.get(10).getEventType() == Event.EventType.CLOSE);
        Assert.assertTrue(events.get(11).getEventType() == Event.EventType.UNLINK);
        Assert.assertTrue(events.get(12).getEventType() == Event.EventType.CREATE);
        Assert.assertTrue(events.get(13).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(14).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(15).getEventType() == Event.EventType.CREATE);
        Assert.assertTrue(events.get(16).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(17).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(18).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(19).getEventType() == Event.EventType.METADATA);
        Assert.assertTrue(events.get(20).getEventType() == Event.EventType.RENAME);
        // Assert.assertTrue(events.get(21).getEventType() == Event.EventType.TRUNCATE);
        fetcher.stop();
    } finally {
        cluster.shutdown();
    }
}
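For context, both the SSM fetcher above and the NiFi processor below sit on top of the same HDFS inotify API. The following is a minimal sketch of consuming that stream directly; it is not taken from either project, the class name InotifyTailer is invented here, and it assumes fs.defaultFS resolves to a reachable NameNode.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventBatch;

public class InotifyTailer {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Assumption: fs.defaultFS points at a running NameNode whose edit log we may read.
        HdfsAdmin admin = new HdfsAdmin(URI.create(conf.get("fs.defaultFS")), conf);
        // Start from the current transaction id; pass a txid to resume from a checkpoint.
        DFSInotifyEventInputStream stream = admin.getInotifyEventStream();
        while (true) {
            // take() blocks until the NameNode publishes new edit-log transactions.
            EventBatch batch = stream.take();
            for (Event event : batch.getEvents()) {
                System.out.println(batch.getTxid() + " " + event.getEventType());
            }
        }
    }
}

SSM's InotifyEventFetcher and NiFi's GetHDFSEvents differ mainly in how they checkpoint the last transaction id and in what they do with each batch, not in how the stream itself is obtained.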
Use of org.apache.hadoop.hdfs.inotify.Event in project nifi by apache.
The class GetHDFSEvents, method onTrigger:
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();
    try {
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);
        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }
    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L
                ? getHdfsAdmin().getInotifyEventStream()
                : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);
        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);
                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {
                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });
                        flowFiles.add(flowFile);
                    }
                }
                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }
            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // Set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception
        // is thrown is described in the org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API, which
        // suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. "
                + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }
    updateClusterStateForTxId(stateManager);
}
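The getPath(e) and toProcessEvent(context, e) helpers referenced above belong to GetHDFSEvents and are not shown in this listing. As a rough illustration only (not the processor's actual implementation), extracting the affected path from an inotify Event generally means dispatching on the event subtype:

import org.apache.hadoop.hdfs.inotify.Event;

final class EventPaths {
    // Illustrative helper; the real GetHDFSEvents#getPath may differ in detail.
    static String eventPath(Event e) {
        switch (e.getEventType()) {
            case CREATE:   return ((Event.CreateEvent) e).getPath();
            case CLOSE:    return ((Event.CloseEvent) e).getPath();
            case APPEND:   return ((Event.AppendEvent) e).getPath();
            case RENAME:   return ((Event.RenameEvent) e).getSrcPath();
            case METADATA: return ((Event.MetadataUpdateEvent) e).getPath();
            case UNLINK:   return ((Event.UnlinkEvent) e).getPath();
            default:       return null;
        }
    }
}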
Use of org.apache.hadoop.hdfs.inotify.Event in project nifi by apache.
The class TestGetHDFSEvents, method makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship:
@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);
    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
    runner.run();
    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(3, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
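A brief note on the mock setup: the stream's poll is stubbed with 1000000L microseconds because onTrigger converts POLL_DURATION (here "1 second") to TimeUnit.MICROSECONDS before polling, so the stubbed argument has to match that conversion. A trivial sketch of the same arithmetic (illustrative class name only):

import java.util.concurrent.TimeUnit;

class PollDurationExample {
    public static void main(String[] args) {
        // "1 second" expressed in the unit the processor polls with.
        long pollMicros = TimeUnit.SECONDS.toMicros(1);
        System.out.println(pollMicros); // 1000000
    }
}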
Use of org.apache.hadoop.hdfs.inotify.Event in project nifi by apache.
The class TestGetHDFSEvents, method onTriggerShouldOnlyProcessEventsWithSpecificPath:
@Test
public void onTriggerShouldOnlyProcessEventsWithSpecificPath() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);
    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path/create(/)?");
    runner.run();
    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(1, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
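Judging from the property values used in these two tests, HDFS_PATH_TO_WATCH appears to be treated as a regular expression against each event's path, which is why only one of the three mocked events survives the narrower pattern here. A hedged illustration of that kind of filtering (hypothetical paths, since the getEvents() fixture is not shown in this listing):

import java.util.regex.Pattern;

class PathWatchExample {
    public static void main(String[] args) {
        Pattern watch = Pattern.compile("/some/path/create(/)?");
        System.out.println(watch.matcher("/some/path/create").matches()); // true
        System.out.println(watch.matcher("/some/path/delete").matches()); // false
    }
}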
Use of org.apache.hadoop.hdfs.inotify.Event in project SSM by Intel-bigdata.
The class TestInotifyEventApplier, method testApplier:
@Test
public void testApplier() throws Exception {
    DFSClient client = Mockito.mock(DFSClient.class);
    FileInfo root = HadoopUtil.convertFileStatus(getDummyDirStatus("/", 1000), "/");
    metaStore.insertFile(root);
    BackUpInfo backUpInfo = new BackUpInfo(1L, "/file", "remote/dest/", 10);
    metaStore.insertBackUpInfo(backUpInfo);
    InotifyEventApplier applier = new InotifyEventApplier(metaStore, client);
    Event.CreateEvent createEvent = new Event.CreateEvent.Builder()
            .iNodeType(Event.CreateEvent.INodeType.FILE).ctime(1).defaultBlockSize(1024)
            .groupName("cg1").overwrite(true).ownerName("user1").path("/file")
            .perms(new FsPermission("777")).replication(3).build();
    HdfsFileStatus status1 = CompatibilityHelperLoader.getHelper().createHdfsFileStatus(
            0, false, 1, 1024, 0, 0, new FsPermission("777"), "owner", "group",
            new byte[0], new byte[0], 1010, 0, null, (byte) 0);
    Mockito.when(client.getFileInfo(Matchers.startsWith("/file"))).thenReturn(status1);
    Mockito.when(client.getFileInfo(Matchers.startsWith("/dir"))).thenReturn(getDummyDirStatus("", 1010));
    applier.apply(new Event[] { createEvent });
    FileInfo result1 = metaStore.getFile().get(1);
    Assert.assertEquals(result1.getPath(), "/file");
    Assert.assertEquals(result1.getFileId(), 1010L);
    Assert.assertEquals(result1.getPermission(), 511);
    Event close = new Event.CloseEvent("/file", 1024, 0);
    applier.apply(new Event[] { close });
    FileInfo result2 = metaStore.getFile().get(1);
    Assert.assertEquals(result2.getLength(), 1024);
    Assert.assertEquals(result2.getModificationTime(), 0L);
    // Event truncate = new Event.TruncateEvent("/file", 512, 16);
    // applier.apply(new Event[] {truncate});
    // ResultSet result3 = metaStore.executeQuery("SELECT * FROM files");
    // Assert.assertEquals(result3.getLong("length"), 512);
    // Assert.assertEquals(result3.getLong("modification_time"), 16L);
    Event meta = new Event.MetadataUpdateEvent.Builder()
            .path("/file").metadataType(Event.MetadataUpdateEvent.MetadataType.TIMES)
            .mtime(2).atime(3).replication(4).ownerName("user2").groupName("cg2").build();
    applier.apply(new Event[] { meta });
    FileInfo result4 = metaStore.getFile().get(1);
    Assert.assertEquals(result4.getAccessTime(), 3);
    Assert.assertEquals(result4.getModificationTime(), 2);
    Event meta1 = new Event.MetadataUpdateEvent.Builder()
            .path("/file").metadataType(Event.MetadataUpdateEvent.MetadataType.OWNER)
            .ownerName("user1").groupName("cg1").build();
    applier.apply(new Event[] { meta1 });
    result4 = metaStore.getFile().get(1);
    Assert.assertEquals(result4.getOwner(), "user1");
    Assert.assertEquals(result4.getGroup(), "cg1");
    Event.CreateEvent createEvent2 = new Event.CreateEvent.Builder()
            .iNodeType(Event.CreateEvent.INodeType.DIRECTORY).ctime(1)
            .groupName("cg1").overwrite(true).ownerName("user1").path("/dir")
            .perms(new FsPermission("777")).replication(3).build();
    Event.CreateEvent createEvent3 = new Event.CreateEvent.Builder()
            .iNodeType(Event.CreateEvent.INodeType.FILE).ctime(1)
            .groupName("cg1").overwrite(true).ownerName("user1").path("/dir/file")
            .perms(new FsPermission("777")).replication(3).build();
    Event rename = new Event.RenameEvent.Builder()
            .dstPath("/dir2").srcPath("/dir").timestamp(5).build();
    applier.apply(new Event[] { createEvent2, createEvent3, rename });
    List<FileInfo> result5 = metaStore.getFile();
    List<String> expectedPaths = Arrays.asList("/dir2", "/dir2/file", "/file");
    List<String> actualPaths = new ArrayList<>();
    for (FileInfo s : result5) {
        actualPaths.add(s.getPath());
    }
    Collections.sort(actualPaths);
    Assert.assertTrue(actualPaths.size() == 4);
    Assert.assertTrue(actualPaths.containsAll(expectedPaths));
    Event unlink = new Event.UnlinkEvent.Builder().path("/").timestamp(6).build();
    applier.apply(new Event[] { unlink });
    Thread.sleep(1200);
    Assert.assertEquals(metaStore.getFile().size(), 0);
    System.out.println("Files in table " + metaStore.getFile().size());
    List<FileDiff> fileDiffList = metaStore.getPendingDiff();
    Assert.assertTrue(fileDiffList.size() == 4);
}