Use of org.apache.hadoop.hdfs.inotify.EventBatch in project hadoop by apache.
From the class PBHelperClient, method convert.
public static EventBatchList convert(GetEditsFromTxidResponseProto resp) throws IOException {
final InotifyProtos.EventsListProto list = resp.getEventsList();
final long firstTxid = list.getFirstTxid();
final long lastTxid = list.getLastTxid();
List<EventBatch> batches = Lists.newArrayList();
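// A non-empty deprecated top-level events list indicates an old inotify server response format, which this client does not support.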
if (list.getEventsList().size() > 0) {
throw new IOException("Can't handle old inotify server response.");
}
for (InotifyProtos.EventBatchProto bp : list.getBatchList()) {
long txid = bp.getTxid();
if ((txid != -1) && ((txid < firstTxid) || (txid > lastTxid))) {
throw new IOException("Error converting TxidResponseProto: got a " + "transaction id " + txid + " that was outside the range of [" + firstTxid + ", " + lastTxid + "].");
}
List<Event> events = Lists.newArrayList();
for (InotifyProtos.EventProto p : bp.getEventsList()) {
switch(p.getType()) {
case EVENT_CLOSE:
InotifyProtos.CloseEventProto close = InotifyProtos.CloseEventProto.parseFrom(p.getContents());
events.add(new Event.CloseEvent(close.getPath(), close.getFileSize(), close.getTimestamp()));
break;
case EVENT_CREATE:
InotifyProtos.CreateEventProto create = InotifyProtos.CreateEventProto.parseFrom(p.getContents());
events.add(new Event.CreateEvent.Builder()
    .iNodeType(createTypeConvert(create.getType()))
    .path(create.getPath())
    .ctime(create.getCtime())
    .ownerName(create.getOwnerName())
    .groupName(create.getGroupName())
    .perms(convert(create.getPerms()))
    .replication(create.getReplication())
    .symlinkTarget(create.getSymlinkTarget().isEmpty() ? null : create.getSymlinkTarget())
    .defaultBlockSize(create.getDefaultBlockSize())
    .overwrite(create.getOverwrite())
    .build());
break;
case EVENT_METADATA:
InotifyProtos.MetadataUpdateEventProto meta = InotifyProtos.MetadataUpdateEventProto.parseFrom(p.getContents());
events.add(new Event.MetadataUpdateEvent.Builder()
    .path(meta.getPath())
    .metadataType(metadataUpdateTypeConvert(meta.getType()))
    .mtime(meta.getMtime())
    .atime(meta.getAtime())
    .replication(meta.getReplication())
    .ownerName(meta.getOwnerName().isEmpty() ? null : meta.getOwnerName())
    .groupName(meta.getGroupName().isEmpty() ? null : meta.getGroupName())
    .perms(meta.hasPerms() ? convert(meta.getPerms()) : null)
    .acls(meta.getAclsList().isEmpty() ? null : convertAclEntry(meta.getAclsList()))
    .xAttrs(meta.getXAttrsList().isEmpty() ? null : convertXAttrs(meta.getXAttrsList()))
    .xAttrsRemoved(meta.getXAttrsRemoved())
    .build());
break;
case EVENT_RENAME:
InotifyProtos.RenameEventProto rename = InotifyProtos.RenameEventProto.parseFrom(p.getContents());
events.add(new Event.RenameEvent.Builder().srcPath(rename.getSrcPath()).dstPath(rename.getDestPath()).timestamp(rename.getTimestamp()).build());
break;
case EVENT_APPEND:
InotifyProtos.AppendEventProto append = InotifyProtos.AppendEventProto.parseFrom(p.getContents());
events.add(new Event.AppendEvent.Builder().path(append.getPath()).newBlock(append.hasNewBlock() && append.getNewBlock()).build());
break;
case EVENT_UNLINK:
InotifyProtos.UnlinkEventProto unlink = InotifyProtos.UnlinkEventProto.parseFrom(p.getContents());
events.add(new Event.UnlinkEvent.Builder().path(unlink.getPath()).timestamp(unlink.getTimestamp()).build());
break;
case EVENT_TRUNCATE:
InotifyProtos.TruncateEventProto truncate = InotifyProtos.TruncateEventProto.parseFrom(p.getContents());
events.add(new Event.TruncateEvent(truncate.getPath(), truncate.getFileSize(), truncate.getTimestamp()));
break;
default:
throw new RuntimeException("Unexpected inotify event type: " + p.getType());
}
}
batches.add(new EventBatch(txid, events.toArray(new Event[0])));
}
return new EventBatchList(batches, resp.getEventsList().getFirstTxid(), resp.getEventsList().getLastTxid(), resp.getEventsList().getSyncTxid());
}
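For context, the EventBatchList built here is what DFSInotifyEventInputStream ultimately hands to callers, batch by batch. Below is a minimal sketch of a consumer, using only the EventBatchList, EventBatch and Event accessors that appear elsewhere on this page (the method name printBatches is ours):
static void printBatches(EventBatchList ebl) {
    for (EventBatch b : ebl.getBatches()) {
        for (Event e : b.getEvents()) {
            // each batch carries the transaction id of the edit-log entry it was read from
            System.out.println("txid=" + b.getTxid() + " type=" + e.getEventType());
        }
    }
}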
Use of org.apache.hadoop.hdfs.inotify.EventBatch in project hadoop by apache.
From the class TestDFSInotifyEventInputStream, method testTwoActiveNNs.
@Test(timeout = 120000)
public void testTwoActiveNNs() throws IOException, MissingEventsException {
Configuration conf = new HdfsConfiguration();
MiniQJMHACluster cluster = new MiniQJMHACluster.Builder(conf).build();
try {
cluster.getDfsCluster().waitActive();
cluster.getDfsCluster().transitionToActive(0);
DFSClient client0 = new DFSClient(cluster.getDfsCluster().getNameNode(0).getNameNodeAddress(), conf);
DFSClient client1 = new DFSClient(cluster.getDfsCluster().getNameNode(1).getNameNodeAddress(), conf);
DFSInotifyEventInputStream eis = client0.getInotifyEventStream();
for (int i = 0; i < 10; i++) {
client0.mkdirs("/dir" + i, null, false);
}
cluster.getDfsCluster().transitionToActive(1);
for (int i = 10; i < 20; i++) {
client1.mkdirs("/dir" + i, null, false);
}
// make sure that the old active can't read any further than the edits
// it logged itself (it has no idea whether the in-progress edits from
// the other writer have actually been committed)
EventBatch batch = null;
for (int i = 0; i < 10; i++) {
batch = waitForNextEvents(eis);
Assert.assertEquals(1, batch.getEvents().length);
Assert.assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
Assert.assertTrue(((Event.CreateEvent) batch.getEvents()[0]).getPath().equals("/dir" + i));
}
Assert.assertTrue(eis.poll() == null);
} finally {
try {
cluster.shutdown();
} catch (ExitUtil.ExitException e) {
// expected because the old active will be unable to flush the
// end-of-segment op since it is fenced
}
}
}
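The waitForNextEvents helper called above is not shown on this page. Below is a minimal sketch of a busy-polling implementation, assuming only that DFSInotifyEventInputStream.poll() returns null while no batch is available:
private static EventBatch waitForNextEvents(DFSInotifyEventInputStream eis) throws IOException, MissingEventsException {
    EventBatch batch = null;
    // poll() is non-blocking and returns null until a batch is ready, so keep asking
    while ((batch = eis.poll()) == null) {
    }
    return batch;
}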
Use of org.apache.hadoop.hdfs.inotify.EventBatch in project hadoop by apache.
From the class TestDFSUpgradeFromImage, method testPreserveEditLogs.
@Test
public void testPreserveEditLogs() throws Exception {
unpackStorage(HADOOP252_IMAGE, HADOOP_DFS_DIR_TXT);
/**
* The pre-created image has the following edits:
* mkdir /input; mkdir /input/dir1~5
* copyFromLocal randome_file_1 /input/dir1
* copyFromLocal randome_file_2 /input/dir2
* mv /input/dir1/randome_file_1 /input/dir3/randome_file_3
* rmdir /input/dir1
*/
Configuration conf = new HdfsConfiguration();
conf = UpgradeUtilities.initializeStorageStateConf(1, conf);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
    .numDataNodes(0)
    .format(false)
    .manageDataDfsDirs(false)
    .manageNameDfsDirs(false)
    .startupOption(StartupOption.UPGRADE)
    .build();
DFSInotifyEventInputStream ieis = cluster.getFileSystem().getInotifyEventStream(0);
EventBatch batch;
Event.CreateEvent ce;
Event.RenameEvent re;
// mkdir /input
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
ce = (Event.CreateEvent) batch.getEvents()[0];
assertEquals(ce.getPath(), "/input");
// mkdir /input/dir1~5
for (int i = 1; i <= 5; i++) {
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
ce = (Event.CreateEvent) batch.getEvents()[0];
assertEquals(ce.getPath(), "/input/dir" + i);
}
// copyFromLocal randome_file_1~2 /input/dir1~2
for (int i = 1; i <= 2; i++) {
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
if (batch.getEvents()[0].getEventType() != Event.EventType.CREATE) {
FSImage.LOG.debug("");
}
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
// copyFromLocal randome_file_1 /input/dir1, CLOSE
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CLOSE);
// copyFromLocal randome_file_i /input/diri, RENAME (the temporary ._COPYING_ file is renamed to its final name)
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.RENAME);
re = (Event.RenameEvent) batch.getEvents()[0];
assertEquals(re.getDstPath(), "/input/dir" + i + "/randome_file_" + i);
}
// mv /input/dir1/randome_file_1 /input/dir3/randome_file_3
long txIDBeforeRename = batch.getTxid();
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.RENAME);
re = (Event.RenameEvent) batch.getEvents()[0];
assertEquals(re.getDstPath(), "/input/dir3/randome_file_3");
// rmdir /input/dir1
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.UNLINK);
assertEquals(((Event.UnlinkEvent) batch.getEvents()[0]).getPath(), "/input/dir1");
long lastTxID = batch.getTxid();
// Start inotify from the tx before rename /input/dir1/randome_file_1
ieis = cluster.getFileSystem().getInotifyEventStream(txIDBeforeRename);
batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
assertEquals(1, batch.getEvents().length);
assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.RENAME);
re = (Event.RenameEvent) batch.getEvents()[0];
assertEquals(re.getDstPath(), "/input/dir3/randome_file_3");
// Try to read beyond available edits
ieis = cluster.getFileSystem().getInotifyEventStream(lastTxID + 1);
assertNull(ieis.poll());
cluster.shutdown();
}
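The resume-from-txid pattern exercised above is the same one an external consumer would use to survive restarts. Below is a minimal sketch, assuming a DistributedFileSystem handle named fs and a persisted transaction id lastSeenTxId (both names are ours), and using the timed poll(long, TimeUnit) variant that also appears in the NiFi processor below:
DFSInotifyEventInputStream stream = fs.getInotifyEventStream(lastSeenTxId + 1);
EventBatch next;
while ((next = stream.poll(1, TimeUnit.SECONDS)) != null) {
    for (Event e : next.getEvents()) {
        // handle the event, then remember how far we got
    }
    lastSeenTxId = next.getTxid(); // persist this value so a restart can resume from here
}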
Use of org.apache.hadoop.hdfs.inotify.EventBatch in project nifi by apache.
From the class GetHDFSEvents, method onTrigger.
@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
final StateManager stateManager = context.getStateManager();
try {
StateMap state = stateManager.getState(Scope.CLUSTER);
String txIdAsString = state.get(LAST_TX_ID);
if (txIdAsString != null && !"".equals(txIdAsString)) {
lastTxId = Long.parseLong(txIdAsString);
}
} catch (IOException e) {
getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
context.yield();
return;
}
try {
final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
final DFSInotifyEventInputStream eventStream = lastTxId == -1L ? getHdfsAdmin().getInotifyEventStream() : getHdfsAdmin().getInotifyEventStream(lastTxId);
final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);
if (eventBatch != null && eventBatch.getEvents() != null) {
if (eventBatch.getEvents().length > 0) {
List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
for (Event e : eventBatch.getEvents()) {
if (toProcessEvent(context, e)) {
getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
final String path = getPath(e);
FlowFile flowFile = session.create();
flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
flowFile = session.write(flowFile, new OutputStreamCallback() {
@Override
public void process(OutputStream out) throws IOException {
out.write(OBJECT_MAPPER.writeValueAsBytes(e));
}
});
flowFiles.add(flowFile);
}
}
for (FlowFile flowFile : flowFiles) {
final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
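// path already begins with '/', so prefixing a single slash still yields an hdfs:// transit URI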
final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });
session.transfer(flowFile, REL_SUCCESS);
session.getProvenanceReporter().receive(flowFile, transitUri);
}
}
lastTxId = eventBatch.getTxid();
}
} catch (IOException | InterruptedException e) {
getLogger().error("Unable to get notification information: {}", new Object[] { e });
context.yield();
return;
} catch (MissingEventsException e) {
// Set lastTxId to -1 and update state. This may cause some events not to be processed. The reason this exception is thrown is described in the
// org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API, which suggests tuning a couple of parameters if this API is used.
lastTxId = -1L;
getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. " + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
}
updateClusterStateForTxId(stateManager);
}
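The getEventBatch call above is not shown on this page. Below is a plausible sketch of a retry wrapper around the timed poll; the exact retry policy and log message are assumptions:
private EventBatch getEventBatch(DFSInotifyEventInputStream stream, long duration, TimeUnit unit, int retries) throws IOException, InterruptedException, MissingEventsException {
    for (int attempt = 1; ; attempt++) {
        try {
            // poll blocks for up to the given duration and may return null if nothing arrived in time
            return stream.poll(duration, unit);
        } catch (IOException e) {
            if (attempt > retries) {
                throw e;
            }
            getLogger().debug("Poll attempt {} failed to fetch an event batch, retrying.", new Object[] { attempt });
        }
    }
}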
Use of org.apache.hadoop.hdfs.inotify.EventBatch in project nifi by apache.
From the class TestGetHDFSEvents, method makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship.
@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
Event[] events = getEvents();
EventBatch eventBatch = mock(EventBatch.class);
when(eventBatch.getEvents()).thenReturn(events);
when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
when(eventBatch.getTxid()).thenReturn(100L);
GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
TestRunner runner = TestRunners.newTestRunner(processor);
runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
runner.run();
List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
assertEquals(3, successfulFlowFiles.size());
verify(eventBatch).getTxid();
assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
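The getEvents() helper used to stub the mock above is not shown on this page. Below is a minimal sketch of what it could return, built with the public Event constructors and builders seen in the Hadoop examples earlier; the specific paths, sizes and event types are assumptions chosen to match the /some/path(/)?.* watch pattern and the three flow files asserted:
private Event[] getEvents() {
    return new Event[] {
        new Event.CreateEvent.Builder().iNodeType(Event.CreateEvent.INodeType.FILE).path("/some/path/create").build(),
        new Event.CloseEvent("/some/path/close", 0L, System.currentTimeMillis()),
        new Event.UnlinkEvent.Builder().path("/some/path/unlink").timestamp(System.currentTimeMillis()).build()
    };
}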