Example 16 with EventBatch

Use of org.apache.hadoop.hdfs.inotify.EventBatch in project hadoop by apache.

The class PBHelperClient, method convert.

public static EventBatchList convert(GetEditsFromTxidResponseProto resp) throws IOException {
    final InotifyProtos.EventsListProto list = resp.getEventsList();
    final long firstTxid = list.getFirstTxid();
    final long lastTxid = list.getLastTxid();
    List<EventBatch> batches = Lists.newArrayList();
    // A non-empty legacy "events" field indicates an old-format server
    // response that this client cannot interpret; new servers populate
    // the "batch" field consumed below instead.
    if (list.getEventsList().size() > 0) {
        throw new IOException("Can't handle old inotify server response.");
    }
    for (InotifyProtos.EventBatchProto bp : list.getBatchList()) {
        long txid = bp.getTxid();
        if ((txid != -1) && ((txid < firstTxid) || (txid > lastTxid))) {
            throw new IOException("Error converting TxidResponseProto: got a " + "transaction id " + txid + " that was outside the range of [" + firstTxid + ", " + lastTxid + "].");
        }
        List<Event> events = Lists.newArrayList();
        for (InotifyProtos.EventProto p : bp.getEventsList()) {
            switch(p.getType()) {
                case EVENT_CLOSE:
                    InotifyProtos.CloseEventProto close = InotifyProtos.CloseEventProto.parseFrom(p.getContents());
                    events.add(new Event.CloseEvent(close.getPath(), close.getFileSize(), close.getTimestamp()));
                    break;
                case EVENT_CREATE:
                    InotifyProtos.CreateEventProto create = InotifyProtos.CreateEventProto.parseFrom(p.getContents());
                    events.add(new Event.CreateEvent.Builder().iNodeType(createTypeConvert(create.getType())).path(create.getPath()).ctime(create.getCtime()).ownerName(create.getOwnerName()).groupName(create.getGroupName()).perms(convert(create.getPerms())).replication(create.getReplication()).symlinkTarget(create.getSymlinkTarget().isEmpty() ? null : create.getSymlinkTarget()).defaultBlockSize(create.getDefaultBlockSize()).overwrite(create.getOverwrite()).build());
                    break;
                case EVENT_METADATA:
                    InotifyProtos.MetadataUpdateEventProto meta = InotifyProtos.MetadataUpdateEventProto.parseFrom(p.getContents());
                    events.add(new Event.MetadataUpdateEvent.Builder().path(meta.getPath()).metadataType(metadataUpdateTypeConvert(meta.getType())).mtime(meta.getMtime()).atime(meta.getAtime()).replication(meta.getReplication()).ownerName(meta.getOwnerName().isEmpty() ? null : meta.getOwnerName()).groupName(meta.getGroupName().isEmpty() ? null : meta.getGroupName()).perms(meta.hasPerms() ? convert(meta.getPerms()) : null).acls(meta.getAclsList().isEmpty() ? null : convertAclEntry(meta.getAclsList())).xAttrs(meta.getXAttrsList().isEmpty() ? null : convertXAttrs(meta.getXAttrsList())).xAttrsRemoved(meta.getXAttrsRemoved()).build());
                    break;
                case EVENT_RENAME:
                    InotifyProtos.RenameEventProto rename = InotifyProtos.RenameEventProto.parseFrom(p.getContents());
                    events.add(new Event.RenameEvent.Builder().srcPath(rename.getSrcPath()).dstPath(rename.getDestPath()).timestamp(rename.getTimestamp()).build());
                    break;
                case EVENT_APPEND:
                    InotifyProtos.AppendEventProto append = InotifyProtos.AppendEventProto.parseFrom(p.getContents());
                    events.add(new Event.AppendEvent.Builder().path(append.getPath()).newBlock(append.hasNewBlock() && append.getNewBlock()).build());
                    break;
                case EVENT_UNLINK:
                    InotifyProtos.UnlinkEventProto unlink = InotifyProtos.UnlinkEventProto.parseFrom(p.getContents());
                    events.add(new Event.UnlinkEvent.Builder().path(unlink.getPath()).timestamp(unlink.getTimestamp()).build());
                    break;
                case EVENT_TRUNCATE:
                    InotifyProtos.TruncateEventProto truncate = InotifyProtos.TruncateEventProto.parseFrom(p.getContents());
                    events.add(new Event.TruncateEvent(truncate.getPath(), truncate.getFileSize(), truncate.getTimestamp()));
                    break;
                default:
                    throw new RuntimeException("Unexpected inotify event type: " + p.getType());
            }
        }
        batches.add(new EventBatch(txid, events.toArray(new Event[0])));
    }
    return new EventBatchList(batches, firstTxid, lastTxid, list.getSyncTxid());
}
Also used : InotifyProtos(org.apache.hadoop.hdfs.protocol.proto.InotifyProtos) Builder(org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.LocatedBlockProto.Builder) DatanodeInfoBuilder(org.apache.hadoop.hdfs.protocol.DatanodeInfo.DatanodeInfoBuilder) IOException(java.io.IOException) Event(org.apache.hadoop.hdfs.inotify.Event) EventBatchList(org.apache.hadoop.hdfs.inotify.EventBatchList) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch)
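
The convert method above produces the batches that DFSInotifyEventInputStream hands to callers. For orientation, here is a minimal consumer-side sketch, assuming a reachable NameNode; the URI and the println dispatch are placeholders, not part of the Hadoop source above.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSInotifyEventInputStream;
import org.apache.hadoop.hdfs.client.HdfsAdmin;
import org.apache.hadoop.hdfs.inotify.Event;
import org.apache.hadoop.hdfs.inotify.EventBatch;

public class InotifyConsumerSketch {

    public static void main(String[] args) throws Exception {
        // Placeholder URI; point this at your NameNode.
        HdfsAdmin admin = new HdfsAdmin(URI.create("hdfs://localhost:8020"), new Configuration());
        DFSInotifyEventInputStream stream = admin.getInotifyEventStream();
        while (true) {
            // take() blocks until the NameNode has a batch of edits to hand out.
            EventBatch batch = stream.take();
            for (Event event : batch.getEvents()) {
                switch (event.getEventType()) {
                    case CREATE:
                        System.out.println("create: " + ((Event.CreateEvent) event).getPath());
                        break;
                    case UNLINK:
                        System.out.println("unlink: " + ((Event.UnlinkEvent) event).getPath());
                        break;
                    default:
                        System.out.println("other: " + event.getEventType());
                        break;
                }
            }
        }
    }
}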

Example 17 with EventBatch

Use of org.apache.hadoop.hdfs.inotify.EventBatch in project hadoop by apache.

The class TestDFSInotifyEventInputStream, method testTwoActiveNNs.

@Test(timeout = 120000)
public void testTwoActiveNNs() throws IOException, MissingEventsException {
    Configuration conf = new HdfsConfiguration();
    MiniQJMHACluster cluster = new MiniQJMHACluster.Builder(conf).build();
    try {
        cluster.getDfsCluster().waitActive();
        cluster.getDfsCluster().transitionToActive(0);
        DFSClient client0 = new DFSClient(cluster.getDfsCluster().getNameNode(0).getNameNodeAddress(), conf);
        DFSClient client1 = new DFSClient(cluster.getDfsCluster().getNameNode(1).getNameNodeAddress(), conf);
        DFSInotifyEventInputStream eis = client0.getInotifyEventStream();
        for (int i = 0; i < 10; i++) {
            client0.mkdirs("/dir" + i, null, false);
        }
        cluster.getDfsCluster().transitionToActive(1);
        for (int i = 10; i < 20; i++) {
            client1.mkdirs("/dir" + i, null, false);
        }
        // make sure that the old active can't read any further than the edits
        // it logged itself (it has no idea whether the in-progress edits from
        // the other writer have actually been committed)
        EventBatch batch = null;
        for (int i = 0; i < 10; i++) {
            batch = waitForNextEvents(eis);
            Assert.assertEquals(1, batch.getEvents().length);
            Assert.assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
            Assert.assertTrue(((Event.CreateEvent) batch.getEvents()[0]).getPath().equals("/dir" + i));
        }
        Assert.assertTrue(eis.poll() == null);
    } finally {
        try {
            cluster.shutdown();
        } catch (ExitUtil.ExitException e) {
        // expected because the old active will be unable to flush the
        // end-of-segment op since it is fenced
        }
    }
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) ExitUtil(org.apache.hadoop.util.ExitUtil) MiniQJMHACluster(org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch) Test(org.junit.Test)
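
The test shows that a stream opened against the old active stops at the edits that NameNode logged itself. A long-running reader typically checkpoints batch.getTxid() and reopens the stream when a gap is reported; the following is a hedged sketch of that recovery pattern, not code from the test above (the endless loop and println handler are placeholders):

// Sketch: tail the event stream and recover from MissingEventsException by
// reopening from the last processed transaction id. Assumes the imports used
// in the examples above plus org.apache.hadoop.hdfs.client.HdfsAdmin.
private void tailWithRecovery(HdfsAdmin admin) throws Exception {
    long lastProcessedTxid = -1L;
    DFSInotifyEventInputStream stream = admin.getInotifyEventStream();
    while (true) {
        try {
            // take() blocks until a batch arrives.
            EventBatch batch = stream.take();
            for (Event event : batch.getEvents()) {
                System.out.println(event.getEventType() + " " + event);
            }
            // Checkpoint progress so we can resume after a gap.
            lastProcessedTxid = batch.getTxid();
        } catch (MissingEventsException e) {
            // Some edits were purged before we read them; reopen from the last
            // checkpoint. Events inside the gap are lost to this reader.
            stream = lastProcessedTxid == -1L
                    ? admin.getInotifyEventStream()
                    : admin.getInotifyEventStream(lastProcessedTxid);
        }
    }
}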

Example 18 with EventBatch

Use of org.apache.hadoop.hdfs.inotify.EventBatch in project hadoop by apache.

The class TestDFSUpgradeFromImage, method testPreserveEditLogs.

@Test
public void testPreserveEditLogs() throws Exception {
    unpackStorage(HADOOP252_IMAGE, HADOOP_DFS_DIR_TXT);
    /**
     * The pre-created image has the following edits:
     * mkdir /input; mkdir /input/dir1~5
     * copyFromLocal randome_file_1 /input/dir1
     * copyFromLocal randome_file_2 /input/dir2
     * mv /input/dir1/randome_file_1 /input/dir3/randome_file_3
     * rmdir /input/dir1
     */
    Configuration conf = new HdfsConfiguration();
    conf = UpgradeUtilities.initializeStorageStateConf(1, conf);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).format(false).manageDataDfsDirs(false).manageNameDfsDirs(false).startupOption(StartupOption.UPGRADE).build();
    DFSInotifyEventInputStream ieis = cluster.getFileSystem().getInotifyEventStream(0);
    EventBatch batch;
    Event.CreateEvent ce;
    Event.RenameEvent re;
    // mkdir /input
    batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
    assertEquals(1, batch.getEvents().length);
    assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
    ce = (Event.CreateEvent) batch.getEvents()[0];
    assertEquals(ce.getPath(), "/input");
    // mkdir /input/dir1~5
    for (int i = 1; i <= 5; i++) {
        batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
        assertEquals(1, batch.getEvents().length);
        assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
        ce = (Event.CreateEvent) batch.getEvents()[0];
        assertEquals(ce.getPath(), "/input/dir" + i);
    }
    // copyFromLocal randome_file_1~2 /input/dir1~2
    for (int i = 1; i <= 2; i++) {
        batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
        assertEquals(1, batch.getEvents().length);
        if (batch.getEvents()[0].getEventType() != Event.EventType.CREATE) {
            FSImage.LOG.debug("Unexpected event type: " + batch.getEvents()[0].getEventType());
        }
        assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CREATE);
        // copyFromLocal randome_file_1 /input/dir1, CLOSE
        batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
        assertEquals(1, batch.getEvents().length);
        assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.CLOSE);
        // copyFromLocal randome_file_1 /input/dir1, RENAME (the temporary copy is renamed into place)
        batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
        assertEquals(1, batch.getEvents().length);
        assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.RENAME);
        re = (Event.RenameEvent) batch.getEvents()[0];
        assertEquals(re.getDstPath(), "/input/dir" + i + "/randome_file_" + i);
    }
    // mv /input/dir1/randome_file_1 /input/dir3/randome_file_3
    long txIDBeforeRename = batch.getTxid();
    batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
    assertEquals(1, batch.getEvents().length);
    assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.RENAME);
    re = (Event.RenameEvent) batch.getEvents()[0];
    assertEquals(re.getDstPath(), "/input/dir3/randome_file_3");
    // rmdir /input/dir1
    batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
    assertEquals(1, batch.getEvents().length);
    assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.UNLINK);
    assertEquals(((Event.UnlinkEvent) batch.getEvents()[0]).getPath(), "/input/dir1");
    long lastTxID = batch.getTxid();
    // Start inotify from the tx before rename /input/dir1/randome_file_1
    ieis = cluster.getFileSystem().getInotifyEventStream(txIDBeforeRename);
    batch = TestDFSInotifyEventInputStream.waitForNextEvents(ieis);
    assertEquals(1, batch.getEvents().length);
    assertTrue(batch.getEvents()[0].getEventType() == Event.EventType.RENAME);
    re = (Event.RenameEvent) batch.getEvents()[0];
    assertEquals(re.getDstPath(), "/input/dir3/randome_file_3");
    // Try to read beyond available edits
    ieis = cluster.getFileSystem().getInotifyEventStream(lastTxID + 1);
    assertNull(ieis.poll());
    cluster.shutdown();
}
Also used : Configuration(org.apache.hadoop.conf.Configuration) CreateEvent(org.apache.hadoop.hdfs.inotify.Event.CreateEvent) CreateEvent(org.apache.hadoop.hdfs.inotify.Event.CreateEvent) Event(org.apache.hadoop.hdfs.inotify.Event) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch) Test(org.junit.Test)

Example 19 with EventBatch

Use of org.apache.hadoop.hdfs.inotify.EventBatch in project nifi by apache.

The class GetHDFSEvents, method onTrigger.

@Override
public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
    final StateManager stateManager = context.getStateManager();
    try {
        StateMap state = stateManager.getState(Scope.CLUSTER);
        String txIdAsString = state.get(LAST_TX_ID);
        if (txIdAsString != null && !"".equals(txIdAsString)) {
            lastTxId = Long.parseLong(txIdAsString);
        }
    } catch (IOException e) {
        getLogger().error("Unable to retrieve last transaction ID. Must retrieve last processed transaction ID before processing can occur.", e);
        context.yield();
        return;
    }
    try {
        final int retries = context.getProperty(NUMBER_OF_RETRIES_FOR_POLL).asInteger();
        final TimeUnit pollDurationTimeUnit = TimeUnit.MICROSECONDS;
        final long pollDuration = context.getProperty(POLL_DURATION).asTimePeriod(pollDurationTimeUnit);
        final DFSInotifyEventInputStream eventStream = lastTxId == -1L ? getHdfsAdmin().getInotifyEventStream() : getHdfsAdmin().getInotifyEventStream(lastTxId);
        final EventBatch eventBatch = getEventBatch(eventStream, pollDuration, pollDurationTimeUnit, retries);
        if (eventBatch != null && eventBatch.getEvents() != null) {
            if (eventBatch.getEvents().length > 0) {
                List<FlowFile> flowFiles = new ArrayList<>(eventBatch.getEvents().length);
                for (Event e : eventBatch.getEvents()) {
                    if (toProcessEvent(context, e)) {
                        getLogger().debug("Creating flow file for event: {}.", new Object[] { e });
                        final String path = getPath(e);
                        FlowFile flowFile = session.create();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.MIME_TYPE.key(), "application/json");
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_TYPE, e.getEventType().name());
                        flowFile = session.putAttribute(flowFile, EventAttributes.EVENT_PATH, path);
                        flowFile = session.write(flowFile, new OutputStreamCallback() {

                            @Override
                            public void process(OutputStream out) throws IOException {
                                out.write(OBJECT_MAPPER.writeValueAsBytes(e));
                            }
                        });
                        flowFiles.add(flowFile);
                    }
                }
                for (FlowFile flowFile : flowFiles) {
                    final String path = flowFile.getAttribute(EventAttributes.EVENT_PATH);
                    final String transitUri = path.startsWith("/") ? "hdfs:/" + path : "hdfs://" + path;
                    getLogger().debug("Transferring flow file {} and creating provenance event with URI {}.", new Object[] { flowFile, transitUri });
                    session.transfer(flowFile, REL_SUCCESS);
                    session.getProvenanceReporter().receive(flowFile, transitUri);
                }
            }
            lastTxId = eventBatch.getTxid();
        }
    } catch (IOException | InterruptedException e) {
        getLogger().error("Unable to get notification information: {}", new Object[] { e });
        context.yield();
        return;
    } catch (MissingEventsException e) {
        // set lastTxId to -1 and update state. This may cause events not to be processed. The reason this exception is thrown is described in the
        // org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream API. It suggests tuning a couple of parameters if this API is used.
        lastTxId = -1L;
        getLogger().error("Unable to get notification information. Setting transaction id to -1. This may cause some events to get missed. " + "Please see javadoc for org.apache.hadoop.hdfs.client.HdfsAdmin#getInotifyEventStream: {}", new Object[] { e });
    }
    updateClusterStateForTxId(stateManager);
}
Also used : FlowFile(org.apache.nifi.flowfile.FlowFile) StateMap(org.apache.nifi.components.state.StateMap) OutputStream(java.io.OutputStream) ArrayList(java.util.ArrayList) IOException(java.io.IOException) MissingEventsException(org.apache.hadoop.hdfs.inotify.MissingEventsException) StateManager(org.apache.nifi.components.state.StateManager) TimeUnit(java.util.concurrent.TimeUnit) Event(org.apache.hadoop.hdfs.inotify.Event) DFSInotifyEventInputStream(org.apache.hadoop.hdfs.DFSInotifyEventInputStream) OutputStreamCallback(org.apache.nifi.processor.io.OutputStreamCallback) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch)
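
The getEventBatch helper invoked above is not part of this listing. A plausible sketch of the poll-with-retries behavior that its arguments imply follows; the structure is an assumption, not the actual NiFi implementation:

// Hypothetical poll-with-retries loop matching getEventBatch's call site above.
private EventBatch getEventBatch(DFSInotifyEventInputStream stream, long duration,
        TimeUnit unit, int retries)
        throws IOException, InterruptedException, MissingEventsException {
    // Each poll waits up to the configured duration before returning null.
    EventBatch batch = stream.poll(duration, unit);
    int attempts = 0;
    while (batch == null && attempts++ < retries) {
        batch = stream.poll(duration, unit);
    }
    // May still be null if every attempt timed out; the caller handles that.
    return batch;
}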

Example 20 with EventBatch

Use of org.apache.hadoop.hdfs.inotify.EventBatch in project nifi by apache.

The class TestGetHDFSEvents, method makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship.

@Test
public void makeSureHappyPathForProcessingEventsSendsFlowFilesToCorrectRelationship() throws Exception {
    Event[] events = getEvents();
    EventBatch eventBatch = mock(EventBatch.class);
    when(eventBatch.getEvents()).thenReturn(events);
    when(inotifyEventInputStream.poll(1000000L, TimeUnit.MICROSECONDS)).thenReturn(eventBatch);
    when(hdfsAdmin.getInotifyEventStream()).thenReturn(inotifyEventInputStream);
    when(eventBatch.getTxid()).thenReturn(100L);
    GetHDFSEvents processor = new TestableGetHDFSEvents(kerberosProperties, hdfsAdmin);
    TestRunner runner = TestRunners.newTestRunner(processor);
    runner.setProperty(GetHDFSEvents.POLL_DURATION, "1 second");
    runner.setProperty(GetHDFSEvents.HDFS_PATH_TO_WATCH, "/some/path(/)?.*");
    runner.run();
    List<MockFlowFile> successfulFlowFiles = runner.getFlowFilesForRelationship(GetHDFSEvents.REL_SUCCESS);
    assertEquals(3, successfulFlowFiles.size());
    verify(eventBatch).getTxid();
    assertEquals("100", runner.getProcessContext().getStateManager().getState(Scope.CLUSTER).get("last.tx.id"));
}
Also used : MockFlowFile(org.apache.nifi.util.MockFlowFile) TestRunner(org.apache.nifi.util.TestRunner) Event(org.apache.hadoop.hdfs.inotify.Event) EventBatch(org.apache.hadoop.hdfs.inotify.EventBatch) Test(org.junit.Test)
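
The getEvents() fixture is also not shown in this listing. A plausible stand-in that yields three events matching the /some/path(/)?.* watch pattern, and hence three flow files, might look like this (paths and event choices are assumptions):

// Hypothetical fixture: three events whose paths match the watched pattern.
private Event[] getEvents() {
    Event create = new Event.CreateEvent.Builder()
            .iNodeType(Event.CreateEvent.INodeType.FILE)
            .path("/some/path/create")
            .build();
    Event close = new Event.CloseEvent("/some/path/close", 1L, 2L);
    Event meta = new Event.MetadataUpdateEvent.Builder()
            .path("/some/path/meta")
            .metadataType(Event.MetadataUpdateEvent.MetadataType.OWNER)
            .build();
    return new Event[] { create, close, meta };
}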

Aggregations

EventBatch (org.apache.hadoop.hdfs.inotify.EventBatch): 22
Event (org.apache.hadoop.hdfs.inotify.Event): 13
Test (org.junit.Test): 12
IOException (java.io.IOException): 6
Configuration (org.apache.hadoop.conf.Configuration): 5
MockFlowFile (org.apache.nifi.util.MockFlowFile): 5
TestRunner (org.apache.nifi.util.TestRunner): 5
MissingEventsException (org.apache.hadoop.hdfs.inotify.MissingEventsException): 4
MiniQJMHACluster (org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster): 4
ArrayList (java.util.ArrayList): 3
DFSInotifyEventInputStream (org.apache.hadoop.hdfs.DFSInotifyEventInputStream): 3
OutputStream (java.io.OutputStream): 2
FsPermission (org.apache.hadoop.fs.permission.FsPermission): 2
TraceScope (org.apache.htrace.core.TraceScope): 2
AlluxioURI (alluxio.AlluxioURI): 1
Constants (alluxio.Constants): 1
SyncInfo (alluxio.SyncInfo): 1
ConcurrentHashSet (alluxio.collections.ConcurrentHashSet): 1
PropertyKey (alluxio.conf.PropertyKey): 1
InvalidPathException (alluxio.exception.InvalidPathException): 1