Search in sources :

Example 41 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class FixedWindowRollingPolicy method rollover.

/**
 * Rolls the active log file into the fixed window of indexed files.
 *
 * <p>When {@code maxIndex >= 0} this closes {@code closeable}, deletes the file at {@code maxIndex} (only while
 * {@code processedIndex == maxIndex}), renames every existing file at index {@code i} to index {@code i + 1}
 * walking down from {@code processedIndex} to {@code minIndex}, and finally renames the active file to the
 * {@code minIndex} name. {@code processedIndex} is decremented as each index is handled and is reset to
 * {@code maxIndex} only after the last rename succeeds, so a call that follows a failed attempt resumes at the
 * index that failed.
 *
 * <p>Returns without doing anything when the active file location has no parent.
 *
 * @throws RolloverFailure if a delete or rename does not succeed, or if an {@link IOException} is raised; in the
 *     latter case the {@link IOException} is attached to the failure as a suppressed exception
 */
@Override
public void rollover() throws RolloverFailure {
    Location parentLocation = getParent(activeFileLocation);
    if (parentLocation == null) {
        return;
    }
    // A negative maxIndex means there is no file renaming to be done.
    if (maxIndex >= 0) {
        try {
            // close outputstream of active location
            closeable.close();
            String fileName = fileNamePattern.convertInt(maxIndex);
            // Only delete the file at the maximum index while processedIndex still equals maxIndex. When a previous
            // attempt failed part-way, processedIndex is lower and deleting here again would remove a file that
            // has already been rolled over.
            if (processedIndex == maxIndex) {
                Location deleteLocation = parentLocation.append(fileName);
                // no need to proceed further if we are not able to delete location so throw exception
                if (deleteLocation.exists() && !deleteLocation.delete()) {
                    LOG.warn("Failed to delete location: {}", deleteLocation.toURI().toString());
                    throw new RolloverFailure(String.format("Not able to delete file: %s", deleteLocation.toURI().toString()));
                }
                processedIndex--;
            }
            // processedIndex is decremented together with i in the loop update, which does not run when the body
            // throws; after a failure it therefore still points at the index that was not completed.
            for (int i = processedIndex; i >= minIndex; i--, processedIndex--) {
                String toRenameStr = fileNamePattern.convertInt(i);
                Location toRename = parentLocation.append(toRenameStr);
                // no point in trying to rename an non existent file
                if (toRename.exists()) {
                    Location newName = parentLocation.append(fileNamePattern.convertInt(i + 1));
                    // throw exception if rename fails, so that in next iteration of rollover, it will be retried
                    if (toRename.renameTo(newName) == null) {
                        LOG.warn("Failed to rename {} to {}", toRename.toURI().toString(), newName.toURI().toString());
                        throw new RolloverFailure(String.format("Failed to rename %s to %s", toRename.toURI().toString(), newName.toURI().toString()));
                    }
                } else {
                    LOG.trace("Skipping roll-over for inexistent file {}", toRename.toURI().toString());
                }
            }
            // The active file takes the lowest index of the window.
            if (activeFileLocation.renameTo(parentLocation.append(fileNamePattern.convertInt(minIndex))) == null) {
                LOG.warn("Failed to rename location: {}", activeFileLocation.toURI().toString());
                throw new RolloverFailure(String.format("Not able to rename file: %s", activeFileLocation.toURI().toString()));
            }
            // reset max processed index after rename of active location has been processed successfully
            processedIndex = maxIndex;
        } catch (IOException e) {
            // Wrap the I/O error in a RolloverFailure, keeping the original exception as a suppressed exception.
            RolloverFailure f = new RolloverFailure(e.getMessage());
            f.addSuppressed(e);
            throw f;
        }
    }
}
Also used : RolloverFailure(ch.qos.logback.core.rolling.RolloverFailure) IOException(java.io.IOException) Location(org.apache.twill.filesystem.Location)

Example 42 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class LocationManager method getLocationOutputStream.

/**
 * Returns the output stream for the log file created as {@code <basePath>/namespaceId/applicationId/<filePath>}.
 *
 * <p>If a stream is already registered for the given identifier it is returned as is. Otherwise the log file is
 * created and a new stream is registered. When the log file already exists, it is first renamed to a temporary
 * file, its content is copied into a freshly created log file through the new stream, and the temporary file is
 * then handed to {@code deleteTempFiles}.
 *
 * @param locationIdentifier location identifier for this event
 * @param filePath           filePath for this event
 * @return returns {@link LocationOutputStream} for an event
 * @throws IOException if the parent directory cannot be determined, the existing file cannot be renamed, or
 *                     creating or writing the file fails
 */
OutputStream getLocationOutputStream(LocationIdentifier locationIdentifier, String filePath) throws IOException {
    if (activeLocations.containsKey(locationIdentifier)) {
        return activeLocations.get(locationIdentifier);
    }
    Location logFile = getLogLocation(locationIdentifier).append(filePath);
    Location logDir = getParent(logFile);
    if (logDir == null) {
        // this should never happen
        LOG.error("Parent Directory for {} is null", logFile.toURI().toString());
        throw new IOException(String.format("Parent Directory for %s is null", logFile.toURI().toString()));
    }
    // check if parent directories exist
    mkdirsIfNotExists(logDir, dirPermissions);
    if (logFile.exists()) {
        // The file name for a given application exists if the appender was stopped and then started again but file was
        // not rolled over. In this case, since the roll over size is typically small, we can rename the old file and
        // copy its contents to new file and delete old file.
        long now = System.currentTimeMillis();
        // rename existing file to temp file
        Location tempLocation = logFile.renameTo(logDir.append("temp-" + Long.toString(now)));
        if (tempLocation == null) {
            throw new IOException(String.format("Can not rename file %s", logFile.toURI().toString()));
        }
        // Declared outside the try block so the catch clause can release it when the copy fails.
        LocationOutputStream outputStream = null;
        try (InputStream inputStream = tempLocation.getInputStream()) {
            // create new file and open outputstream on it
            logFile.createNew(filePermissions);
            // TODO: Handle existing file in a better way rather than copying it over
            outputStream = new LocationOutputStream(logFile, logFile.getOutputStream(filePermissions), System.currentTimeMillis());
            activeLocations.put(locationIdentifier, outputStream);
            ByteStreams.copy(inputStream, outputStream);
            outputStream.flush();
        } catch (IOException e) {
            activeLocations.remove(locationIdentifier);
            // The stream is no longer reachable through activeLocations, so close it here to avoid leaking it.
            if (outputStream != null) {
                try {
                    outputStream.close();
                } catch (IOException closeFailure) {
                    // Keep the original failure as the primary exception.
                    e.addSuppressed(closeFailure);
                }
            }
            throw e;
        }
        deleteTempFiles(logDir, tempLocation);
    } else {
        // create file with correct permissions
        logFile.createNew(filePermissions);
        activeLocations.put(locationIdentifier, new LocationOutputStream(logFile, logFile.getOutputStream(filePermissions), System.currentTimeMillis()));
    }
    return activeLocations.get(locationIdentifier);
}
Also used : InputStream(java.io.InputStream) OutputStream(java.io.OutputStream) IOException(java.io.IOException) Location(org.apache.twill.filesystem.Location)

Example 43 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class FileMetaDataReader method getFilesInOldFormat.

/**
 * Collects the log files recorded in the old meta-table format for the given log path.
 *
 * <p>Reads the row stored under the old row key and, for every column whose key is exactly 8 bytes long, decodes
 * the key as the event timestamp. Columns with a timestamp not later than {@code endTimestampMs} are turned into
 * {@link LogLocation} entries of version {@code LogLocation.VERSION_0}; the column value is interpreted as a URI
 * whose path is resolved to a {@link Location} while impersonating the namespace. Columns with any other key
 * length are logged and skipped.
 *
 * @param metaTable         table holding the file meta data
 * @param logPathIdentifier identifier whose old row key is looked up
 * @param endTimestampMs    inclusive upper bound for the event timestamp
 * @return the matching log locations, in the iteration order of the row's columns
 * @throws Exception if reading the table or resolving a location fails
 */
private List<LogLocation> getFilesInOldFormat(Table metaTable, LogPathIdentifier logPathIdentifier, long endTimestampMs) throws Exception {
    List<LogLocation> result = new ArrayList<>();
    final Row row = metaTable.get(getOldRowKey(logPathIdentifier));
    for (final Map.Entry<byte[], byte[]> column : row.getColumns().entrySet()) {
        // old rowkey format length is 8 bytes (just the event timestamp is the column key)
        if (column.getKey().length != 8) {
            LOG.warn("For row-key {}, got column entry with unexpected key length {}", logPathIdentifier.getOldRowkey(), column.getKey().length);
            continue;
        }
        long eventTimestamp = Bytes.toLong(column.getKey());
        if (eventTimestamp > endTimestampMs) {
            // newer than the requested end time
            continue;
        }
        NamespaceId namespace = new NamespaceId(logPathIdentifier.getNamespaceId());
        Callable<Location> locationResolver = new Callable<Location>() {

            @Override
            public Location call() throws Exception {
                // we stored uri in old format
                String storedUri = Bytes.toString(column.getValue());
                return Locations.getLocationFromAbsolutePath(locationFactory, new URI(storedUri).getPath());
            }
        };
        Location fileLocation = impersonator.doAs(namespace, locationResolver);
        // old format; use 0 as current time as this information is not available
        long currentTime = 0;
        result.add(new LogLocation(LogLocation.VERSION_0, eventTimestamp, currentTime, fileLocation, logPathIdentifier.getNamespaceId(), impersonator));
    }
    return result;
}
Also used : ArrayList(java.util.ArrayList) URI(java.net.URI) URISyntaxException(java.net.URISyntaxException) LogLocation(co.cask.cdap.logging.write.LogLocation) Row(co.cask.cdap.api.dataset.table.Row) NamespaceId(co.cask.cdap.proto.id.NamespaceId) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Location(org.apache.twill.filesystem.Location) LogLocation(co.cask.cdap.logging.write.LogLocation)

Example 44 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamDataFileTestBase method testOffset.

/**
 * Writes 100 events and then, for every entry of the resulting index, opens a reader at the offset just before
 * the entry position and checks that the first event read carries the timestamp stored in that index entry.
 */
@Test
public void testOffset() throws Exception {
    Location dir = StreamFileTestUtils.createTempDir(getLocationFactory());
    Location eventFile = dir.getTempFile(".dat");
    Location indexFile = dir.getTempFile(".idx");
    // Write 100 events with different timestamps.
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(eventFile), Locations.newOutputSupplier(indexFile), 10L);
    for (int i = 0; i < 100; i++) {
        writer.append(StreamFileTestUtils.createEvent(i, "Testing " + i));
    }
    writer.close();
    StreamDataFileIndex index = new StreamDataFileIndex(Locations.newInputSupplier(indexFile));
    StreamDataFileIndexIterator iterator = index.indexIterator();
    while (iterator.nextIndexEntry()) {
        StreamDataFileReader reader = StreamDataFileReader.createWithOffset(Locations.newInputSupplier(eventFile), Locations.newInputSupplier(indexFile), iterator.currentPosition() - 1);
        // A new reader is opened per index entry; close each one so the test does not leak open readers.
        try {
            List<StreamEvent> events = Lists.newArrayList();
            Assert.assertEquals(1, reader.read(events, 1, 0, TimeUnit.SECONDS));
            Assert.assertEquals(iterator.currentTimestamp(), events.get(0).getTimestamp());
        } finally {
            reader.close();
        }
    }
}
Also used : StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 45 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamDataFileTestBase method testFilter.

/**
 * Exercises reading through a {@link ReadFilter} that rejects every timestamp.
 *
 * <p>The first read must return no events while the filter records that a timestamp was offered to it. After
 * the filter is reset, a second read must again return no events and the filter must not have been offered a
 * timestamp.
 */
@Test
public void testFilter() throws Exception {
    Location tempDir = StreamFileTestUtils.createTempDir(getLocationFactory());
    final Location dataFile = tempDir.getTempFile(".dat");
    final Location idxFile = tempDir.getTempFile(".idx");

    // Write a single event and flush it; the writer stays open until the end of the test.
    StreamDataFileWriter writer = new StreamDataFileWriter(Locations.newOutputSupplier(dataFile), Locations.newOutputSupplier(idxFile), 10000L);
    writer.append(StreamFileTestUtils.createEvent(0, "Message 1"));
    writer.flush();

    StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(dataFile));
    List<StreamEvent> readEvents = Lists.newArrayList();

    // Set to true whenever the filter is asked about a timestamp.
    final AtomicBoolean timestampOffered = new AtomicBoolean(false);
    ReadFilter rejectingFilter = new ReadFilter() {

        private long hint = -1L;

        @Override
        public void reset() {
            timestampOffered.set(false);
            hint = -1L;
        }

        @Override
        public boolean acceptTimestamp(long timestamp) {
            // Reject the timestamp and point the hint just past it.
            timestampOffered.set(true);
            hint = timestamp + 1;
            return false;
        }

        @Override
        public long getNextTimestampHint() {
            return hint;
        }
    };

    // First pass: nothing is accepted, but the filter has been consulted.
    Assert.assertEquals(0, reader.read(readEvents, 1, 0, TimeUnit.SECONDS, rejectingFilter));
    Assert.assertTrue(timestampOffered.get());

    // Second pass after reset: still nothing is read and the filter is not consulted.
    rejectingFilter.reset();
    Assert.assertEquals(0, reader.read(readEvents, 1, 0, TimeUnit.SECONDS, rejectingFilter));
    Assert.assertFalse(timestampOffered.get());

    reader.close();
    writer.close();
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) ReadFilter(co.cask.cdap.data.file.ReadFilter) TTLReadFilter(co.cask.cdap.data.file.filter.TTLReadFilter) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Aggregations

Location (org.apache.twill.filesystem.Location)272 Test (org.junit.Test)110 IOException (java.io.IOException)67 File (java.io.File)45 FileSet (co.cask.cdap.api.dataset.lib.FileSet)32 LocationFactory (org.apache.twill.filesystem.LocationFactory)32 LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory)31 PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet)27 StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent)27 CConfiguration (co.cask.cdap.common.conf.CConfiguration)20 HashMap (java.util.HashMap)20 NamespaceId (co.cask.cdap.proto.id.NamespaceId)19 Manifest (java.util.jar.Manifest)18 StreamId (co.cask.cdap.proto.id.StreamId)17 ArrayList (java.util.ArrayList)15 DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework)13 OutputStream (java.io.OutputStream)13 TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet)11 ApplicationManager (co.cask.cdap.test.ApplicationManager)11 HashSet (java.util.HashSet)11