Search in sources :

Example 36 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamFileJanitorTestBase method testCleanupGeneration.

/**
 * Exercises the janitor's cleanup of stale stream generations. The final cleanup runs with the request user
 * switched to BOB, who receives no grant in this test, to check that a user without privilege on the stream
 * can still clean it up.
 */
@Test
public void testCleanupGeneration() throws Exception {
    // Set up a stream on which ALICE holds ADMIN; it gets truncated repeatedly below
    String streamName = "testCleanupGeneration";
    StreamId streamId = NamespaceId.DEFAULT.stream(streamName);
    authorizer.grant(Authorizable.fromEntityId(streamId), ALICE, EnumSet.of(Action.ADMIN));
    StreamAdmin streamAdmin = getStreamAdmin();
    streamAdmin.create(streamId);
    StreamConfig streamConfig = streamAdmin.getConfig(streamId);

    int round = 0;
    while (round < 5) {
        // Write a single event so the current generation has content
        FileWriter<StreamEvent> eventWriter = createWriter(streamId);
        StreamEvent event = StreamFileTestUtils.createEvent(System.currentTimeMillis(), "Testing");
        eventWriter.append(event);
        eventWriter.close();

        // Cleaning before the truncate must leave the current generation untouched
        janitor.clean(streamConfig.getLocation(), streamConfig.getTTL(), System.currentTimeMillis());
        verifyGeneration(streamConfig, round);

        streamAdmin.truncate(streamId);
        round++;
    }

    // From here on, act as BOB
    SecurityRequestContext.setUserId(BOB.getName());
    int generation = StreamUtils.getGeneration(streamConfig);
    Assert.assertEquals(5, generation);
    janitor.clean(streamConfig.getLocation(), streamConfig.getTTL(), System.currentTimeMillis());

    // After cleanup, every directory left under the stream location must be named after the latest generation
    for (Location child : streamConfig.getLocation().list()) {
        if (!child.isDirectory()) {
            continue;
        }
        int directoryGeneration = Integer.parseInt(child.getName());
        Assert.assertEquals(generation, directoryGeneration);
    }
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 37 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class ConcurrentStreamWriterTestBase method generateFile.

/**
 * Creates an event file and an index file at random locations under the "dummy" namespace and writes
 * {@code events} generated messages into them.
 *
 * <p>The writer is flushed but deliberately left open: it is handed back to the caller inside the
 * returned {@link FileInfo}.
 *
 * @param locationFactory factory used to resolve the namespace base location
 * @param id identifier embedded in every message body
 * @param events number of events to write
 * @return a {@link FileInfo} built from the two locations, the writer, and the event count
 * @throws IOException if resolving a location or writing to the files fails
 */
private FileInfo generateFile(NamespacedLocationFactory locationFactory, int id, int events) throws IOException {
    NamespaceId namespace = new NamespaceId("dummy");
    String eventFileName = UUID.randomUUID().toString();
    Location eventFile = locationFactory.get(namespace).append(eventFileName);
    String indexFileName = UUID.randomUUID().toString();
    Location indexFile = locationFactory.get(namespace).append(indexFileName);

    StreamDataFileWriter fileWriter = new StreamDataFileWriter(
        Locations.newOutputSupplier(eventFile),
        Locations.newOutputSupplier(indexFile),
        1000L);

    int written = 0;
    while (written < events) {
        long timestamp = System.currentTimeMillis();
        fileWriter.append(StreamFileTestUtils.createEvent(timestamp, "Message " + written + " from " + id));
        // Flush on every 50th event, counting from the very first one
        boolean flushPoint = (written % 50 == 0);
        if (flushPoint) {
            fileWriter.flush();
        }
        written++;
    }

    // Final flush so that everything appended so far is pushed out before returning
    fileWriter.flush();
    return new FileInfo(eventFile, indexFile, fileWriter, events);
}
Also used : NamespaceId(co.cask.cdap.proto.id.NamespaceId) StreamDataFileWriter(co.cask.cdap.data.stream.StreamDataFileWriter) Location(org.apache.twill.filesystem.Location)

Example 38 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class ConcurrentStreamWriterTestBase method testConcurrentAppendFile.

/**
 * Runs file appends and single-event appends against one {@code ConcurrentStreamWriter} from many threads
 * at once, then reads back every event file in the first partition and verifies the events.
 *
 * <p>The thread pool is always shut down and every file reader is closed, so the test does not leak
 * threads or open streams regardless of its outcome.
 */
@Test
public void testConcurrentAppendFile() throws Exception {
    final String streamName = "testConcurrentFile";
    NamespaceId namespace = new NamespaceId("namespace");
    StreamId streamId = namespace.stream(streamName);
    StreamAdmin streamAdmin = new TestStreamAdmin(getNamespacedLocationFactory(), Long.MAX_VALUE, 1000);
    int threads = Runtime.getRuntime().availableProcessors() * 4;
    StreamFileWriterFactory fileWriterFactory = createStreamFileWriterFactory();
    final ConcurrentStreamWriter streamWriter = createStreamWriter(streamId, streamAdmin, threads, fileWriterFactory);
    int msgCount = 10000;
    NamespacedLocationFactory locationFactory = getNamespacedLocationFactory();
    // Half of the threads will be calling appendFile, then other half append event one by one
    // Prepare the files first, each file has msgCount events.
    final List<FileInfo> fileInfos = Lists.newArrayList();
    for (int i = 0; i < threads / 2; i++) {
        fileInfos.add(generateFile(locationFactory, i, msgCount));
    }
    // Append file and write events
    final CountDownLatch startLatch = new CountDownLatch(1);
    final CountDownLatch completion = new CountDownLatch(threads);
    ExecutorService executor = Executors.newFixedThreadPool(threads);
    try {
        for (int i = 0; i < threads / 2; i++) {
            executor.execute(createAppendFileTask(streamId, streamWriter, fileInfos.get(i), startLatch, completion));
        }
        for (int i = threads / 2; i < threads; i++) {
            executor.execute(createWriterTask(streamId, streamWriter, i, msgCount, 50, startLatch, completion));
        }
        // Release all tasks at once so that they contend on the writer
        startLatch.countDown();
        Assert.assertTrue(completion.await(4, TimeUnit.MINUTES));
    } finally {
        // Always release the pool threads, including when the await above times out or a submit fails
        executor.shutdownNow();
    }
    // Verify all events are written.
    // There should be only one partition
    Location partitionLocation = streamAdmin.getConfig(streamId).getLocation().list().get(0);
    List<Location> files = partitionLocation.list();
    List<StreamEvent> events = Lists.newArrayListWithCapacity(threads * msgCount);
    for (Location location : files) {
        // Only create reader for the event file
        if (StreamFileType.getType(location.getName()) != StreamFileType.EVENT) {
            continue;
        }
        // Close each reader once its events are collected so the underlying stream is not leaked
        try (StreamDataFileReader reader = StreamDataFileReader.create(Locations.newInputSupplier(location))) {
            reader.read(events, Integer.MAX_VALUE, 0, TimeUnit.SECONDS);
        }
    }
    Assert.assertTrue(verifyEvents(threads, msgCount, events));
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamEvent(co.cask.cdap.api.flow.flowlet.StreamEvent) NamespacedLocationFactory(co.cask.cdap.common.namespace.NamespacedLocationFactory) CountDownLatch(java.util.concurrent.CountDownLatch) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) NoopStreamAdmin(co.cask.cdap.data.stream.NoopStreamAdmin) StreamFileWriterFactory(co.cask.cdap.data.stream.StreamFileWriterFactory) LocationStreamFileWriterFactory(co.cask.cdap.data.runtime.LocationStreamFileWriterFactory) ExecutorService(java.util.concurrent.ExecutorService) NamespaceId(co.cask.cdap.proto.id.NamespaceId) StreamDataFileReader(co.cask.cdap.data.stream.StreamDataFileReader) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 39 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class StreamFileSizeFetcherTest method testFetchSize.

/**
 * Checks {@code StreamUtils.fetchStreamFilesSize}: it must fail with an {@link IOException} before any
 * stream file exists, and afterwards report a size equal to the length of the single event file written.
 */
@Test
public void testFetchSize() throws Exception {
    StreamId streamId = NamespaceId.DEFAULT.stream("testFetchSize");
    int eventCount = 100;
    StreamAdmin streamAdmin = new TestStreamAdmin(namespacedLocationFactory, Long.MAX_VALUE, 1000);
    streamAdmin.create(streamId);
    StreamConfig config = streamAdmin.getConfig(streamId);

    // Nothing has been written yet, so fetching the size is expected to throw
    try {
        Location emptyGeneration =
            StreamUtils.createGenerationLocation(config.getLocation(), StreamUtils.getGeneration(config));
        StreamUtils.fetchStreamFilesSize(emptyGeneration);
        Assert.fail("No stream file created yet");
    } catch (IOException e) {
        // Expected
    }

    // Lay out an event file and its index file inside a partition of the stream
    Location partitionLocation = StreamUtils.createPartitionLocation(config.getLocation(), 0, Long.MAX_VALUE);
    Location dataLocation = StreamUtils.createStreamLocation(partitionLocation, "writer", 0, StreamFileType.EVENT);
    Location idxLocation = StreamUtils.createStreamLocation(partitionLocation, "writer", 0, StreamFileType.INDEX);
    StreamDataFileWriter writer = new StreamDataFileWriter(
        Locations.newOutputSupplier(dataLocation),
        Locations.newOutputSupplier(idxLocation),
        10000L);

    // Populate the file with eventCount events, using the loop index as the timestamp
    int timestamp = 0;
    while (timestamp < eventCount) {
        writer.append(StreamFileTestUtils.createEvent(timestamp, "foo"));
        timestamp++;
    }
    writer.close();

    // The reported size must be positive and match the length of the event file
    Location generationLocation =
        StreamUtils.createGenerationLocation(config.getLocation(), StreamUtils.getGeneration(config));
    long size = StreamUtils.fetchStreamFilesSize(generationLocation);
    Assert.assertTrue(size > 0);
    long expectedSize = dataLocation.length();
    Assert.assertEquals(expectedSize, size);
}
Also used : StreamId(co.cask.cdap.proto.id.StreamId) StreamAdmin(co.cask.cdap.data2.transaction.stream.StreamAdmin) NoopStreamAdmin(co.cask.cdap.data.stream.NoopStreamAdmin) StreamConfig(co.cask.cdap.data2.transaction.stream.StreamConfig) IOException(java.io.IOException) StreamDataFileWriter(co.cask.cdap.data.stream.StreamDataFileWriter) Location(org.apache.twill.filesystem.Location) Test(org.junit.Test)

Example 40 with Location

use of org.apache.twill.filesystem.Location in project cdap by caskdata.

the class ClicksAndViewsMapReduceTest method getDataFromFile.

/**
 * Collects every line of the MapReduce output found in the partition keyed by
 * {@code OUTPUT_PARTITION_RUNTIME} of the {@code ClicksAndViews.JOINED} dataset.
 *
 * <p>Only files whose name starts with {@code "part-"} are read. They are decoded explicitly as UTF-8
 * rather than with the platform default charset, so the result does not depend on the machine running
 * the test.
 *
 * @return the distinct lines read from all {@code part-} files of the partition
 * @throws Exception if the dataset or any of the files cannot be accessed or read
 */
private Set<String> getDataFromFile() throws Exception {
    DataSetManager<PartitionedFileSet> cleanRecords = getDataset(ClicksAndViews.JOINED);
    Set<String> cleanData = new HashSet<>();
    // we configured the MapReduce to write to this partition when starting it
    PartitionDetail partition = cleanRecords.get().getPartition(PartitionKey.builder().addLongField("runtime", OUTPUT_PARTITION_RUNTIME).build());
    Assert.assertNotNull(partition);
    for (Location location : partition.getLocation().list()) {
        if (!location.getName().startsWith("part-")) {
            // Skip anything in the partition that is not a "part-" output file
            continue;
        }
        // Pin the charset: the single-argument InputStreamReader constructor would use the platform default
        try (BufferedReader bufferedReader = new BufferedReader(
            new InputStreamReader(location.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                cleanData.add(line);
            }
        }
    }
    return cleanData;
}
Also used : InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) PartitionedFileSet(co.cask.cdap.api.dataset.lib.PartitionedFileSet) PartitionDetail(co.cask.cdap.api.dataset.lib.PartitionDetail) HashSet(java.util.HashSet) Location(org.apache.twill.filesystem.Location)

Aggregations

Location (org.apache.twill.filesystem.Location): 272, Test (org.junit.Test): 110, IOException (java.io.IOException): 67, File (java.io.File): 45, FileSet (co.cask.cdap.api.dataset.lib.FileSet): 32, LocationFactory (org.apache.twill.filesystem.LocationFactory): 32, LocalLocationFactory (org.apache.twill.filesystem.LocalLocationFactory): 31, PartitionedFileSet (co.cask.cdap.api.dataset.lib.PartitionedFileSet): 27, StreamEvent (co.cask.cdap.api.flow.flowlet.StreamEvent): 27, CConfiguration (co.cask.cdap.common.conf.CConfiguration): 20, HashMap (java.util.HashMap): 20, NamespaceId (co.cask.cdap.proto.id.NamespaceId): 19, Manifest (java.util.jar.Manifest): 18, StreamId (co.cask.cdap.proto.id.StreamId): 17, ArrayList (java.util.ArrayList): 15, DatasetFramework (co.cask.cdap.data2.dataset2.DatasetFramework): 13, OutputStream (java.io.OutputStream): 13, TimePartitionedFileSet (co.cask.cdap.api.dataset.lib.TimePartitionedFileSet): 11, ApplicationManager (co.cask.cdap.test.ApplicationManager): 11, HashSet (java.util.HashSet): 11