Search in sources :

Example 41 with SeekableByteChannel

use of java.nio.channels.SeekableByteChannel in project beam by apache.

the class GcsUtil method open.

/**
 * Opens an object in GCS.
 *
 * <p>Returns a SeekableByteChannel that provides access to data in the bucket. The outcome of the
 * open call is reported through a {@code ServiceCallMetric}: {@code "ok"} on success, or an error
 * code when the failure was caused by a {@code GoogleJsonResponseException}.
 *
 * @param path the GCS filename to read from
 * @param readOptions Fine-grained options for behaviors of retries, buffering, etc.
 * @return a SeekableByteChannel that can read the object data
 * @throws IOException if the underlying open call fails; the original exception is rethrown
 */
@VisibleForTesting
SeekableByteChannel open(GcsPath path, GoogleCloudStorageReadOptions readOptions) throws IOException {
    HashMap<String, String> baseLabels = new HashMap<>();
    baseLabels.put(MonitoringInfoConstants.Labels.PTRANSFORM, "");
    baseLabels.put(MonitoringInfoConstants.Labels.SERVICE, "Storage");
    baseLabels.put(MonitoringInfoConstants.Labels.METHOD, "GcsGet");
    baseLabels.put(MonitoringInfoConstants.Labels.RESOURCE, GcpResourceIdentifiers.cloudStorageBucket(path.getBucket()));
    baseLabels.put(MonitoringInfoConstants.Labels.GCS_PROJECT_ID, googleCloudStorageOptions.getProjectId());
    baseLabels.put(MonitoringInfoConstants.Labels.GCS_BUCKET, path.getBucket());
    ServiceCallMetric serviceCallMetric = new ServiceCallMetric(MonitoringInfoConstants.Urns.API_REQUEST_COUNT, baseLabels);
    try {
        SeekableByteChannel channel = googleCloudStorage.open(new StorageResourceId(path.getBucket(), path.getObject()), readOptions);
        serviceCallMetric.call("ok");
        return channel;
    } catch (IOException e) {
        if (e.getCause() instanceof GoogleJsonResponseException) {
            GoogleJsonResponseException jsonFailure = (GoogleJsonResponseException) e.getCause();
            // getDetails() is null when the error response carried no parseable JSON body.
            // Dereferencing it blindly would raise a NullPointerException here and mask the
            // original IOException, so fall back to the HTTP status code in that case.
            int errorCode = jsonFailure.getDetails() != null
                    ? jsonFailure.getDetails().getCode()
                    : jsonFailure.getStatusCode();
            serviceCallMetric.call(errorCode);
        }
        throw e;
    }
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) GoogleJsonResponseException(com.google.api.client.googleapis.json.GoogleJsonResponseException) HashMap(java.util.HashMap) IOException(java.io.IOException) StorageResourceId(com.google.cloud.hadoop.gcsio.StorageResourceId) ServiceCallMetric(org.apache.beam.runners.core.metrics.ServiceCallMetric) VisibleForTesting(org.apache.beam.vendor.guava.v26_0_jre.com.google.common.annotations.VisibleForTesting)

Example 42 with SeekableByteChannel

use of java.nio.channels.SeekableByteChannel in project beam by apache.

the class IsmReaderImpl method overKeyComponents.

/**
 * Returns an iterator over the records whose key starts with the given key components.
 *
 * <p>An empty iterator is returned when the file has no keys, when the requested shard id is not
 * known, or when the Bloom filter rules out the key prefix.
 *
 * @param keyComponents the key prefix components; must not be null
 * @param shardId the shard the prefix belongs to; must be 0 when {@code keyComponents} is empty
 * @param keyBytes the encoded key prefix; must be empty when {@code keyComponents} is empty
 * @return an iterator over the matching records
 * @throws IOException if reading from or closing the underlying channel fails
 */
@Override
public IsmPrefixReaderIterator overKeyComponents(List<?> keyComponents, int shardId, RandomAccessData keyBytes) throws IOException {
    checkNotNull(keyComponents);
    checkNotNull(keyBytes);
    SideInputReadCounter readCounter = IsmReader.getCurrentSideInputCounter();
    if (keyComponents.isEmpty()) {
        checkArgument(shardId == 0 && keyBytes.size() == 0, "Expected shard id to be 0 and key bytes to be empty " + "but got shard id %s and key bytes of length %s", shardId, keyBytes.size());
    }
    checkArgument(keyComponents.size() <= coder.getKeyComponentCoders().size(), "Expected at most %s key component(s) but received %s.", coder.getKeyComponentCoders().size(), keyComponents);
    Optional<SeekableByteChannel> inChannel = initializeFooterAndShardIndex(Optional.<SeekableByteChannel>absent(), readCounter);
    // If this file is empty, we can return an empty iterator.
    if (footer.getNumberOfKeys() == 0) {
        // Release any channel opened during initialization; nothing else will use it on this path.
        closeIfPresent(inChannel);
        return new EmptyIsmPrefixReaderIterator(keyComponents);
    }
    // If fewer key components were supplied than there are shard key coders, hand the
    // (opened if needed) channel to a shard-aware iterator.
    if (keyComponents.size() < coder.getNumberOfShardKeyCoders(keyComponents)) {
        return new ShardAwareIsmPrefixReaderIterator(keyComponents, openIfNeeded(inChannel), readCounter);
    }
    // If the requested shard id is not in the shard map,
    // we know that we can return an empty reader iterator.
    if (!shardIdToShardMap.containsKey(shardId)) {
        // Same as above: do not leak a channel that was opened only for initialization.
        closeIfPresent(inChannel);
        return new EmptyIsmPrefixReaderIterator(keyComponents);
    }
    inChannel = initializeForKeyedRead(shardId, inChannel, readCounter);
    closeIfPresent(inChannel);
    if (!bloomFilterMightContain(keyBytes)) {
        return new EmptyIsmPrefixReaderIterator(keyComponents);
    }
    // Otherwise we may actually contain the key so construct a reader iterator
    // which will fetch the data blocks containing the requested key prefix.
    // We find the first key in the index which may contain our prefix
    RandomAccessData floorKey = indexPerShard.get(shardId).floorKey(keyBytes);
    // We compute an upper bound on the key prefix by incrementing the prefix
    RandomAccessData keyBytesUpperBound = keyBytes.increment();
    // Compute the sub-range of the index map that we want to iterate over since
    // any of these blocks may contain the key prefix.
    Iterator<IsmShardKey> blockEntries = indexPerShard.get(shardId).subMap(floorKey, keyBytesUpperBound).values().iterator();
    return new WithinShardIsmPrefixReaderIterator(keyComponents, keyBytes, keyBytesUpperBound, blockEntries, readCounter);
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) RandomAccessData(org.apache.beam.runners.dataflow.util.RandomAccessData)

Example 43 with SeekableByteChannel

use of java.nio.channels.SeekableByteChannel in project beam by apache.

the class IsmReaderImpl method initializeBloomFilterAndIndexPerShard.

/**
 * Loads the Bloom filter and creates the per-shard index map, unless that has already happened.
 *
 * <p>Shards whose index offset coincides with the start of whatever follows them (the next
 * shard's block offset, or the Bloom filter position for the last shard) get a prepopulated
 * index. The supplied channel is re-used when present; the channel that was actually used is
 * returned so the caller can keep using or close it.
 */
private synchronized Optional<SeekableByteChannel> initializeBloomFilterAndIndexPerShard(Optional<SeekableByteChannel> inChannel) throws IOException {
    if (indexPerShard != null) {
        checkState(bloomFilter != null, "Expected Bloom filter to have been initialized.");
        return inChannel;
    }
    SeekableByteChannel channel = openIfNeeded(inChannel);
    // Seek to the Bloom filter and decode it from the channel.
    position(channel, footer.getBloomFilterPosition());
    bloomFilter = ScalableBloomFilterCoder.of().decode(Channels.newInputStream(channel));
    indexPerShard = new HashMap<>();
    // Walk the shards in order, comparing each shard's index offset against the block offset
    // of the shard that follows it. When the two are equal the shard's index holds no data,
    // so its entry is prepopulated. With no shards at all the loop body never runs.
    IsmShard previous = null;
    for (IsmShard shard : shardOffsetToShardMap.values()) {
        if (previous != null && previous.getIndexOffset() == shard.getBlockOffset()) {
            indexPerShard.put(previous.getId(), emptyShardIndex(previous));
        }
        previous = shard;
    }
    // The last shard has no successor: if its index offset equals the
    // start of the Bloom filter, then we know that the index is empty.
    if (previous != null && previous.getIndexOffset() == footer.getBloomFilterPosition()) {
        indexPerShard.put(previous.getId(), emptyShardIndex(previous));
    }
    return Optional.of(channel);
}

/** Builds the single-entry index used for a shard whose index section contains no data. */
private ImmutableSortedMap<RandomAccessData, IsmShardKey> emptyShardIndex(IsmShard shard) {
    IsmShardKey onlyKey = new IsmShardKey(resourceId.toString(), new RandomAccessData(0), shard.getBlockOffset(), shard.getIndexOffset());
    return ImmutableSortedMap.<RandomAccessData, IsmShardKey>orderedBy(RandomAccessData.UNSIGNED_LEXICOGRAPHICAL_COMPARATOR)
            .put(new RandomAccessData(0), onlyKey)
            .build();
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) RandomAccessData(org.apache.beam.runners.dataflow.util.RandomAccessData) IsmShard(org.apache.beam.runners.dataflow.internal.IsmFormat.IsmShard)

Example 44 with SeekableByteChannel

use of java.nio.channels.SeekableByteChannel in project beam by apache.

the class IsmReaderImpl method open.

/**
 * Opens a new channel for {@code resourceId}.
 *
 * @return the opened channel, which is guaranteed to be a {@link SeekableByteChannel}
 * @throws IOException if the resource cannot be opened
 * @throws IllegalArgumentException if the opened channel is not seekable; the channel is closed
 *     before the exception propagates
 */
private SeekableByteChannel open() throws IOException {
    ReadableByteChannel channel = FileSystems.open(resourceId);
    try {
        Preconditions.checkArgument(channel instanceof SeekableByteChannel, "IsmReaderImpl requires a SeekableByteChannel for path %s but received %s.", resourceId, channel);
    } catch (IllegalArgumentException e) {
        // The channel is unusable to us; close it so the failed open does not leak it.
        try {
            channel.close();
        } catch (IOException closeFailure) {
            // Keep the argument failure as the primary error and attach the close failure.
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
    return (SeekableByteChannel) channel;
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) ReadableByteChannel(java.nio.channels.ReadableByteChannel)

Example 45 with SeekableByteChannel

use of java.nio.channels.SeekableByteChannel in project beam by apache.

the class IsmReaderTest method testCachedTailSeekableByteChannelThrowsOnTruncate.

/** Verifies that truncating a CachedTailSeekableByteChannel raises NonWritableChannelException. */
@Test
public void testCachedTailSeekableByteChannelThrowsOnTruncate() throws Exception {
    // A channel built at offset 0 over an empty byte array is enough to exercise truncate().
    try (SeekableByteChannel emptyTail = new CachedTailSeekableByteChannel(0, new byte[0])) {
        expectedException.expect(NonWritableChannelException.class);
        emptyTail.truncate(0);
    }
}
Also used : SeekableByteChannel(java.nio.channels.SeekableByteChannel) CachedTailSeekableByteChannel(org.apache.beam.runners.dataflow.worker.IsmReaderImpl.CachedTailSeekableByteChannel) Test(org.junit.Test)

Aggregations

SeekableByteChannel (java.nio.channels.SeekableByteChannel): 130, ByteBuffer (java.nio.ByteBuffer): 58, Path (java.nio.file.Path): 48, IOException (java.io.IOException): 42, Test (org.junit.Test): 33, InputStream (java.io.InputStream): 14, NoSuchFileException (java.nio.file.NoSuchFileException): 12, Test (org.testng.annotations.Test): 9, ReadableByteChannel (java.nio.channels.ReadableByteChannel): 8, OpenOption (java.nio.file.OpenOption): 7, StandardOpenOption (java.nio.file.StandardOpenOption): 7, HashSet (java.util.HashSet): 7, File (java.io.File): 6, FileSystem (java.nio.file.FileSystem): 6, CloudStorageFileSystem (com.google.cloud.storage.contrib.nio.CloudStorageFileSystem): 5, URI (java.net.URI): 5, RandomAccessData (org.apache.beam.runners.dataflow.util.RandomAccessData): 5, OutputStream (java.io.OutputStream): 4, FileChannel (java.nio.channels.FileChannel): 4, WritableByteChannel (java.nio.channels.WritableByteChannel): 4