Search in sources:

Example 1 with FileIteratingFirehose

use of io.druid.data.input.impl.FileIteratingFirehose in project druid by druid-io.

the class StaticS3FirehoseFactory method connect.

/**
 * Creates a {@link FileIteratingFirehose} that reads the configured S3 URIs one at a time, in queue order.
 *
 * <p>For each URI the bucket is taken from the URI authority and the object key from the URI path with a
 * single leading "/" removed. Objects whose key ends in ".gz" are wrapped with
 * {@code CompressionUtils.gzipInputStream}; all content is read as UTF-8 lines.
 *
 * @param firehoseParser parser handed to the returned firehose
 * @return a firehose backed by a lazily advancing iterator over the S3 objects
 * @throws IOException declared by the interface; failures while opening an object surface from
 *                     {@code next()} via {@code Throwables.propagate}
 */
@Override
public Firehose connect(StringInputRowParser firehoseParser) throws IOException {
    Preconditions.checkNotNull(s3Client, "null s3Client");
    final LinkedList<URI> objectQueue = Lists.newLinkedList(uris);
    return new FileIteratingFirehose(new Iterator<LineIterator>() {

        @Override
        public boolean hasNext() {
            return !objectQueue.isEmpty();
        }

        @Override
        public LineIterator next() {
            // removeFirst() throws NoSuchElementException on an empty queue, as the Iterator contract
            // requires; poll() would return null and fail later with a NullPointerException.
            final URI nextURI = objectQueue.removeFirst();
            final String s3Bucket = nextURI.getAuthority();
            final S3Object s3Object = new S3Object(nextURI.getPath().startsWith("/") ? nextURI.getPath().substring(1) : nextURI.getPath());
            log.info("Reading from bucket[%s] object[%s] (%s)", s3Bucket, s3Object.getKey(), nextURI);
            // Declared outside the try so the catch block can release it if wrapping fails.
            InputStream innerInputStream = null;
            try {
                innerInputStream = s3Client.getObject(new S3Bucket(s3Bucket), s3Object.getKey()).getDataInputStream();
                final InputStream outerInputStream = s3Object.getKey().endsWith(".gz") ? CompressionUtils.gzipInputStream(innerInputStream) : innerInputStream;
                return IOUtils.lineIterator(new BufferedReader(new InputStreamReader(outerInputStream, Charsets.UTF_8)));
            } catch (Exception e) {
                // No LineIterator was handed out, so nobody else will close the already-opened stream.
                if (innerInputStream != null) {
                    try {
                        innerInputStream.close();
                    } catch (IOException closeFailure) {
                        e.addSuppressed(closeFailure);
                    }
                }
                log.error(e, "Exception reading from bucket[%s] object[%s]", s3Bucket, s3Object.getKey());
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, firehoseParser);
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) URI(java.net.URI) LineIterator(org.apache.commons.io.LineIterator) IOException(java.io.IOException) S3Bucket(org.jets3t.service.model.S3Bucket) BufferedReader(java.io.BufferedReader) FileIteratingFirehose(io.druid.data.input.impl.FileIteratingFirehose) S3Object(org.jets3t.service.model.S3Object)

Example 2 with FileIteratingFirehose

use of io.druid.data.input.impl.FileIteratingFirehose in project druid by druid-io.

the class StaticGoogleBlobStoreFirehoseFactory method connect.

/**
 * Creates a {@link FileIteratingFirehose} that reads the configured Google blobs one at a time, in queue order.
 *
 * <p>A single leading "/" is removed from each blob path before it is opened through
 * {@code GoogleByteSource}. Paths ending in ".gz" are wrapped with
 * {@code CompressionUtils.gzipInputStream}; all content is read as UTF-8 lines.
 *
 * @param stringInputRowParser parser handed to the returned firehose
 * @return a firehose backed by a lazily advancing iterator over the blobs
 * @throws IOException declared by the interface; failures while opening a blob surface from
 *                     {@code next()} via {@code Throwables.propagate}
 */
@Override
public Firehose connect(StringInputRowParser stringInputRowParser) throws IOException {
    Preconditions.checkNotNull(storage, "null storage");
    final LinkedList<GoogleBlob> objectQueue = Lists.newLinkedList(blobs);
    return new FileIteratingFirehose(new Iterator<LineIterator>() {

        @Override
        public boolean hasNext() {
            return !objectQueue.isEmpty();
        }

        @Override
        public LineIterator next() {
            // removeFirst() throws NoSuchElementException on an empty queue, as the Iterator contract
            // requires; poll() would return null and fail later with a NullPointerException.
            final GoogleBlob nextURI = objectQueue.removeFirst();
            final String bucket = nextURI.getBucket();
            final String path = nextURI.getPath().startsWith("/") ? nextURI.getPath().substring(1) : nextURI.getPath();
            // Declared outside the try so the catch block can release it if wrapping fails.
            InputStream innerInputStream = null;
            try {
                innerInputStream = new GoogleByteSource(storage, bucket, path).openStream();
                final InputStream outerInputStream = path.endsWith(".gz") ? CompressionUtils.gzipInputStream(innerInputStream) : innerInputStream;
                return IOUtils.lineIterator(new BufferedReader(new InputStreamReader(outerInputStream, Charsets.UTF_8)));
            } catch (Exception e) {
                // No LineIterator was handed out, so nobody else will close the already-opened stream.
                if (innerInputStream != null) {
                    try {
                        innerInputStream.close();
                    } catch (IOException closeFailure) {
                        e.addSuppressed(closeFailure);
                    }
                }
                LOG.error(e, "Exception opening bucket[%s] blob[%s]", bucket, path);
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, stringInputRowParser);
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) LineIterator(org.apache.commons.io.LineIterator) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) FileIteratingFirehose(io.druid.data.input.impl.FileIteratingFirehose) GoogleByteSource(io.druid.storage.google.GoogleByteSource)

Example 3 with FileIteratingFirehose

use of io.druid.data.input.impl.FileIteratingFirehose in project druid by druid-io.

the class StaticCloudFilesFirehoseFactory method connect.

/**
 * Creates a {@link FileIteratingFirehose} that reads the configured Cloud Files blobs one at a time, in
 * queue order.
 *
 * <p>Each blob is opened through a {@code CloudFilesByteSource} built for the blob's region and container.
 * Paths ending in ".gz" are wrapped with {@code CompressionUtils.gzipInputStream}; all content is read as
 * UTF-8 lines.
 *
 * @param stringInputRowParser parser handed to the returned firehose
 * @return a firehose backed by a lazily advancing iterator over the blobs
 * @throws IOException    declared by the interface; I/O failures while opening a blob surface from
 *                        {@code next()} via {@code Throwables.propagate}
 * @throws ParseException declared by the interface
 */
@Override
public Firehose connect(StringInputRowParser stringInputRowParser) throws IOException, ParseException {
    Preconditions.checkNotNull(cloudFilesApi, "null cloudFilesApi");
    final LinkedList<CloudFilesBlob> objectQueue = Lists.newLinkedList(blobs);
    return new FileIteratingFirehose(new Iterator<LineIterator>() {

        @Override
        public boolean hasNext() {
            return !objectQueue.isEmpty();
        }

        @Override
        public LineIterator next() {
            // removeFirst() throws NoSuchElementException on an empty queue, as the Iterator contract
            // requires; poll() would return null and fail later with a NullPointerException.
            final CloudFilesBlob nextURI = objectQueue.removeFirst();
            final String region = nextURI.getRegion();
            final String container = nextURI.getContainer();
            final String path = nextURI.getPath();
            log.info("Retrieving file from region[%s], container[%s] and path [%s]", region, container, path);
            CloudFilesObjectApiProxy objectApi = new CloudFilesObjectApiProxy(cloudFilesApi, region, container);
            final CloudFilesByteSource byteSource = new CloudFilesByteSource(objectApi, path);
            // Declared outside the try so the catch block can release it if wrapping fails.
            InputStream innerInputStream = null;
            try {
                innerInputStream = byteSource.openStream();
                final InputStream outerInputStream = path.endsWith(".gz") ? CompressionUtils.gzipInputStream(innerInputStream) : innerInputStream;
                return IOUtils.lineIterator(new BufferedReader(new InputStreamReader(outerInputStream, Charsets.UTF_8)));
            } catch (IOException e) {
                // No LineIterator was handed out, so nobody else will close the already-opened stream.
                if (innerInputStream != null) {
                    try {
                        innerInputStream.close();
                    } catch (IOException closeFailure) {
                        e.addSuppressed(closeFailure);
                    }
                }
                log.error(e, "Exception opening container[%s] blob[%s] from region[%s]", container, path, region);
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, stringInputRowParser);
}
Also used : CloudFilesByteSource(io.druid.storage.cloudfiles.CloudFilesByteSource) InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) IOException(java.io.IOException) LineIterator(org.apache.commons.io.LineIterator) CloudFilesObjectApiProxy(io.druid.storage.cloudfiles.CloudFilesObjectApiProxy) BufferedReader(java.io.BufferedReader) FileIteratingFirehose(io.druid.data.input.impl.FileIteratingFirehose)

Example 4 with FileIteratingFirehose

use of io.druid.data.input.impl.FileIteratingFirehose in project druid by druid-io.

the class StaticAzureBlobStoreFirehoseFactory method connect.

/**
 * Creates a {@link FileIteratingFirehose} that reads the configured Azure blobs one at a time, in queue order.
 *
 * <p>A single leading "/" is removed from each blob path before it is opened through
 * {@code AzureByteSource}. Paths ending in ".gz" are wrapped with
 * {@code CompressionUtils.gzipInputStream}; all content is read as UTF-8 lines.
 *
 * @param stringInputRowParser parser handed to the returned firehose
 * @return a firehose backed by a lazily advancing iterator over the blobs
 * @throws IOException declared by the interface; failures while opening a blob surface from
 *                     {@code next()} via {@code Throwables.propagate}
 */
@Override
public Firehose connect(StringInputRowParser stringInputRowParser) throws IOException {
    Preconditions.checkNotNull(azureStorage, "null azureStorage");
    final LinkedList<AzureBlob> objectQueue = Lists.newLinkedList(blobs);
    return new FileIteratingFirehose(new Iterator<LineIterator>() {

        @Override
        public boolean hasNext() {
            return !objectQueue.isEmpty();
        }

        @Override
        public LineIterator next() {
            // removeFirst() throws NoSuchElementException on an empty queue, as the Iterator contract
            // requires; poll() would return null and fail later with a NullPointerException.
            final AzureBlob nextURI = objectQueue.removeFirst();
            final String container = nextURI.getContainer();
            final String path = nextURI.getPath().startsWith("/") ? nextURI.getPath().substring(1) : nextURI.getPath();
            // Declared outside the try so the catch block can release it if wrapping fails.
            InputStream innerInputStream = null;
            try {
                innerInputStream = new AzureByteSource(azureStorage, container, path).openStream();
                final InputStream outerInputStream = path.endsWith(".gz") ? CompressionUtils.gzipInputStream(innerInputStream) : innerInputStream;
                return IOUtils.lineIterator(new BufferedReader(new InputStreamReader(outerInputStream, Charsets.UTF_8)));
            } catch (Exception e) {
                // No LineIterator was handed out, so nobody else will close the already-opened stream.
                if (innerInputStream != null) {
                    try {
                        innerInputStream.close();
                    } catch (IOException closeFailure) {
                        e.addSuppressed(closeFailure);
                    }
                }
                log.error(e, "Exception opening container[%s] blob[%s]", container, path);
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, stringInputRowParser);
}
Also used : InputStreamReader(java.io.InputStreamReader) InputStream(java.io.InputStream) AzureByteSource(io.druid.storage.azure.AzureByteSource) LineIterator(org.apache.commons.io.LineIterator) IOException(java.io.IOException) BufferedReader(java.io.BufferedReader) FileIteratingFirehose(io.druid.data.input.impl.FileIteratingFirehose)

Example 5 with FileIteratingFirehose

use of io.druid.data.input.impl.FileIteratingFirehose in project druid by druid-io.

the class LocalFirehoseFactory method connect.

/**
 * Creates a {@link FileIteratingFirehose} over every file in and beneath {@code baseDir} whose name matches
 * the wildcard {@code filter}.
 *
 * <p>The matching files are collected up front; each one is opened only when the firehose advances to it.
 *
 * @param firehoseParser parser handed to the returned firehose
 * @return a firehose backed by a lazily advancing iterator over the matched files
 * @throws IOException declared by the interface
 * @throws IAE         if {@code baseDir} is null
 * @throws ISE         if no file matches the filter
 */
@Override
public Firehose connect(StringInputRowParser firehoseParser) throws IOException {
    if (baseDir == null) {
        throw new IAE("baseDir is null");
    }
    log.info("Searching for all [%s] in and beneath [%s]", filter, baseDir.getAbsoluteFile());
    Collection<File> foundFiles = FileUtils.listFiles(baseDir.getAbsoluteFile(), new WildcardFileFilter(filter), TrueFileFilter.INSTANCE);
    if (foundFiles == null || foundFiles.isEmpty()) {
        throw new ISE("Found no files to ingest! Check your schema.");
    }
    // Pass the collection as a format argument: concatenating it into the message would let a '%'
    // in a file name be interpreted as a format specifier.
    log.info("Found files: %s", foundFiles);
    final LinkedList<File> files = Lists.newLinkedList(foundFiles);
    return new FileIteratingFirehose(new Iterator<LineIterator>() {

        @Override
        public boolean hasNext() {
            return !files.isEmpty();
        }

        @Override
        public LineIterator next() {
            // removeFirst() throws NoSuchElementException on an empty queue, as the Iterator contract
            // requires; poll() would hand null to FileUtils.lineIterator instead.
            final File nextFile = files.removeFirst();
            try {
                return FileUtils.lineIterator(nextFile);
            } catch (Exception e) {
                throw Throwables.propagate(e);
            }
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }, firehoseParser);
}
Also used : FileIteratingFirehose(io.druid.data.input.impl.FileIteratingFirehose) ISE(io.druid.java.util.common.ISE) IAE(io.druid.java.util.common.IAE) WildcardFileFilter(org.apache.commons.io.filefilter.WildcardFileFilter) File(java.io.File) LineIterator(org.apache.commons.io.LineIterator) IOException(java.io.IOException)

Aggregations

FileIteratingFirehose (io.druid.data.input.impl.FileIteratingFirehose): 5, IOException (java.io.IOException): 5, LineIterator (org.apache.commons.io.LineIterator): 5, BufferedReader (java.io.BufferedReader): 4, InputStream (java.io.InputStream): 4, InputStreamReader (java.io.InputStreamReader): 4, IAE (io.druid.java.util.common.IAE): 1, ISE (io.druid.java.util.common.ISE): 1, AzureByteSource (io.druid.storage.azure.AzureByteSource): 1, CloudFilesByteSource (io.druid.storage.cloudfiles.CloudFilesByteSource): 1, CloudFilesObjectApiProxy (io.druid.storage.cloudfiles.CloudFilesObjectApiProxy): 1, GoogleByteSource (io.druid.storage.google.GoogleByteSource): 1, File (java.io.File): 1, URI (java.net.URI): 1, WildcardFileFilter (org.apache.commons.io.filefilter.WildcardFileFilter): 1, S3Bucket (org.jets3t.service.model.S3Bucket): 1, S3Object (org.jets3t.service.model.S3Object): 1