Example 11 with Path

Use of org.apache.hadoop.fs.Path in the Apache Camel project.

Class HdfsProducerTest, method testProducerClose.

@Test
public void testProducerClose() throws Exception {
    if (!canTest()) {
        return;
    }
    for (int i = 0; i < 10; ++i) {
        // send 10 messages, and mark to close in last message
        template.sendBodyAndHeader("direct:start1", "PAPPO" + i, HdfsConstants.HDFS_CLOSE, i == 9);
    }
    Configuration conf = new Configuration();
    Path file1 = new Path("file:///" + TEMP_DIR.toUri() + "/test-camel1");
    FileSystem fs1 = FileSystem.get(file1.toUri(), conf);
    SequenceFile.Reader reader = new SequenceFile.Reader(fs1, file1, conf);
    Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
    Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
    int i = 0;
    while (reader.next(key, value)) {
        Text txt = (Text) value;
        assertEquals("PAPPO" + i, txt.toString());
        ++i;
    }
    IOHelper.close(reader);
}
Also used : Path(org.apache.hadoop.fs.Path) Configuration(org.apache.hadoop.conf.Configuration) SequenceFile(org.apache.hadoop.io.SequenceFile) FileSystem(org.apache.hadoop.fs.FileSystem) Writable(org.apache.hadoop.io.Writable) DoubleWritable(org.apache.hadoop.io.DoubleWritable) LongWritable(org.apache.hadoop.io.LongWritable) ByteWritable(org.apache.hadoop.io.ByteWritable) IntWritable(org.apache.hadoop.io.IntWritable) BooleanWritable(org.apache.hadoop.io.BooleanWritable) FloatWritable(org.apache.hadoop.io.FloatWritable) Text(org.apache.hadoop.io.Text) Test(org.junit.Test)
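
For context, a test like this presupposes a route that writes each incoming body to a local sequence file and closes the file when the HDFS_CLOSE header arrives. A minimal sketch of such a route, placed inside a RouteBuilder.configure() body; the endpoint URI and its options are assumptions for illustration, not copied from the project:

from("direct:start1")
    .to("hdfs:///" + TEMP_DIR.toUri() + "/test-camel1"
        + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&valueType=TEXT");

Sending true in the HdfsConstants.HDFS_CLOSE header on the last message asks the producer to close the underlying stream, so the SequenceFile.Reader in the test reads a fully flushed file.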

Example 12 with Path

Use of org.apache.hadoop.fs.Path in the Apache Camel project.

Class HdfsConsumer, method doPoll.

protected int doPoll() throws Exception {
    class ExcludePathFilter implements PathFilter {

        public boolean accept(Path path) {
            return !(path.toString().endsWith(config.getOpenedSuffix()) || path.toString().endsWith(config.getReadSuffix()));
        }
    }
    int numMessages = 0;
    HdfsInfo info = setupHdfs(false);
    FileStatus[] fileStatuses;
    if (info.getFileSystem().isFile(info.getPath())) {
        fileStatuses = info.getFileSystem().globStatus(info.getPath());
    } else {
        Path pattern = info.getPath().suffix("/" + this.config.getPattern());
        fileStatuses = info.getFileSystem().globStatus(pattern, new ExcludePathFilter());
    }
    if (fileStatuses == null) {
        // globStatus returns null when a non-glob path does not exist
        return 0;
    }
    for (FileStatus status : fileStatuses) {
        if (normalFileIsDirectoryNoSuccessFile(status, info)) {
            continue;
        }
        if (config.getOwner() != null) {
            // must match owner
            if (!config.getOwner().equals(status.getOwner())) {
                if (log.isDebugEnabled()) {
                    log.debug("Skipping file: {} as not matching owner: {}", status.getPath().toString(), config.getOwner());
                }
                continue;
            }
        }
        try {
            this.rwlock.writeLock().lock();
            this.istream = HdfsInputStream.createInputStream(status.getPath().toString(), this.config);
            if (!this.istream.isOpened()) {
                if (log.isDebugEnabled()) {
                    log.debug("Skipping file: {} because it doesn't exist anymore", status.getPath().toString());
                }
                continue;
            }
        } finally {
            this.rwlock.writeLock().unlock();
        }
        try {
            Holder<Object> key = new Holder<Object>();
            Holder<Object> value = new Holder<Object>();
            while (this.istream.next(key, value) >= 0) {
                Exchange exchange = this.getEndpoint().createExchange();
                Message message = new DefaultMessage();
                String fileName = StringUtils.substringAfterLast(status.getPath().toString(), "/");
                message.setHeader(Exchange.FILE_NAME, fileName);
                if (key.value != null) {
                    message.setHeader(HdfsHeader.KEY.name(), key.value);
                }
                message.setBody(value.value);
                exchange.setIn(message);
                log.debug("Processing file {}", fileName);
                try {
                    processor.process(exchange);
                } catch (Exception e) {
                    exchange.setException(e);
                }
                // in case of unhandled exceptions then let the exception handler handle them
                if (exchange.getException() != null) {
                    getExceptionHandler().handleException(exchange.getException());
                }
                numMessages++;
            }
        } finally {
            IOHelper.close(istream, "input stream", log);
        }
    }
    return numMessages;
}
Also used : Path(org.apache.hadoop.fs.Path) DefaultMessage(org.apache.camel.impl.DefaultMessage) PathFilter(org.apache.hadoop.fs.PathFilter) FileStatus(org.apache.hadoop.fs.FileStatus) Message(org.apache.camel.Message) DefaultMessage(org.apache.camel.impl.DefaultMessage) IOException(java.io.IOException) Exchange(org.apache.camel.Exchange)
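
The local ExcludePathFilter is the heart of the poll: it hides files that another consumer has already claimed (the "opened" suffix) or fully processed (the "read" suffix). To see the mechanism in isolation, here is a self-contained sketch against the local file system; the directory and suffix strings are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobFilterDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        // Glob everything in the inbox, letting the PathFilter drop files
        // that are mid-consumption (".opened") or already done (".read").
        FileStatus[] matches = fs.globStatus(
                new Path("/tmp/inbox/*"),
                path -> !(path.toString().endsWith(".opened")
                        || path.toString().endsWith(".read")));
        if (matches != null) {
            for (FileStatus status : matches) {
                System.out.println(status.getPath());
            }
        }
    }
}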

Example 13 with Path

Use of org.apache.hadoop.fs.Path in the Apache Camel project.

Class HdfsInputStream, method createInputStream.

public static HdfsInputStream createInputStream(String hdfsPath, HdfsConfiguration configuration) throws IOException {
    HdfsInputStream ret = new HdfsInputStream();
    ret.fileType = configuration.getFileType();
    ret.actualPath = hdfsPath;
    ret.suffixedPath = ret.actualPath + '.' + configuration.getOpenedSuffix();
    ret.suffixedReadPath = ret.actualPath + '.' + configuration.getReadSuffix();
    ret.chunkSize = configuration.getChunkSize();
    HdfsInfo info = HdfsInfoFactory.newHdfsInfo(ret.actualPath);
    if (info.getFileSystem().rename(new Path(ret.actualPath), new Path(ret.suffixedPath))) {
        ret.in = ret.fileType.createInputStream(ret.suffixedPath, configuration);
        ret.opened = true;
    } else {
        ret.opened = false;
    }
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path)
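
The rename does double duty: it tags the file as in progress and, because only one caller can win the rename, it also acts as a cheap claim, so concurrent consumers never read the same file. A minimal sketch of the same idiom on its own (imports as in the sketch above, plus java.net.URI; the paths are illustrative):

FileSystem fs = FileSystem.get(URI.create("file:///tmp"), new Configuration());
Path original = new Path("/tmp/inbox/data.txt");
Path claimed = new Path("/tmp/inbox/data.txt.opened");
// rename() returns false when the source is gone or another consumer
// renamed it first, so a false result simply means "skip this file".
if (fs.rename(original, claimed)) {
    // this caller now owns the file and can safely open `claimed`
}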

Example 14 with Path

Use of org.apache.hadoop.fs.Path in the Apache Camel project.

Class HdfsInputStream, method close.

@Override
public final void close() throws IOException {
    if (opened) {
        IOUtils.closeStream(in);
        HdfsInfo info = HdfsInfoFactory.newHdfsInfo(actualPath);
        info.getFileSystem().rename(new Path(suffixedPath), new Path(suffixedReadPath));
        opened = false;
    }
}
Also used : Path(org.apache.hadoop.fs.Path)
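
Paired with createInputStream above, every consumed file moves through a three-stage, rename-driven lifecycle. Assuming suffix values of "opened" and "read" (both are configurable), the stages look like this:

// part-0000          eligible: matched by the consumer's glob
// part-0000.opened   claimed:  renamed by createInputStream, hidden by ExcludePathFilter
// part-0000.read     done:     renamed by close(), never picked up again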

Example 15 with Path

Use of org.apache.hadoop.fs.Path in the Apache Camel project.

Class HdfsOutputStream, method createOutputStream.

public static HdfsOutputStream createOutputStream(String hdfsPath, HdfsConfiguration configuration) throws IOException {
    HdfsOutputStream ret = new HdfsOutputStream();
    ret.fileType = configuration.getFileType();
    ret.actualPath = hdfsPath;
    ret.info = new HdfsInfo(ret.actualPath);
    ret.suffixedPath = ret.actualPath + '.' + configuration.getOpenedSuffix();
    if (configuration.isWantAppend() || configuration.isAppend()) {
        if (!ret.info.getFileSystem().exists(new Path(ret.actualPath))) {
            configuration.setAppend(false);
        } else {
            configuration.setAppend(true);
            ret.info = new HdfsInfo(ret.suffixedPath);
            ret.info.getFileSystem().rename(new Path(ret.actualPath), new Path(ret.suffixedPath));
        }
    } else {
        if (ret.info.getFileSystem().exists(new Path(ret.actualPath))) {
            // only overwrite or fail when the target is a plain file, not a directory
            if (!ret.info.getFileSystem().isDirectory(new Path(ret.actualPath))) {
                if (configuration.isOverwrite()) {
                    ret.info.getFileSystem().delete(new Path(ret.actualPath), true);
                } else {
                    throw new RuntimeCamelException("The file already exists");
                }
            }
        }
    }
    ret.out = ret.fileType.createOutputStream(ret.suffixedPath, configuration);
    ret.opened = true;
    return ret;
}
Also used : Path(org.apache.hadoop.fs.Path) RuntimeCamelException(org.apache.camel.RuntimeCamelException)
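
The branches reduce to a small decision table: append when appending is requested and the target exists, delete and recreate when overwrite is enabled, and fail otherwise. Here is a hedged sketch of the failure branch; the no-arg constructor, the setter, and the path are assumptions about how the configuration is wired, not code from the project:

HdfsConfiguration config = new HdfsConfiguration();  // assumed default-constructible
config.setOverwrite(false);                          // assumed setter; overwrite normally defaults to true
try {
    HdfsOutputStream out = HdfsOutputStream.createOutputStream(
            "file:///tmp/out/part-0000", config);    // target already exists as a plain file
} catch (RuntimeCamelException e) {
    // "The file already exists": caller must enable overwrite, append, or pick a new name
}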
