
Example 1 with SequenceFile

Use of org.apache.hadoop.io.SequenceFile in project hbase by apache.

Class HFileOutputFormat2, method writePartitions.

/**
   * Write out a {@link SequenceFile} that can be read by
   * {@link TotalOrderPartitioner} that contains the split points in startKeys.
   */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: " + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
Also used : ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable), SequenceFile (org.apache.hadoop.io.SequenceFile), TreeSet (java.util.TreeSet), FileSystem (org.apache.hadoop.fs.FileSystem), HFileSystem (org.apache.hadoop.hbase.fs.HFileSystem)
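
Per the Javadoc above, the file written here is meant to be read by TotalOrderPartitioner. A minimal sketch of how such a job could be wired up, assuming a standard MapReduce Job (the helper and job name are illustrative, not HBase source):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

// Illustrative helper: point a job at the split-point file written above.
static Job configurePartitioner(Configuration conf, Path partitionsPath) throws IOException {
    Job job = Job.getInstance(conf, "bulk-load");
    job.setPartitionerClass(TotalOrderPartitioner.class);
    // TotalOrderPartitioner reads the SequenceFile of split points at runtime.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
    return job;
}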

Example 2 with SequenceFile

Use of org.apache.hadoop.io.SequenceFile in project flink by apache.

Class HadoopIOFormatsITCase, method preSubmit.

@Override
protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };
    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();
    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());
    //  ------------------ Long / Text Key Value pair: ------------
    int kvCount = 4;
    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 1 a bit more often (15 extra copies).
                for (int a = 0; a < 15; a++) {
                    key.set(i);
                    value.set(i + " - somestring");
                    writer.append(key, value);
                }
            }
            key.set(i);
            value.set(i + " - somestring");
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
    //  ------------------ NullWritable / LongWritable Key Value pair: ------------
    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);
    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            value1.set(i);
            writer1.append(NullWritable.get(), value1);
        }
    } finally {
        IOUtils.closeStream(writer1);
    }
}
Also used : Path (org.apache.hadoop.fs.Path), Configuration (org.apache.flink.configuration.Configuration), Text (org.apache.hadoop.io.Text), NullWritable (org.apache.hadoop.io.NullWritable), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), LongWritable (org.apache.hadoop.io.LongWritable), File (java.io.File)
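
When a test like this misbehaves, reading the file back and printing its records is a quick sanity check. A minimal sketch using the non-deprecated Reader option API, for the LongWritable/Text file written in the first block (the helper name is an assumption):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Illustrative helper: dump the key/value pairs of the file written in preSubmit().
static void dumpLongTextFile(Configuration conf, Path path) throws IOException {
    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
        while (reader.next(key, value)) {
            System.out.println(key.get() + " -> " + value);
        }
    } finally {
        IOUtils.closeStream(reader);
    }
}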

Example 3 with SequenceFile

Use of org.apache.hadoop.io.SequenceFile in project nifi by apache.

Class KeyValueReader, method readSequenceFile.

@Override
public Set<FlowFile> readSequenceFile(Path file, Configuration configuration, FileSystem fileSystem) throws IOException {
    final SequenceFile.Reader reader;
    Set<FlowFile> flowFiles = new HashSet<>();
    reader = new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
    final Text key = new Text();
    final KeyValueWriterCallback callback = new KeyValueWriterCallback(reader);
    final String inputfileName = file.getName() + "." + System.nanoTime() + ".";
    int counter = 0;
    LOG.debug("Read from SequenceFile: {} ", new Object[] { file });
    try {
        while (reader.next(key)) {
            String fileName = key.toString();
            // the key may or may not be a file name
            if (LOOKS_LIKE_FILENAME.matcher(fileName).matches()) {
                if (fileName.contains(File.separator)) {
                    fileName = StringUtils.substringAfterLast(fileName, File.separator);
                }
                fileName = fileName + "." + System.nanoTime();
            } else {
                fileName = inputfileName + ++counter;
            }
            FlowFile flowFile = session.create();
            flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
            callback.key = key;
            try {
                flowFile = session.write(flowFile, callback);
                flowFiles.add(flowFile);
            } catch (ProcessException e) {
                LOG.error("Could not write to flowfile {}", new Object[] { flowFile }, e);
                session.remove(flowFile);
            }
            key.clear();
        }
    } finally {
        IOUtils.closeQuietly(reader);
    }
    return flowFiles;
}
Also used : FlowFile (org.apache.nifi.flowfile.FlowFile), ProcessException (org.apache.nifi.processor.exception.ProcessException), Reader (org.apache.hadoop.io.SequenceFile.Reader), SequenceFile (org.apache.hadoop.io.SequenceFile), Text (org.apache.hadoop.io.Text), HashSet (java.util.HashSet)
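
KeyValueWriterCallback is defined elsewhere in the NiFi codebase and is not shown on this page. Purely as an illustration of how a callback can pair with reader.next(key), here is a hedged sketch; the value type and field layout are assumptions, not the actual NiFi source:

import java.io.IOException;
import java.io.OutputStream;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.nifi.processor.io.OutputStreamCallback;

// Hypothetical sketch: copy the value paired with the most recent
// reader.next(key) call into the FlowFile's content stream.
static class KeyValueWriterCallback implements OutputStreamCallback {
    final SequenceFile.Reader reader;
    Text key;  // assigned by the caller before each session.write()

    KeyValueWriterCallback(SequenceFile.Reader reader) {
        this.reader = reader;
    }

    @Override
    public void process(OutputStream out) throws IOException {
        Text value = new Text();  // assumes Text values; the real class may differ
        reader.getCurrentValue(value);
        out.write(value.getBytes(), 0, value.getLength());
    }
}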

Example 4 with SequenceFile

Use of org.apache.hadoop.io.SequenceFile in project elephant-bird by twitter.

Class TestSequenceFileStorage, method setUp.

@Before
public void setUp() throws Exception {
    // create local Pig server
    pigServer = PigTestUtil.makePigServer();
    // create temp SequenceFile
    File tempFile = File.createTempFile("test", ".txt");
    tempFilename = tempFile.getAbsolutePath();
    Path path = new Path("file:///" + tempFilename);
    Configuration conf = new Configuration();
    FileSystem fs = path.getFileSystem(conf);
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < DATA.length; ++i) {
            key.set(i);
            value.set(DATA[i]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
Also used : Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), Text (org.apache.hadoop.io.Text), File (java.io.File), IntWritable (org.apache.hadoop.io.IntWritable), Before (org.junit.Before)
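
The createWriter(fs, conf, path, keyClass, valueClass) overload used throughout these examples is deprecated in current Hadoop releases. The same write loop can be expressed with the Writer.Option API; a sketch (the method name is illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

// Illustrative: the non-deprecated Writer.Option form of the same write loop.
static void writeWithOptions(Configuration conf, Path path, String[] data) throws IOException {
    IntWritable key = new IntWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(IntWritable.class),
                SequenceFile.Writer.valueClass(Text.class));
        for (int i = 0; i < data.length; ++i) {
            key.set(i);
            value.set(data[i]);
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}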

Example 5 with SequenceFile

Use of org.apache.hadoop.io.SequenceFile in project elephant-bird by twitter.

Class AbstractTestWritableConverter, method setup.

@Before
public void setup() throws IOException {
    // create local Pig server
    pigServer = PigTestUtil.makePigServer();
    // create temp SequenceFile
    final File tempFile = File.createTempFile("test", ".txt");
    tempFilename = tempFile.getAbsolutePath();
    final Path path = new Path("file:///" + tempFilename);
    final Configuration conf = new Configuration();
    final FileSystem fs = path.getFileSystem(conf);
    final IntWritable key = new IntWritable();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), writableClass);
        for (int i = 0; i < data.length; ++i) {
            key.set(i);
            writer.append(key, data[i]);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
}
Also used : Path (org.apache.hadoop.fs.Path), Configuration (org.apache.hadoop.conf.Configuration), SequenceFile (org.apache.hadoop.io.SequenceFile), FileSystem (org.apache.hadoop.fs.FileSystem), File (java.io.File), IntWritable (org.apache.hadoop.io.IntWritable), Before (org.junit.Before)
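
The aggregation list below shows DefaultCodec among the co-occurring classes, so compressed writers appear in some of the other usages. A hedged sketch of enabling block compression with the option API (the helper name is illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

// Illustrative: same option-style writer as above, with block compression enabled.
static SequenceFile.Writer createCompressedWriter(Configuration conf, Path path) throws IOException {
    DefaultCodec codec = new DefaultCodec();
    codec.setConf(conf);  // DefaultCodec is Configurable and needs a Configuration
    return SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(path),
            SequenceFile.Writer.keyClass(IntWritable.class),
            SequenceFile.Writer.valueClass(Text.class),
            SequenceFile.Writer.compression(CompressionType.BLOCK, codec));
}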

Aggregations

SequenceFile (org.apache.hadoop.io.SequenceFile) 16
Path (org.apache.hadoop.fs.Path) 10
FileSystem (org.apache.hadoop.fs.FileSystem) 9
Text (org.apache.hadoop.io.Text) 8
IOException (java.io.IOException) 5
Configuration (org.apache.hadoop.conf.Configuration) 5
File (java.io.File) 4
IntWritable (org.apache.hadoop.io.IntWritable) 4
Writable (org.apache.hadoop.io.Writable) 3
Closer (com.google.common.io.Closer) 2
ArrayList (java.util.ArrayList) 2
TreeSet (java.util.TreeSet) 2
HFileSystem (org.apache.hadoop.hbase.fs.HFileSystem) 2
ImmutableBytesWritable (org.apache.hadoop.hbase.io.ImmutableBytesWritable) 2
LongWritable (org.apache.hadoop.io.LongWritable) 2
NullWritable (org.apache.hadoop.io.NullWritable) 2
DefaultCodec (org.apache.hadoop.io.compress.DefaultCodec) 2
Before (org.junit.Before) 2
BufferedReader (java.io.BufferedReader) 1
InputStreamReader (java.io.InputStreamReader) 1