use of org.apache.hadoop.io.SequenceFile in project incubator-gobblin by apache.
the class FsStateStore method put.
/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 *   This implementation does not support putting the state object into an existing store,
 *   since appending to an existing SequenceFile is not yet supported by Hadoop (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
  String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
  Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);
  if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
    throw new IOException("Failed to create a state file for table " + tmpTableName);
  }
  Closer closer = Closer.create();
  try {
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
        Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
  if (this.useTmpFileForPut) {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    renamePath(tmpTablePath, tablePath);
  }
}
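Because put always writes a fresh file, reading a table back is a plain SequenceFile scan. Below is a minimal sketch of that read side (a hypothetical helper, not Gobblin's actual FsStateStore.get), assuming the state class is a Writable with a no-arg constructor, matching how put() serializes it:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class StateFileDump {
  public static <T extends Writable> List<T> readAll(Configuration conf, Path tablePath, Class<T> stateClass)
      throws IOException {
    List<T> states = new ArrayList<>();
    // Reader.file(...) is the non-deprecated way to open a SequenceFile for reading.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(tablePath))) {
      Text key = new Text();
      T state = ReflectionUtils.newInstance(stateClass, conf);
      while (reader.next(key, state)) {
        states.add(state);
        // next() reuses the passed-in object, so hand out a fresh instance for the next record.
        state = ReflectionUtils.newInstance(stateClass, conf);
      }
    }
    return states;
  }
}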
use of org.apache.hadoop.io.SequenceFile in project hadoop by apache.
the class TestCodec method sequenceFileCodecTest.
private static void sequenceFileCodecTest(Configuration conf, int lines, String codecClass, int blockSize)
    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
  Path filePath = new Path("SequenceFileCodecTest." + codecClass);
  // Configuration
  conf.setInt("io.seqfile.compress.blocksize", blockSize);
  // Create the SequenceFile
  FileSystem fs = FileSystem.get(conf);
  LOG.info("Creating SequenceFile with codec \"" + codecClass + "\"");
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, filePath, Text.class, Text.class,
      CompressionType.BLOCK, (CompressionCodec) Class.forName(codecClass).newInstance());
  // Write some data
  LOG.info("Writing to SequenceFile...");
  for (int i = 0; i < lines; i++) {
    Text key = new Text("key" + i);
    Text value = new Text("value" + i);
    writer.append(key, value);
  }
  writer.close();
  // Read the data back and check
  LOG.info("Reading from the SequenceFile...");
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
  Writable key = (Writable) reader.getKeyClass().newInstance();
  Writable value = (Writable) reader.getValueClass().newInstance();
  int lc = 0;
  try {
    while (reader.next(key, value)) {
      assertEquals("key" + lc, key.toString());
      assertEquals("value" + lc, value.toString());
      lc++;
    }
  } finally {
    reader.close();
  }
  assertEquals(lines, lc);
  // Delete temporary files
  fs.delete(filePath, false);
  LOG.info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
}
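The FileSystem-based createWriter overload used in this test is deprecated in current Hadoop versions; the same writer can be expressed through SequenceFile.Writer.Option. A small sketch of the equivalent setup (file name and block size here are illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CodecWriterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("io.seqfile.compress.blocksize", 1000000); // illustrative block size
    // Writer.Option-based construction: equivalent in effect to the deprecated
    // createWriter(fs, conf, path, keyClass, valueClass, compressionType, codec) call above.
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(new Path("SequenceFileCodecTest.example")),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(Text.class),
        SequenceFile.Writer.compression(CompressionType.BLOCK, new DefaultCodec()))) {
      writer.append(new Text("key0"), new Text("value0"));
    }
  }
}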
use of org.apache.hadoop.io.SequenceFile in project flink by splunk.
the class HadoopIOFormatsITCase method preSubmit.
@Override
protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };
    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();
    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());
    // ------------------ Long / Text Key Value pair: ------------
    int kvCount = 4;
    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 1 a bit more often.
                for (int a = 0; a < 15; a++) {
                    key.set(i);
                    value.set(i + " - somestring");
                    writer.append(key, value);
                }
            }
            key.set(i);
            value.set(i + " - somestring");
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
    // ------------------ NullWritable / Long Key Value pair: ------------
    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);
    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            value1.set(i);
            writer1.append(NullWritable.get(), value1);
        }
    } finally {
        IOUtils.closeStream(writer1);
    }
}
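The files prepared here are later consumed through Flink's Hadoop compatibility layer. A hedged sketch of that read side, using class names from flink-hadoop-compatibility (the actual program in HadoopIOFormatsITCase may differ in detail):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

public class SequenceFileReadSketch {
  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    JobConf job = new JobConf();
    // Wrap Hadoop's mapred SequenceFileInputFormat so Flink can read the file as Tuple2 records.
    HadoopInputFormat<LongWritable, Text> input = new HadoopInputFormat<>(
        new SequenceFileInputFormat<LongWritable, Text>(), LongWritable.class, Text.class, job);
    // args[0] would be the URI produced as sequenceFileInPath in preSubmit() above.
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    DataSet<Tuple2<LongWritable, Text>> records = env.createInput(input);
    records.print();
  }
}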
use of org.apache.hadoop.io.SequenceFile in project kylin by apache.
the class HFileOutputFormat3 method writePartitions.
/**
* Write out a {@link SequenceFile} that can be read by
* {@link TotalOrderPartitioner} that contains the split points in startKeys.
*/
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys)
        throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!Arrays.equals(first.get(), HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
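The SequenceFile written here only becomes useful once it is handed to TotalOrderPartitioner. A hedged sketch of that wiring, roughly what HFileOutputFormat-style configure methods do (method and field names here are illustrative, not Kylin's exact code):

import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class PartitionerWiring {
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> startKeys, Path partitionsPath)
      throws Exception {
    // writePartitions(...) from the snippet above would produce the SequenceFile of split points:
    // writePartitions(job.getConfiguration(), partitionsPath, startKeys);
    // Then point TotalOrderPartitioner at that file so reducers line up with region boundaries.
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
  }
}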
use of org.apache.hadoop.io.SequenceFile in project avro by apache.
the class TestSequenceFileReader method testNonAvroReducer.
@Test
public void testNonAvroReducer() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);
  // configure input for Avro from sequence file
  AvroJob.setInputSequenceFile(job);
  AvroJob.setInputSchema(job, SCHEMA);
  FileInputFormat.setInputPaths(job, file().toURI().toString());
  // mapper is default, identity
  // use a hadoop reducer that consumes Avro input
  AvroJob.setMapOutputSchema(job, SCHEMA);
  job.setReducerClass(NonAvroReducer.class);
  // configure output for non-Avro SequenceFile
  job.setOutputFormat(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  // output key/value classes are default, LongWritable/Text
  JobClient.runJob(job);
  checkFile(new SequenceFileReader<>(new File(outputPath.toString() + "/part-00000")));
}
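checkFile(...) validates the job output through Avro's SequenceFileReader. For completeness, a hedged sketch of inspecting the same part file with a plain Hadoop SequenceFile.Reader, letting the file header report the key/value types instead of hard-coding them (the part-00000 name follows the mapred convention used in the test):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class InspectOutput {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path part = new Path(args[0], "part-00000"); // args[0]: the job's output directory
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(part))) {
      // Instantiate key/value objects from the classes recorded in the SequenceFile header.
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    }
  }
}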