use of org.apache.hadoop.io.SequenceFile in project incubator-gobblin by apache.
the class FsStateStore method put.
/**
 * See {@link StateStore#put(String, String, T)}.
 *
 * <p>
 *   This implementation does not support putting the state object into an existing store,
 *   since appending to an existing SequenceFile is not yet supported by Hadoop (HADOOP-7139).
 * </p>
 */
@Override
public void put(String storeName, String tableName, T state) throws IOException {
  String tmpTableName = this.useTmpFileForPut ? TMP_FILE_PREFIX + tableName : tableName;
  Path tmpTablePath = new Path(new Path(this.storeRootDir, storeName), tmpTableName);
  if (!this.fs.exists(tmpTablePath) && !create(storeName, tmpTableName)) {
    throw new IOException("Failed to create a state file for table " + tmpTableName);
  }
  Closer closer = Closer.create();
  try {
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = closer.register(SequenceFile.createWriter(this.fs, this.conf, tmpTablePath,
        Text.class, this.stateClass, SequenceFile.CompressionType.BLOCK, new DefaultCodec()));
    writer.append(new Text(Strings.nullToEmpty(state.getId())), state);
  } catch (Throwable t) {
    throw closer.rethrow(t);
  } finally {
    closer.close();
  }
  if (this.useTmpFileForPut) {
    Path tablePath = new Path(new Path(this.storeRootDir, storeName), tableName);
    renamePath(tmpTablePath, tablePath);
  }
}
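Because put always writes a fresh file, reading a table back is a plain SequenceFile scan. Below is a minimal sketch of that read side (a hypothetical helper, not Gobblin's actual FsStateStore.get), assuming the state class is a Writable with a no-arg constructor, matching how put() serializes it:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class StateFileDump {
  public static <T extends Writable> List<T> readAll(Configuration conf, Path tablePath, Class<T> stateClass)
      throws IOException {
    List<T> states = new ArrayList<>();
    // Reader.file(...) is the non-deprecated way to open a SequenceFile for reading.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(tablePath))) {
      Text key = new Text();
      T state = ReflectionUtils.newInstance(stateClass, conf);
      while (reader.next(key, state)) {
        states.add(state);
        // next() reuses the passed-in object, so hand out a fresh instance for the next record.
        state = ReflectionUtils.newInstance(stateClass, conf);
      }
    }
    return states;
  }
}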
use of org.apache.hadoop.io.SequenceFile in project hadoop by apache.
the class TestCodec method sequenceFileCodecTest.
private static void sequenceFileCodecTest(Configuration conf, int lines, String codecClass, int blockSize)
    throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
  Path filePath = new Path("SequenceFileCodecTest." + codecClass);
  // Configuration
  conf.setInt("io.seqfile.compress.blocksize", blockSize);
  // Create the SequenceFile
  FileSystem fs = FileSystem.get(conf);
  LOG.info("Creating SequenceFile with codec \"" + codecClass + "\"");
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, filePath, Text.class, Text.class,
      CompressionType.BLOCK, (CompressionCodec) Class.forName(codecClass).newInstance());
  // Write some data
  LOG.info("Writing to SequenceFile...");
  for (int i = 0; i < lines; i++) {
    Text key = new Text("key" + i);
    Text value = new Text("value" + i);
    writer.append(key, value);
  }
  writer.close();
  // Read the data back and check
  LOG.info("Reading from the SequenceFile...");
  SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, conf);
  Writable key = (Writable) reader.getKeyClass().newInstance();
  Writable value = (Writable) reader.getValueClass().newInstance();
  int lc = 0;
  try {
    while (reader.next(key, value)) {
      assertEquals("key" + lc, key.toString());
      assertEquals("value" + lc, value.toString());
      lc++;
    }
  } finally {
    reader.close();
  }
  assertEquals(lines, lc);
  // Delete temporary files
  fs.delete(filePath, false);
  LOG.info("SUCCESS! Completed SequenceFileCodecTest with codec \"" + codecClass + "\"");
}
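The FileSystem-based createWriter overload used in this test is deprecated in current Hadoop versions; the same writer can be expressed through SequenceFile.Writer.Option. A small sketch of the equivalent setup (file name and block size here are illustrative only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.DefaultCodec;

public class CodecWriterSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setInt("io.seqfile.compress.blocksize", 1000000); // illustrative block size
    // Writer.Option-based construction: equivalent in effect to the deprecated
    // createWriter(fs, conf, path, keyClass, valueClass, compressionType, codec) call above.
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(new Path("SequenceFileCodecTest.example")),
        SequenceFile.Writer.keyClass(Text.class),
        SequenceFile.Writer.valueClass(Text.class),
        SequenceFile.Writer.compression(CompressionType.BLOCK, new DefaultCodec()))) {
      writer.append(new Text("key0"), new Text("value0"));
    }
  }
}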
use of org.apache.hadoop.io.SequenceFile in project flink by splunk.
the class HadoopIOFormatsITCase method preSubmit.
@Override
protected void preSubmit() throws Exception {
    resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };
    File sequenceFile = createAndRegisterTempFile("seqFile");
    sequenceFileInPath = sequenceFile.toURI().toString();
    // Create a sequence file
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
    Path path = new Path(sequenceFile.getAbsolutePath());
    // ------------------ Long / Text Key Value pair: ------------
    int kvCount = 4;
    LongWritable key = new LongWritable();
    Text value = new Text();
    SequenceFile.Writer writer = null;
    try {
        writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
        for (int i = 0; i < kvCount; i++) {
            if (i == 1) {
                // write key = 1 a bit more often.
                for (int a = 0; a < 15; a++) {
                    key.set(i);
                    value.set(i + " - somestring");
                    writer.append(key, value);
                }
            }
            key.set(i);
            value.set(i + " - somestring");
            writer.append(key, value);
        }
    } finally {
        IOUtils.closeStream(writer);
    }
    // ------------------ NullWritable / Long Key Value pair: ------------
    File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
    sequenceFileInPathNull = sequenceFileNull.toURI().toString();
    path = new Path(sequenceFileInPathNull);
    LongWritable value1 = new LongWritable();
    SequenceFile.Writer writer1 = null;
    try {
        writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
        for (int i = 0; i < kvCount; i++) {
            value1.set(i);
            writer1.append(NullWritable.get(), value1);
        }
    } finally {
        IOUtils.closeStream(writer1);
    }
}
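The files prepared here are later consumed through Flink's Hadoop compatibility layer. A hedged sketch of that read side, using class names from flink-hadoop-compatibility (the actual program in HadoopIOFormatsITCase may differ in detail):

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapred.HadoopInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;

public class SequenceFileReadSketch {
  public static void main(String[] args) throws Exception {
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    JobConf job = new JobConf();
    // Wrap Hadoop's mapred SequenceFileInputFormat so Flink can read the file as Tuple2 records.
    HadoopInputFormat<LongWritable, Text> input = new HadoopInputFormat<>(
        new SequenceFileInputFormat<LongWritable, Text>(), LongWritable.class, Text.class, job);
    // args[0] would be the URI produced as sequenceFileInPath in preSubmit() above.
    SequenceFileInputFormat.addInputPath(job, new Path(args[0]));
    DataSet<Tuple2<LongWritable, Text>> records = env.createInput(input);
    records.print();
  }
}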
use of org.apache.hadoop.io.SequenceFile in project kylin by apache.
the class HFileOutputFormat3 method writePartitions.
/**
* Write out a {@link SequenceFile} that can be read by
* {@link TotalOrderPartitioner} that contains the split points in startKeys.
*/
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath, List<ImmutableBytesWritable> startKeys)
        throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }
    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);
    ImmutableBytesWritable first = sorted.first();
    if (!Arrays.equals(first.get(), HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);
    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
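The SequenceFile written here only becomes useful once it is handed to TotalOrderPartitioner. A hedged sketch of that wiring, roughly what HFileOutputFormat-style configure methods do (method and field names here are illustrative, not Kylin's exact code):

import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

public class PartitionerWiring {
  static void configurePartitioner(Job job, List<ImmutableBytesWritable> startKeys, Path partitionsPath)
      throws Exception {
    // writePartitions(...) from the snippet above would produce the SequenceFile of split points:
    // writePartitions(job.getConfiguration(), partitionsPath, startKeys);
    // Then point TotalOrderPartitioner at that file so reducers line up with region boundaries.
    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
  }
}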
use of org.apache.hadoop.io.SequenceFile in project avro by apache.
the class TestSequenceFileReader method testNonAvroReducer.
@Test
public void testNonAvroReducer() throws Exception {
  JobConf job = new JobConf();
  Path outputPath = new Path(OUTPUT_DIR.getRoot().getPath());
  outputPath.getFileSystem(job).delete(outputPath, true);
  // configure input for Avro from sequence file
  AvroJob.setInputSequenceFile(job);
  AvroJob.setInputSchema(job, SCHEMA);
  FileInputFormat.setInputPaths(job, file().toURI().toString());
  // mapper is default, identity
  // use a hadoop reducer that consumes Avro input
  AvroJob.setMapOutputSchema(job, SCHEMA);
  job.setReducerClass(NonAvroReducer.class);
  // configure output for non-Avro SequenceFile
  job.setOutputFormat(SequenceFileOutputFormat.class);
  FileOutputFormat.setOutputPath(job, outputPath);
  // output key/value classes are default, LongWritable/Text
  JobClient.runJob(job);
  checkFile(new SequenceFileReader<>(new File(outputPath.toString() + "/part-00000")));
}
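checkFile(...) validates the job output through Avro's SequenceFileReader. For completeness, a hedged sketch of inspecting the same part file with a plain Hadoop SequenceFile.Reader, letting the file header report the key/value types instead of hard-coding them (the part-00000 name follows the mapred convention used in the test):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

public class InspectOutput {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Path part = new Path(args[0], "part-00000"); // args[0]: the job's output directory
    try (SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(part))) {
      // Instantiate key/value objects from the classes recorded in the SequenceFile header.
      Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
      while (reader.next(key, value)) {
        System.out.println(key + "\t" + value);
      }
    }
  }
}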