Use of org.apache.hadoop.io.SequenceFile in project hbase by apache:
the class HFileOutputFormat2, method writePartitions.
/**
 * Write out a {@link SequenceFile} that can be read by {@link TotalOrderPartitioner}
 * and that contains the split points in startKeys.
 */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath,
    List<ImmutableBytesWritable> startKeys) throws IOException {
  LOG.info("Writing partition information to " + partitionsPath);
  if (startKeys.isEmpty()) {
    throw new IllegalArgumentException("No regions passed");
  }
  // We're generating a list of split points, and we don't ever have keys < the
  // first region (which has an empty start key), so we need to remove it.
  // Otherwise we would end up with an empty reducer with index 0.
  TreeSet<ImmutableBytesWritable> sorted = new TreeSet<>(startKeys);
  ImmutableBytesWritable first = sorted.first();
  if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
    throw new IllegalArgumentException(
        "First region of table should have empty start key. Instead has: "
            + Bytes.toStringBinary(first.get()));
  }
  sorted.remove(first);
  // Write the actual file
  FileSystem fs = partitionsPath.getFileSystem(conf);
  SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
      ImmutableBytesWritable.class, NullWritable.class);
  try {
    for (ImmutableBytesWritable startKey : sorted) {
      writer.append(startKey, NullWritable.get());
    }
  } finally {
    writer.close();
  }
}
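For reference, the file written above is consumed by TotalOrderPartitioner at job setup. A minimal sketch of reading the split points back with SequenceFile.Reader; the readPartitions helper is hypothetical, not part of HBase:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

// Hypothetical helper: reads back the split points written by writePartitions.
static List<ImmutableBytesWritable> readPartitions(Configuration conf, Path partitionsPath)
    throws IOException {
  List<ImmutableBytesWritable> splitPoints = new ArrayList<>();
  SequenceFile.Reader reader =
      new SequenceFile.Reader(conf, SequenceFile.Reader.file(partitionsPath));
  try {
    ImmutableBytesWritable key = new ImmutableBytesWritable();
    NullWritable value = NullWritable.get();
    // next(key, value) fills the key in place and returns false at end of file.
    while (reader.next(key, value)) {
      splitPoints.add(new ImmutableBytesWritable(key.copyBytes()));
    }
  } finally {
    reader.close();
  }
  return splitPoints;
}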
Use of org.apache.hadoop.io.SequenceFile in project flink by apache:
the class HadoopIOFormatsITCase, method preSubmit.
@Override
protected void preSubmit() throws Exception {
  resultPath = new String[] { getTempDirPath("result0"), getTempDirPath("result1") };
  File sequenceFile = createAndRegisterTempFile("seqFile");
  sequenceFileInPath = sequenceFile.toURI().toString();
  // Create a sequence file
  org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
  FileSystem fs = FileSystem.get(URI.create(sequenceFile.getAbsolutePath()), conf);
  Path path = new Path(sequenceFile.getAbsolutePath());
  // ------------------ Long / Text key-value pairs: ------------
  int kvCount = 4;
  LongWritable key = new LongWritable();
  Text value = new Text();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
    for (int i = 0; i < kvCount; i++) {
      if (i == 1) {
        // write key = 1 a bit more often.
        for (int a = 0; a < 15; a++) {
          key.set(i);
          value.set(i + " - somestring");
          writer.append(key, value);
        }
      }
      key.set(i);
      value.set(i + " - somestring");
      writer.append(key, value);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
  // ------------------ Null key / Long value pairs: ------------
  File sequenceFileNull = createAndRegisterTempFile("seqFileNullKey");
  sequenceFileInPathNull = sequenceFileNull.toURI().toString();
  path = new Path(sequenceFileInPathNull);
  LongWritable value1 = new LongWritable();
  SequenceFile.Writer writer1 = null;
  try {
    writer1 = SequenceFile.createWriter(fs, conf, path, NullWritable.class, value1.getClass());
    for (int i = 0; i < kvCount; i++) {
      value1.set(i);
      writer1.append(NullWritable.get(), value1);
    }
  } finally {
    IOUtils.closeStream(writer1);
  }
}
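Note that the createWriter(fs, conf, path, ...) overload used twice above is deprecated in Hadoop 2.x. A minimal sketch of the equivalent options-based call for the first file, assuming the same conf and path variables are in scope:

import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

SequenceFile.Writer writer = null;
try {
  writer = SequenceFile.createWriter(conf,
      SequenceFile.Writer.file(path), // replaces the separate fs + path arguments
      SequenceFile.Writer.keyClass(LongWritable.class),
      SequenceFile.Writer.valueClass(Text.class));
  writer.append(new LongWritable(0), new Text("0 - somestring"));
} finally {
  IOUtils.closeStream(writer);
}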
Use of org.apache.hadoop.io.SequenceFile in project nifi by apache:
the class KeyValueReader, method readSequenceFile.
@Override
public Set<FlowFile> readSequenceFile(Path file, Configuration configuration, FileSystem fileSystem) throws IOException {
  final SequenceFile.Reader reader;
  Set<FlowFile> flowFiles = new HashSet<>();
  reader = new SequenceFile.Reader(configuration, Reader.file(fileSystem.makeQualified(file)));
  final Text key = new Text();
  final KeyValueWriterCallback callback = new KeyValueWriterCallback(reader);
  final String inputfileName = file.getName() + "." + System.nanoTime() + ".";
  int counter = 0;
  LOG.debug("Read from SequenceFile: {}", new Object[] { file });
  try {
    while (reader.next(key)) {
      String fileName = key.toString();
      // the key may or may not be a file name
      if (LOOKS_LIKE_FILENAME.matcher(fileName).matches()) {
        if (fileName.contains(File.separator)) {
          fileName = StringUtils.substringAfterLast(fileName, File.separator);
        }
        fileName = fileName + "." + System.nanoTime();
      } else {
        fileName = inputfileName + ++counter;
      }
      FlowFile flowFile = session.create();
      flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), fileName);
      callback.key = key;
      try {
        flowFile = session.write(flowFile, callback);
        flowFiles.add(flowFile);
      } catch (ProcessException e) {
        LOG.error("Could not write to flowfile {}", new Object[] { flowFile }, e);
        session.remove(flowFile);
      }
      key.clear();
    }
  } finally {
    IOUtils.closeQuietly(reader);
  }
  return flowFiles;
}
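The reader above advances key-by-key and defers value extraction to the callback. When the key and value types are not known in advance, they can instead be discovered from the SequenceFile header; a minimal, self-contained sketch (the file path is hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;

Configuration conf = new Configuration();
Path file = new Path("/tmp/input.seq"); // hypothetical path
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(file));
try {
  // Instantiate key/value containers from the classes recorded in the file header.
  Writable key = (Writable) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
  Writable value = (Writable) ReflectionUtils.newInstance(reader.getValueClass(), conf);
  while (reader.next(key, value)) {
    System.out.println(key + "\t" + value);
  }
} finally {
  reader.close();
}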
Use of org.apache.hadoop.io.SequenceFile in project elephant-bird by twitter:
the class TestSequenceFileStorage, method setUp.
@Before
public void setUp() throws Exception {
  // create local Pig server
  pigServer = PigTestUtil.makePigServer();
  // create temp SequenceFile
  File tempFile = File.createTempFile("test", ".txt");
  tempFilename = tempFile.getAbsolutePath();
  Path path = new Path("file:///" + tempFilename);
  Configuration conf = new Configuration();
  FileSystem fs = path.getFileSystem(conf);
  IntWritable key = new IntWritable();
  Text value = new Text();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), value.getClass());
    for (int i = 0; i < DATA.length; ++i) {
      key.set(i);
      value.set(DATA[i]);
      writer.append(key, value);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
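A fixture like this is typically verified by reading the file back. A minimal sketch checking keys and values against DATA, assuming the conf and path variables from setUp are still in scope:

IntWritable key = new IntWritable();
Text value = new Text();
SequenceFile.Reader reader = new SequenceFile.Reader(conf, SequenceFile.Reader.file(path));
try {
  for (int i = 0; reader.next(key, value); ++i) {
    // Each record should round-trip: the key is the index, the value is DATA[i].
    assertEquals(i, key.get());
    assertEquals(DATA[i], value.toString());
  }
} finally {
  reader.close();
}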
Use of org.apache.hadoop.io.SequenceFile in project elephant-bird by twitter:
the class AbstractTestWritableConverter, method setup.
@Before
public void setup() throws IOException {
  // create local Pig server
  pigServer = PigTestUtil.makePigServer();
  // create temp SequenceFile
  final File tempFile = File.createTempFile("test", ".txt");
  tempFilename = tempFile.getAbsolutePath();
  final Path path = new Path("file:///" + tempFilename);
  final Configuration conf = new Configuration();
  final FileSystem fs = path.getFileSystem(conf);
  final IntWritable key = new IntWritable();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), writableClass);
    for (int i = 0; i < data.length; ++i) {
      key.set(i);
      writer.append(key, data[i]);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}
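Because the value type here is parameterized by writableClass, the same write loop can be factored into a reusable generic helper. A sketch; the writeFixture name is an assumption, and it keeps the deprecated createWriter overload used throughout this page:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;

// Hypothetical helper: writes values[0..n) keyed by their index, mirroring the setup above.
static <V extends Writable> void writeFixture(Configuration conf, Path path,
    Class<V> valueClass, V[] values) throws IOException {
  FileSystem fs = path.getFileSystem(conf);
  IntWritable key = new IntWritable();
  SequenceFile.Writer writer = null;
  try {
    writer = SequenceFile.createWriter(fs, conf, path, IntWritable.class, valueClass);
    for (int i = 0; i < values.length; ++i) {
      key.set(i);
      writer.append(key, values[i]);
    }
  } finally {
    IOUtils.closeStream(writer);
  }
}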