Use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by apache.
Class TfUtils, method getPartFileID:
/**
* Function to generate custom file names (transform-part-.....) for
* mappers' output for ApplyTfCSV job. The idea is to find the index
* of (thisfile, fileoffset) in the list of all offsets from the
* counters/offsets file, which was generated from either GenTfMtdMR
* or AssignRowIDMR job.
*
* @param job job configuration
* @param offset file offset
* @return part file id (ie, 00001, 00002, etc)
* @throws IOException if IOException occurs
*/
public String getPartFileID(JobConf job, long offset) throws IOException {
    Reader reader = null;
    int id = 0;
    try {
        reader = initOffsetsReader(job);
        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        String thisFile = TfUtils.getPartFileName(job);
        while (reader.next(key, value)) {
            if (thisFile.equals(value.filename) && value.fileOffset == offset)
                break;
            id++;
        }
    } finally {
        IOUtilFunctions.closeSilently(reader);
    }
    String sid = Integer.toString(id);
    char[] carr = new char[5 - sid.length()];
    Arrays.fill(carr, '0');
    String ret = (new String(carr)).concat(sid);
    return ret;
}
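The last four lines pad the numeric id out to a fixed width of five characters by hand. A minimal equivalent sketch (illustrative only, not SystemML's code; the helper name toPartFileID is hypothetical) uses a format string instead:

// Equivalent zero-padding sketch: 7 -> "00007", 123 -> "00123".
private static String toPartFileID(int id) {
    return String.format("%05d", id);
}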
Use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by apache.
Class ApplyTfBBMapper, method configure:
@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        _partFileWithHeader = TfUtils.isPartFileWithHeader(job);
        tfmapper = new TfUtils(job);
        tfmapper.loadTfMetadata(job, true);
        // Load relevant information for CSV Reblock
        ByteWritable key = new ByteWritable();
        OffsetCount value = new OffsetCount();
        Path p = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        Path path = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
        String thisfile = path.makeQualified(fs).toString();
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, p, job);
            while (reader.next(key, value)) {
                // "key" needn't be checked since the offset file has information about a single CSV input (the raw data file)
                if (thisfile.equals(value.filename))
                    offsetMap.put(value.fileOffset, value.count);
            }
        } finally {
            IOUtilFunctions.closeSilently(reader);
        }
        idxRow = new CSVReblockMapper.IndexedBlockRow();
        int maxBclen = 0;
        for (ArrayList<CSVReblockInstruction> insv : csv_reblock_instructions)
            for (CSVReblockInstruction in : insv) {
                if (maxBclen < in.bclen)
                    maxBclen = in.bclen;
            }
        // always dense since common csv usecase
        idxRow.getRow().data.reset(1, maxBclen, false);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (JSONException e) {
        throw new RuntimeException(e);
    }
}
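The offsets file read here is a SequenceFile keyed by ByteWritable, the same pattern used by getPartFileID above. The following standalone sketch shows that read loop in isolation; the path and the Text value type are assumptions made for illustration, since the real file stores OffsetCount values and its location comes from the job configuration:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class ByteKeyedSeqFileScan {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Hypothetical location; the mapper above resolves the real path from the job config.
        Path seqPath = new Path("/tmp/offsets.seq");
        FileSystem fs = FileSystem.get(seqPath.toUri(), conf);
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqPath, conf);
        try {
            // Reuse one key/value pair across iterations, as the mapper does.
            ByteWritable key = new ByteWritable();
            Text value = new Text();
            while (reader.next(key, value)) {
                System.out.println(key.get() + " -> " + value);
            }
        } finally {
            reader.close();
        }
    }
}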
Use of org.apache.hadoop.io.ByteWritable in project presto by prestodb.
Class OrcTester, method decodeRecordReaderValue:
private static Object decodeRecordReaderValue(Type type, Object actualValue) {
    if (actualValue instanceof OrcLazyObject) {
        try {
            actualValue = ((OrcLazyObject) actualValue).materialize();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    }
    if (actualValue instanceof BooleanWritable) {
        actualValue = ((BooleanWritable) actualValue).get();
    } else if (actualValue instanceof ByteWritable) {
        actualValue = ((ByteWritable) actualValue).get();
    } else if (actualValue instanceof BytesWritable) {
        actualValue = new SqlVarbinary(((BytesWritable) actualValue).copyBytes());
    } else if (actualValue instanceof DateWritable) {
        actualValue = new SqlDate(((DateWritable) actualValue).getDays());
    } else if (actualValue instanceof DoubleWritable) {
        actualValue = ((DoubleWritable) actualValue).get();
    } else if (actualValue instanceof FloatWritable) {
        actualValue = ((FloatWritable) actualValue).get();
    } else if (actualValue instanceof IntWritable) {
        actualValue = ((IntWritable) actualValue).get();
    } else if (actualValue instanceof HiveCharWritable) {
        actualValue = ((HiveCharWritable) actualValue).getPaddedValue().toString();
    } else if (actualValue instanceof LongWritable) {
        actualValue = ((LongWritable) actualValue).get();
    } else if (actualValue instanceof ShortWritable) {
        actualValue = ((ShortWritable) actualValue).get();
    } else if (actualValue instanceof HiveDecimalWritable) {
        DecimalType decimalType = (DecimalType) type;
        HiveDecimalWritable writable = (HiveDecimalWritable) actualValue;
        // writable messes with the scale so rescale the values to the Presto type
        BigInteger rescaledValue = rescale(writable.getHiveDecimal().unscaledValue(), writable.getScale(), decimalType.getScale());
        actualValue = new SqlDecimal(rescaledValue, decimalType.getPrecision(), decimalType.getScale());
    } else if (actualValue instanceof Text) {
        actualValue = actualValue.toString();
    } else if (actualValue instanceof TimestampWritable) {
        TimestampWritable timestamp = (TimestampWritable) actualValue;
        actualValue = sqlTimestampOf((timestamp.getSeconds() * 1000) + (timestamp.getNanos() / 1000000L), SESSION);
    } else if (actualValue instanceof OrcStruct) {
        List<Object> fields = new ArrayList<>();
        OrcStruct structObject = (OrcStruct) actualValue;
        for (int fieldId = 0; fieldId < structObject.getNumFields(); fieldId++) {
            fields.add(OrcUtil.getFieldValue(structObject, fieldId));
        }
        actualValue = decodeRecordReaderStruct(type, fields);
    } else if (actualValue instanceof com.facebook.hive.orc.OrcStruct) {
        List<Object> fields = new ArrayList<>();
        com.facebook.hive.orc.OrcStruct structObject = (com.facebook.hive.orc.OrcStruct) actualValue;
        for (int fieldId = 0; fieldId < structObject.getNumFields(); fieldId++) {
            fields.add(structObject.getFieldValue(fieldId));
        }
        actualValue = decodeRecordReaderStruct(type, fields);
    } else if (actualValue instanceof List) {
        actualValue = decodeRecordReaderList(type, ((List<?>) actualValue));
    } else if (actualValue instanceof Map) {
        actualValue = decodeRecordReaderMap(type, (Map<?, ?>) actualValue);
    }
    return actualValue;
}
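For the ByteWritable branch in particular, unwrapping is a single get() call that yields the primitive byte. A tiny standalone illustration (not Presto test code, just the Hadoop API in isolation):

import org.apache.hadoop.io.ByteWritable;

public class ByteWritableUnwrap {
    public static void main(String[] args) {
        ByteWritable writable = new ByteWritable((byte) 42);
        byte plain = writable.get();
        System.out.println(plain); // prints 42
    }
}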
Use of org.apache.hadoop.io.ByteWritable in project camel by apache.
Class HdfsConsumerTest, method testReadByte:
@Test
public void testReadByte() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-byte").getAbsolutePath());
    Configuration conf = new Configuration();
    FileSystem fs1 = FileSystem.get(file.toUri(), conf);
    SequenceFile.Writer writer = createWriter(fs1, conf, file, NullWritable.class, ByteWritable.class);
    NullWritable keyWritable = NullWritable.get();
    ByteWritable valueWritable = new ByteWritable();
    byte value = 3;
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.message(0).body(byte.class).isEqualTo(3);
    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    resultEndpoint.assertIsSatisfied();
}
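The createWriter call above is a helper defined elsewhere in the test class and not shown here. A plausible shape for it, assuming it simply delegates to the classic SequenceFile factory method, is sketched below (hypothetical; the real helper may set compression or other options):

// Hypothetical helper sketch matching the call above; the actual implementation
// lives in the Camel test sources and may differ.
private static SequenceFile.Writer createWriter(FileSystem fs, Configuration conf, Path file,
        Class<?> keyClass, Class<?> valueClass) throws IOException {
    return SequenceFile.createWriter(fs, conf, file, keyClass, valueClass);
}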
Use of org.apache.hadoop.io.ByteWritable in project camel by apache.
Class HdfsConsumerTest, method testReadByte (a second variant that consumes via the hdfs2 URI scheme):
@Test
public void testReadByte() throws Exception {
    if (!canTest()) {
        return;
    }
    final Path file = new Path(new File("target/test/test-camel-byte").getAbsolutePath());
    Configuration conf = new Configuration();
    SequenceFile.Writer writer = createWriter(conf, file, NullWritable.class, ByteWritable.class);
    NullWritable keyWritable = NullWritable.get();
    ByteWritable valueWritable = new ByteWritable();
    byte value = 3;
    valueWritable.set(value);
    writer.append(keyWritable, valueWritable);
    writer.sync();
    writer.close();
    MockEndpoint resultEndpoint = context.getEndpoint("mock:result", MockEndpoint.class);
    resultEndpoint.expectedMessageCount(1);
    resultEndpoint.message(0).body(byte.class).isEqualTo(3);
    context.addRoutes(new RouteBuilder() {
        public void configure() {
            from("hdfs2:localhost/" + file.toUri() + "?fileSystemType=LOCAL&fileType=SEQUENCE_FILE&initialDelay=0").to("mock:result");
        }
    });
    context.start();
    resultEndpoint.assertIsSatisfied();
}