Use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by Apache.
Class CSVReblockMapper, method configure.
/**
 * Initializes this mapper for the input file it has been assigned.
 *
 * <p>After delegating to the superclass, the method
 * <ol>
 *   <li>qualifies the current map input file and flags {@code headerFile}
 *       when it equals the "smallest file" registered for the represented
 *       matrix,</li>
 *   <li>scans the row-id sequence file and copies every (fileOffset, count)
 *       entry that belongs to this matrix and this input file into
 *       {@code offsetMap},</li>
 *   <li>takes delimiter and header settings from the first CSV reblock
 *       instruction, and</li>
 *   <li>allocates the reusable row buffer, sized to the largest
 *       {@code bclen} found across all CSV reblock instructions.</li>
 * </ol>
 *
 * @param job the job configuration supplied by the MapReduce framework
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         resolving the file system or reading the row-id file
 */
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);

    // index of the matrix this mapper instance stands for
    final byte matIx = representativeMatrixes.get(0);

    try {
        // fully qualified name of the file behind the current input split
        Path inputPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(inputPath, job);
        inputPath = inputPath.makeQualified(fs);
        final String inputName = inputPath.toString();

        // this split is the header file if it is the registered smallest file of the matrix
        String[] smallestPerInput = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path smallestPath = new Path(smallestPerInput[matIx]).makeQualified(fs);
        if (inputName.equals(smallestPath.toString())) {
            headerFile = true;
        }

        // load the offset mapping for this matrix/file from the row-id file
        ByteWritable recKey = new ByteWritable();
        OffsetCount recVal = new OffsetCount();
        Path rowIdPath = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader rowIdReader = null;
        try {
            rowIdReader = new SequenceFile.Reader(fs, rowIdPath, job);
            while (rowIdReader.next(recKey, recVal)) {
                // skip entries of other matrices or other input files
                if (recKey.get() != matIx || !inputName.equals(recVal.filename)) {
                    continue;
                }
                offsetMap.put(recVal.fileOffset, recVal.count);
            }
        }
        finally {
            IOUtilFunctions.closeSilently(rowIdReader);
        }
    }
    catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    // delimiter and header handling come from the first instruction
    CSVReblockInstruction firstIns = csv_reblock_instructions.get(0).get(0);
    _delim = firstIns.delim;
    ignoreFirstLine = firstIns.hasHeader;

    // widest block column length over all instructions determines the row buffer size
    int widestBclen = 0;
    for (ArrayList<CSVReblockInstruction> group : csv_reblock_instructions) {
        for (CSVReblockInstruction cur : group) {
            widestBclen = Math.max(widestBclen, cur.bclen);
        }
    }

    // always dense since common csv usecase
    idxRow = new IndexedBlockRow();
    idxRow.getRow().data.reset(1, widestBclen, false);
}
Use of org.apache.hadoop.io.ByteWritable in project incubator-systemml by Apache.
Class GTFMTDReducer, method generateOffsetsFile.
/**
 * Writes the offsets file and returns the total number of counted lines.
 *
 * <p>The given entries are sorted by their natural ordering. Each entry's
 * {@code count} is then replaced by the sum of the original counts of all
 * entries preceding it, and the entry is appended under the constant key
 * {@code 0} to the sequence file {@code <offsetFile>/part-00000}. The list
 * is emptied before returning.
 *
 * @param list entries to convert into running offsets; modified in place
 *             and cleared on success
 * @return the sum of the original counts of all entries
 * @throws IllegalArgumentException declared for callers; not thrown directly
 *         by the visible code
 * @throws IOException if the sequence file cannot be created or written
 */
@SuppressWarnings({ "unchecked", "deprecation" })
private long generateOffsetsFile(ArrayList<OffsetCount> list) throws IllegalArgumentException, IOException {
    Collections.sort(list);

    long runningTotal = 0;
    SequenceFile.Writer out = null;
    try {
        FileSystem fs = FileSystem.get(_rJob);
        Path target = new Path(_agents.getOffsetFile() + "/part-00000");
        out = new SequenceFile.Writer(fs, _rJob, target, ByteWritable.class, OffsetCount.class);

        for (int i = 0; i < list.size(); i++) {
            OffsetCount entry = list.get(i);
            // swap the entry's own count for the offset accumulated so far
            long ownCount = entry.count;
            entry.count = runningTotal;
            out.append(new ByteWritable((byte) 0), entry);
            runningTotal += ownCount;
        }
    }
    finally {
        IOUtilFunctions.closeSilently(out);
    }

    list.clear();
    return runningTotal;
}
Aggregations