Use of org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount in the incubator-systemml project by Apache.
Class GTFMTDReducer, method generateOffsetsFile.
/**
 * Sorts the given offset entries and writes them to the sequence file
 * {@code <offsetFile>/part-00000}, replacing each entry's {@code count}
 * with the sum of the counts of all entries that precede it in sorted order.
 * Every record is written with a constant zero {@link ByteWritable} key.
 *
 * <p>The passed list is modified: it is sorted in place, each entry's
 * {@code count} is overwritten, and the list is emptied before returning.
 * The writer is closed via {@code IOUtilFunctions.closeSilently} whether or
 * not writing succeeds.
 *
 * @param list offset entries to convert and persist
 * @return the sum of the original {@code count} values of all entries
 * @throws IllegalArgumentException declared by the original signature
 * @throws IOException if obtaining the file system, creating the writer, or appending fails
 */
// NOTE(review): "deprecation" is presumably suppressed for the SequenceFile.Writer constructor - confirm.
@SuppressWarnings({ "unchecked", "deprecation" })
private long generateOffsetsFile(ArrayList<OffsetCount> list) throws IllegalArgumentException, IOException {
    Collections.sort(list);

    long runningOffset = 0;
    SequenceFile.Writer offsetsWriter = null;
    try {
        // Resolve the constructor arguments in the same order as a single inline call would.
        final FileSystem fs = FileSystem.get(_rJob);
        final Path offsetsPath = new Path(_agents.getOffsetFile() + "/part-00000");
        offsetsWriter = new SequenceFile.Writer(fs, _rJob, offsetsPath, ByteWritable.class, OffsetCount.class);

        for (OffsetCount entry : list) {
            // Remember the entry's own count before it is replaced by the running total.
            final long entryCount = entry.count;
            entry.count = runningOffset;
            offsetsWriter.append(new ByteWritable((byte) 0), entry);
            runningOffset += entryCount;
        }
    } finally {
        IOUtilFunctions.closeSilently(offsetsWriter);
    }

    list.clear();
    return runningOffset;
}
Use of org.apache.sysml.runtime.matrix.CSVReblockMR.OffsetCount in the incubator-systemml project by Apache.
Class GTFMTDMapper, method close.
/**
 * Finishes this map task: asks the missing-value-imputation, recode and
 * binning agents (in that order) to emit their transformation metadata,
 * then, if a collector is available, emits one extra record describing this
 * split's offset, and finally resets the per-split state fields.
 *
 * @throws IOException if emitting metadata or collecting the offset record fails
 */
@Override
public void close() throws IOException {
    final var agents = _agents;

    agents.getMVImputeAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, agents);
    agents.getRecodeAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, agents);
    agents.getBinAgent().mapOutputTransformationMetadata(_collector, _mapTaskID, agents);

    if (_collector != null) {
        // The offset record is keyed by (number of columns + 1) and carries an
        // OffsetCount (part-file name, offset within the part file, and the
        // value of getValid()) wrapped in a DistinctValue.
        final IntWritable offsetKey = new IntWritable((int) agents.getNumCols() + 1);
        final OffsetCount splitOffset = new OffsetCount(_partFileName, _offsetInPartFile, agents.getValid());
        _collector.collect(offsetKey, new DistinctValue(splitOffset));
    }

    // Restore the initial per-split state; required when the JVM is reused.
    _partFileWithHeader = false;
    _offsetInPartFile = -1;
    _firstRecordInSplit = true;
}
Aggregations