Use of org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction in project systemml by Apache.
Class CSVAssignRowIDMapper, method configure.
/**
 * Initializes this mapper from the job configuration.
 *
 * <p>Sets the output key to the first input matrix index of this mapper, stores the
 * fully qualified name of the input file being processed, records whether that file
 * is the one registered under {@code CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT} for
 * this input, and takes the delimiter (regex-quoted) and header flag from the first
 * CSV reblock instruction whose input matches that index.
 *
 * @param job the job configuration to read from
 * @throws RuntimeException wrapping any exception raised while reading the configuration
 */
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    try {
        // Repeated file names in the input make no sense for reblock, so only the
        // first matrix index is relevant here.
        final byte inputIndex = MRJobConfiguration.getInputMatrixIndexesInMapper(job).get(0);
        outKey.set(inputIndex);

        // Qualified name of the file this map task is currently reading.
        Path inputPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(inputPath, job);
        filename = inputPath.makeQualified(fs).toString();

        // Compare against the file registered for this input index.
        String[] smallestFiles = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        String qualifiedHeaderName = new Path(smallestFiles[inputIndex]).makeQualified(fs).toString();
        headerFile = qualifiedHeaderName.equals(filename);

        // Pick up delimiter and header flag from the first instruction on this input.
        for (CSVReblockInstruction candidate : MRJobConfiguration.getCSVReblockInstructions(job)) {
            if (candidate.input != inputIndex) {
                continue;
            }
            delim = Pattern.quote(candidate.delim);
            ignoreFirstLine = candidate.hasHeader;
            break;
        }
    } catch (Exception ex) {
        throw new RuntimeException(ex);
    }
}
Use of org.apache.sysml.runtime.instructions.mr.CSVReblockInstruction in project systemml by Apache.
Class CSVReblockMapper, method configure.
/**
 * Initializes this mapper from the job configuration.
 *
 * <p>After delegating to the superclass, this method marks the current input file as
 * the header file when it equals the file registered under
 * {@code CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT} for the first representative matrix,
 * loads the offset-to-count entries for this file and matrix from the sequence file named
 * by {@code CSVReblockMR.ROWID_FILE_NAME}, takes the delimiter and header flag from the
 * first CSV reblock instruction, and allocates a dense row buffer as wide as the largest
 * {@code bclen} among all CSV reblock instructions.
 *
 * @param job the job configuration to read from
 * @throws RuntimeException wrapping any {@link IOException} raised while accessing the
 *         file system or reading the row-id file
 */
@Override
@SuppressWarnings("deprecation")
public void configure(JobConf job) {
    super.configure(job);

    final byte matrixIndex = representativeMatrixes.get(0);
    try {
        // Qualified name of the file this map task is currently reading.
        Path inputPath = new Path(job.get(MRConfigurationNames.MR_MAP_INPUT_FILE));
        FileSystem fs = IOUtilFunctions.getFileSystem(inputPath, job);
        inputPath = inputPath.makeQualified(fs);
        final String filename = inputPath.toString();

        // The flag is only ever raised here, never cleared.
        String[] smallestFiles = job.getStrings(CSVReblockMR.SMALLEST_FILE_NAME_PER_INPUT);
        Path headerPath = new Path(smallestFiles[matrixIndex]).makeQualified(fs);
        if (headerPath.toString().equals(filename)) {
            headerFile = true;
        }

        // Load the offset mapping: keep only entries for this matrix and this file.
        ByteWritable entryKey = new ByteWritable();
        OffsetCount entry = new OffsetCount();
        Path rowIdPath = new Path(job.get(CSVReblockMR.ROWID_FILE_NAME));
        SequenceFile.Reader rowIdReader = null;
        try {
            rowIdReader = new SequenceFile.Reader(fs, rowIdPath, job);
            while (rowIdReader.next(entryKey, entry)) {
                boolean sameMatrix = entryKey.get() == matrixIndex;
                if (sameMatrix && filename.equals(entry.filename)) {
                    offsetMap.put(entry.fileOffset, entry.count);
                }
            }
        } finally {
            IOUtilFunctions.closeSilently(rowIdReader);
        }
    } catch (IOException ex) {
        throw new RuntimeException(ex);
    }

    // Delimiter and header handling come from the very first instruction.
    CSVReblockInstruction first = csv_reblock_instructions.get(0).get(0);
    _delim = first.delim;
    ignoreFirstLine = first.hasHeader;

    // Widest column block size over all instructions determines the row buffer width.
    int widestBclen = 0;
    for (ArrayList<CSVReblockInstruction> group : csv_reblock_instructions) {
        for (CSVReblockInstruction instruction : group) {
            widestBclen = Math.max(widestBclen, instruction.bclen);
        }
    }

    // Always dense, since that is the common CSV use case.
    idxRow = new IndexedBlockRow();
    idxRow.getRow().data.reset(1, widestBclen, false);
}
Aggregations