use of org.apache.hadoop.io.LongWritable in project incubator-systemml by apache.
the class FrameReaderTextCell method readTextCellFrameFromInputSplit.
protected static void readTextCellFrameFromInputSplit(InputSplit split, TextInputFormat informat, JobConf job, FrameBlock dest) throws IOException {
ValueType[] schema = dest.getSchema();
int rlen = dest.getNumRows();
int clen = dest.getNumColumns();
// create record reader
RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
LongWritable key = new LongWritable();
Text value = new Text();
FastStringTokenizer st = new FastStringTokenizer(' ');
int row = -1;
int col = -1;
try {
while (reader.next(key, value)) {
// reinit tokenizer
st.reset(value.toString());
row = st.nextInt() - 1;
col = st.nextInt() - 1;
if (row == -3)
dest.getColumnMetadata(col).setMvValue(st.nextToken());
else if (row == -2)
dest.getColumnMetadata(col).setNumDistinct(st.nextLong());
else
dest.set(row, col, UtilFunctions.stringToObject(schema[col], st.nextToken()));
}
} catch (Exception ex) {
// post-mortem error handling and bounds checking
if (row < 0 || row + 1 > rlen || col < 0 || col + 1 > clen) {
throw new IOException("Frame cell [" + (row + 1) + "," + (col + 1) + "] " + "out of overall frame range [1:" + rlen + ",1:" + clen + "].");
} else {
throw new IOException("Unable to read frame in text cell format.", ex);
}
} finally {
IOUtilFunctions.closeSilently(reader);
}
}
use of org.apache.hadoop.io.LongWritable in project incubator-systemml by apache.
the class RemoveEmptyRows method execute.
@Override
public void execute() {
Matrix mat = (Matrix) this.getFunctionInput(0);
String fnameOld = mat.getFilePath();
// old,new rowID
HashMap<Long, Long> keyMap = new HashMap<>();
try {
// prepare input
JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
Path path = new Path(fnameOld);
FileSystem fs = IOUtilFunctions.getFileSystem(path, job);
if (!fs.exists(path))
throw new IOException("File " + fnameOld + " does not exist on HDFS.");
FileInputFormat.addInputPath(job, path);
TextInputFormat informat = new TextInputFormat();
informat.configure(job);
// prepare output
String fnameNew = createOutputFilePathAndName(OUTPUT_FILE);
DataOutputStream ostream = MapReduceTool.getHDFSDataOutputStream(fnameNew, true);
// read and write if necessary
InputSplit[] splits = informat.getSplits(job, 1);
LongWritable key = new LongWritable();
Text value = new Text();
long ID = 1;
try {
// for obj reuse and preventing repeated buffer re-allocations
StringBuilder sb = new StringBuilder();
for (InputSplit split : splits) {
RecordReader<LongWritable, Text> reader = informat.getRecordReader(split, job, Reporter.NULL);
try {
while (reader.next(key, value)) {
String cellStr = value.toString().trim();
StringTokenizer st = new StringTokenizer(cellStr, " ");
long row = Integer.parseInt(st.nextToken());
long col = Integer.parseInt(st.nextToken());
double lvalue = Double.parseDouble(st.nextToken());
if (!keyMap.containsKey(row))
keyMap.put(row, ID++);
long rowNew = keyMap.get(row);
sb.append(rowNew);
sb.append(' ');
sb.append(col);
sb.append(' ');
sb.append(lvalue);
sb.append('\n');
ostream.writeBytes(sb.toString());
sb.setLength(0);
}
} finally {
if (reader != null)
reader.close();
}
}
_ret = new Matrix(fnameNew, keyMap.size(), mat.getNumCols(), ValueType.Double);
} finally {
if (ostream != null)
ostream.close();
}
} catch (Exception ex) {
throw new RuntimeException("Unable to execute external function.", ex);
}
}
use of org.apache.hadoop.io.LongWritable in project hive by apache.
the class TestObjectInspectorConverters method testObjectInspectorConverters.
public void testObjectInspectorConverters() throws Throwable {
try {
// Boolean
Converter booleanConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableBooleanObjectInspector);
assertEquals("BooleanConverter", new BooleanWritable(false), booleanConverter.convert(Integer.valueOf(0)));
assertEquals("BooleanConverter", new BooleanWritable(true), booleanConverter.convert(Integer.valueOf(1)));
assertEquals("BooleanConverter", null, booleanConverter.convert(null));
// Byte
Converter byteConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableByteObjectInspector);
assertEquals("ByteConverter", new ByteWritable((byte) 0), byteConverter.convert(Integer.valueOf(0)));
assertEquals("ByteConverter", new ByteWritable((byte) 1), byteConverter.convert(Integer.valueOf(1)));
assertEquals("ByteConverter", null, byteConverter.convert(null));
// Short
Converter shortConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableShortObjectInspector);
assertEquals("ShortConverter", new ShortWritable((short) 0), shortConverter.convert(Integer.valueOf(0)));
assertEquals("ShortConverter", new ShortWritable((short) 1), shortConverter.convert(Integer.valueOf(1)));
assertEquals("ShortConverter", null, shortConverter.convert(null));
// Int
Converter intConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableIntObjectInspector);
assertEquals("IntConverter", new IntWritable(0), intConverter.convert(Integer.valueOf(0)));
assertEquals("IntConverter", new IntWritable(1), intConverter.convert(Integer.valueOf(1)));
assertEquals("IntConverter", null, intConverter.convert(null));
// Long
Converter longConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableLongObjectInspector);
assertEquals("LongConverter", new LongWritable(0), longConverter.convert(Integer.valueOf(0)));
assertEquals("LongConverter", new LongWritable(1), longConverter.convert(Integer.valueOf(1)));
assertEquals("LongConverter", null, longConverter.convert(null));
// Float
Converter floatConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableFloatObjectInspector);
assertEquals("LongConverter", new FloatWritable(0), floatConverter.convert(Integer.valueOf(0)));
assertEquals("LongConverter", new FloatWritable(1), floatConverter.convert(Integer.valueOf(1)));
assertEquals("LongConverter", null, floatConverter.convert(null));
// Double
Converter doubleConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableDoubleObjectInspector);
assertEquals("DoubleConverter", new DoubleWritable(0), doubleConverter.convert(Integer.valueOf(0)));
assertEquals("DoubleConverter", new DoubleWritable(1), doubleConverter.convert(Integer.valueOf(1)));
assertEquals("DoubleConverter", null, doubleConverter.convert(null));
// Char
Converter charConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, PrimitiveObjectInspectorFactory.javaHiveCharObjectInspector);
assertEquals("CharConverter", new HiveChar("TRUE", -1), charConverter.convert(Boolean.valueOf(true)));
assertEquals("CharConverter", new HiveChar("FALSE", -1), charConverter.convert(Boolean.valueOf(false)));
charConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, PrimitiveObjectInspectorFactory.writableHiveCharObjectInspector);
assertEquals("CharConverter", new HiveCharWritable(new HiveChar("TRUE", -1)), charConverter.convert(Boolean.valueOf(true)));
assertEquals("CharConverter", new HiveCharWritable(new HiveChar("FALSE", -1)), charConverter.convert(Boolean.valueOf(false)));
charConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.javaHiveCharObjectInspector);
assertEquals("CharConverter", new HiveChar("0", -1), charConverter.convert(Integer.valueOf(0)));
assertEquals("CharConverter", new HiveChar("1", -1), charConverter.convert(Integer.valueOf(1)));
charConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableHiveCharObjectInspector);
assertEquals("CharConverter", new HiveCharWritable(new HiveChar("0", -1)), charConverter.convert(Integer.valueOf(0)));
assertEquals("CharConverter", new HiveCharWritable(new HiveChar("1", -1)), charConverter.convert(Integer.valueOf(1)));
charConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaHiveCharObjectInspector);
assertEquals("CharConverter", new HiveChar("hive", -1), charConverter.convert(String.valueOf("hive")));
charConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.writableHiveCharObjectInspector);
assertEquals("CharConverter", new HiveCharWritable(new HiveChar("hive", -1)), charConverter.convert(String.valueOf("hive")));
// VarChar
Converter varcharConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector);
assertEquals("VarCharConverter", new HiveVarchar("TRUE", -1), varcharConverter.convert(Boolean.valueOf(true)));
assertEquals("VarCharConverter", new HiveVarchar("FALSE", -1), varcharConverter.convert(Boolean.valueOf(false)));
varcharConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector, PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector);
assertEquals("VarCharConverter", new HiveVarcharWritable(new HiveVarchar("TRUE", -1)), varcharConverter.convert(Boolean.valueOf(true)));
assertEquals("VarCharConverter", new HiveVarcharWritable(new HiveVarchar("FALSE", -1)), varcharConverter.convert(Boolean.valueOf(false)));
varcharConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector);
assertEquals("VarCharConverter", new HiveVarchar("0", -1), varcharConverter.convert(Integer.valueOf(0)));
assertEquals("VarCharConverter", new HiveVarchar("1", -1), varcharConverter.convert(Integer.valueOf(1)));
varcharConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector);
assertEquals("VarCharConverter", new HiveVarcharWritable(new HiveVarchar("0", -1)), varcharConverter.convert(Integer.valueOf(0)));
assertEquals("VarCharConverter", new HiveVarcharWritable(new HiveVarchar("1", -1)), varcharConverter.convert(Integer.valueOf(1)));
varcharConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.javaHiveVarcharObjectInspector);
assertEquals("VarCharConverter", new HiveVarchar("hive", -1), varcharConverter.convert(String.valueOf("hive")));
varcharConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.writableHiveVarcharObjectInspector);
assertEquals("VarCharConverter", new HiveVarcharWritable(new HiveVarchar("hive", -1)), varcharConverter.convert(String.valueOf("hive")));
// Text
Converter textConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaIntObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector);
assertEquals("TextConverter", new Text("0"), textConverter.convert(Integer.valueOf(0)));
assertEquals("TextConverter", new Text("1"), textConverter.convert(Integer.valueOf(1)));
assertEquals("TextConverter", null, textConverter.convert(null));
textConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.writableBinaryObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector);
assertEquals("TextConverter", new Text("hive"), textConverter.convert(new BytesWritable(new byte[] { (byte) 'h', (byte) 'i', (byte) 'v', (byte) 'e' })));
assertEquals("TextConverter", null, textConverter.convert(null));
textConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.writableStringObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector);
assertEquals("TextConverter", new Text("hive"), textConverter.convert(new Text("hive")));
assertEquals("TextConverter", null, textConverter.convert(null));
textConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector);
assertEquals("TextConverter", new Text("hive"), textConverter.convert(new String("hive")));
assertEquals("TextConverter", null, textConverter.convert(null));
textConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaHiveDecimalObjectInspector, PrimitiveObjectInspectorFactory.writableStringObjectInspector);
assertEquals("TextConverter", new Text("100.001"), textConverter.convert(HiveDecimal.create("100.001")));
assertEquals("TextConverter", null, textConverter.convert(null));
// Binary
Converter baConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.javaStringObjectInspector, PrimitiveObjectInspectorFactory.writableBinaryObjectInspector);
assertEquals("BAConverter", new BytesWritable(new byte[] { (byte) 'h', (byte) 'i', (byte) 'v', (byte) 'e' }), baConverter.convert("hive"));
assertEquals("BAConverter", null, baConverter.convert(null));
baConverter = ObjectInspectorConverters.getConverter(PrimitiveObjectInspectorFactory.writableStringObjectInspector, PrimitiveObjectInspectorFactory.writableBinaryObjectInspector);
assertEquals("BAConverter", new BytesWritable(new byte[] { (byte) 'h', (byte) 'i', (byte) 'v', (byte) 'e' }), baConverter.convert(new Text("hive")));
assertEquals("BAConverter", null, baConverter.convert(null));
// Union
ArrayList<String> fieldNames = new ArrayList<String>();
fieldNames.add("firstInteger");
fieldNames.add("secondString");
fieldNames.add("thirdBoolean");
ArrayList<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>();
fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
fieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
ArrayList<String> fieldNames2 = new ArrayList<String>();
fieldNames2.add("firstString");
fieldNames2.add("secondInteger");
fieldNames2.add("thirdBoolean");
ArrayList<ObjectInspector> fieldObjectInspectors2 = new ArrayList<ObjectInspector>();
fieldObjectInspectors2.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
fieldObjectInspectors2.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
fieldObjectInspectors2.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
Converter unionConverter0 = ObjectInspectorConverters.getConverter(ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectors), ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectors2));
Object convertedObject0 = unionConverter0.convert(new StandardUnion((byte) 0, 1));
StandardUnion expectedObject0 = new StandardUnion();
expectedObject0.setTag((byte) 0);
expectedObject0.setObject("1");
assertEquals(expectedObject0, convertedObject0);
Converter unionConverter1 = ObjectInspectorConverters.getConverter(ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectors), ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectors2));
Object convertedObject1 = unionConverter1.convert(new StandardUnion((byte) 1, "1"));
StandardUnion expectedObject1 = new StandardUnion();
expectedObject1.setTag((byte) 1);
expectedObject1.setObject(1);
assertEquals(expectedObject1, convertedObject1);
Converter unionConverter2 = ObjectInspectorConverters.getConverter(ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectors), ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectors2));
Object convertedObject2 = unionConverter2.convert(new StandardUnion((byte) 2, true));
StandardUnion expectedObject2 = new StandardUnion();
expectedObject2.setTag((byte) 2);
expectedObject2.setObject(true);
assertEquals(expectedObject2, convertedObject2);
// Union (extra fields)
ArrayList<String> fieldNamesExtra = new ArrayList<String>();
fieldNamesExtra.add("firstInteger");
fieldNamesExtra.add("secondString");
fieldNamesExtra.add("thirdBoolean");
ArrayList<ObjectInspector> fieldObjectInspectorsExtra = new ArrayList<ObjectInspector>();
fieldObjectInspectorsExtra.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
fieldObjectInspectorsExtra.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
fieldObjectInspectorsExtra.add(PrimitiveObjectInspectorFactory.javaBooleanObjectInspector);
ArrayList<String> fieldNamesExtra2 = new ArrayList<String>();
fieldNamesExtra2.add("firstString");
fieldNamesExtra2.add("secondInteger");
ArrayList<ObjectInspector> fieldObjectInspectorsExtra2 = new ArrayList<ObjectInspector>();
fieldObjectInspectorsExtra2.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
fieldObjectInspectorsExtra2.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);
Converter unionConverterExtra = ObjectInspectorConverters.getConverter(ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectorsExtra), ObjectInspectorFactory.getStandardUnionObjectInspector(fieldObjectInspectorsExtra2));
Object convertedObjectExtra = unionConverterExtra.convert(new StandardUnion((byte) 2, true));
StandardUnion expectedObjectExtra = new StandardUnion();
expectedObjectExtra.setTag((byte) -1);
expectedObjectExtra.setObject(null);
// we should get back null
assertEquals(expectedObjectExtra, convertedObjectExtra);
} catch (Throwable e) {
e.printStackTrace();
throw e;
}
}
use of org.apache.hadoop.io.LongWritable in project systemml by apache.
the class ReblockSPInstruction method processFrameReblockInstruction.
@SuppressWarnings("unchecked")
protected void processFrameReblockInstruction(SparkExecutionContext sec, InputInfo iinfo) {
FrameObject fo = sec.getFrameObject(input1.getName());
MatrixCharacteristics mcOut = sec.getMatrixCharacteristics(output.getName());
if (iinfo == InputInfo.TextCellInputInfo) {
// get the input textcell rdd
JavaPairRDD<LongWritable, Text> lines = (JavaPairRDD<LongWritable, Text>) sec.getRDDHandleForVariable(input1.getName(), iinfo);
// convert textcell to binary block
JavaPairRDD<Long, FrameBlock> out = FrameRDDConverterUtils.textCellToBinaryBlock(sec.getSparkContext(), lines, mcOut, fo.getSchema());
// put output RDD handle into symbol table
sec.setRDDHandleForVariable(output.getName(), out);
sec.addLineageRDD(output.getName(), input1.getName());
} else if (iinfo == InputInfo.CSVInputInfo) {
// HACK ALERT: Until we introduces the rewrite to insert csvrblock for non-persistent read
// throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction");
CSVReblockSPInstruction csvInstruction = null;
boolean hasHeader = false;
String delim = ",";
boolean fill = false;
double fillValue = 0;
if (fo.getFileFormatProperties() instanceof CSVFileFormatProperties && fo.getFileFormatProperties() != null) {
CSVFileFormatProperties props = (CSVFileFormatProperties) fo.getFileFormatProperties();
hasHeader = props.hasHeader();
delim = props.getDelim();
fill = props.isFill();
fillValue = props.getFillValue();
}
csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString);
csvInstruction.processInstruction(sec);
} else {
throw new DMLRuntimeException("The given InputInfo is not implemented " + "for ReblockSPInstruction: " + InputInfo.inputInfoToString(iinfo));
}
}
use of org.apache.hadoop.io.LongWritable in project systemml by apache.
the class WriteSPInstruction method processFrameWriteInstruction.
@SuppressWarnings("unchecked")
protected void processFrameWriteInstruction(SparkExecutionContext sec, String fname, OutputInfo oi, ValueType[] schema) throws IOException {
// get input rdd
JavaPairRDD<Long, FrameBlock> in1 = (JavaPairRDD<Long, FrameBlock>) sec.getRDDHandleForVariable(input1.getName(), InputInfo.BinaryBlockInputInfo);
MatrixCharacteristics mc = sec.getMatrixCharacteristics(input1.getName());
if (oi == OutputInfo.TextCellOutputInfo) {
JavaRDD<String> out = FrameRDDConverterUtils.binaryBlockToTextCell(in1, mc);
customSaveTextFile(out, fname, false);
} else if (oi == OutputInfo.CSVOutputInfo) {
CSVFileFormatProperties props = (formatProperties != null) ? (CSVFileFormatProperties) formatProperties : null;
JavaRDD<String> out = FrameRDDConverterUtils.binaryBlockToCsv(in1, mc, props, true);
customSaveTextFile(out, fname, false);
} else if (oi == OutputInfo.BinaryBlockOutputInfo) {
JavaPairRDD<LongWritable, FrameBlock> out = in1.mapToPair(new LongFrameToLongWritableFrameFunction());
out.saveAsHadoopFile(fname, LongWritable.class, FrameBlock.class, SequenceFileOutputFormat.class);
} else {
// unsupported formats: binarycell (not externalized)
throw new DMLRuntimeException("Unexpected data format: " + OutputInfo.outputInfoToString(oi));
}
// write meta data file
MapReduceTool.writeMetaDataFile(fname + ".mtd", input1.getValueType(), schema, DataType.FRAME, mc, oi, formatProperties);
}
Aggregations