Use of org.apache.hadoop.io.serializer.Serializer in project apex-malhar by apache.
The class MapOperator, method definePartitions:
@SuppressWarnings("rawtypes")
@Override
public Collection<Partition<MapOperator<K1, V1, K2, V2>>> definePartitions(Collection<Partition<MapOperator<K1, V1, K2, V2>>> partitions, PartitioningContext context) {
    int tempPartitionCount = partitionCount;
    Collection c = partitions;
    Collection<Partition<MapOperator<K1, V1, K2, V2>>> operatorPartitions = c;
    Partition<MapOperator<K1, V1, K2, V2>> template;
    Iterator<Partition<MapOperator<K1, V1, K2, V2>>> itr = operatorPartitions.iterator();
    template = itr.next();
    Configuration conf = new Configuration();
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    if (outstream.size() == 0) {
        InputSplit[] splits;
        try {
            splits = getSplits(new JobConf(conf), tempPartitionCount, template.getPartitionedInstance().getDirName());
        } catch (Exception e1) {
            logger.info("can't get splits {}", e1.getMessage());
            throw new RuntimeException(e1);
        }
        Collection<Partition<MapOperator<K1, V1, K2, V2>>> operList = new ArrayList<Partition<MapOperator<K1, V1, K2, V2>>>();
        itr = operatorPartitions.iterator();
        int size = splits.length;
        Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
        // Reuse the existing partitions first, serializing one split into each operator's outstream.
        while (size > 0 && itr.hasNext()) {
            Partition<MapOperator<K1, V1, K2, V2>> p = itr.next();
            MapOperator<K1, V1, K2, V2> opr = p.getPartitionedInstance();
            opr.setInputFormatClass(inputFormatClass);
            opr.setMapClass(mapClass);
            opr.setCombineClass(combineClass);
            opr.setConfigFile(configFile);
            try {
                keySerializer.open(opr.getOutstream());
                keySerializer.serialize(splits[size - 1]);
                opr.setInputSplitClass(splits[size - 1].getClass());
            } catch (IOException e) {
                logger.info("error while serializing {}", e.getMessage());
            }
            size--;
            operList.add(p);
        }
        // If there are more splits than existing partitions, create new operators for the remainder.
        while (size > 0) {
            MapOperator<K1, V1, K2, V2> opr = new MapOperator<K1, V1, K2, V2>();
            opr.setInputFormatClass(inputFormatClass);
            opr.setMapClass(mapClass);
            opr.setCombineClass(combineClass);
            opr.setConfigFile(configFile);
            try {
                keySerializer.open(opr.getOutstream());
                keySerializer.serialize(splits[size - 1]);
                opr.setInputSplitClass(splits[size - 1].getClass());
            } catch (IOException e) {
                logger.info("error while serializing {}", e.getMessage());
            }
            size--;
            operList.add(new DefaultPartition<MapOperator<K1, V1, K2, V2>>(opr));
        }
        try {
            keySerializer.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return operList;
    }
    return null;
}
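The read-back counterpart to this pattern is org.apache.hadoop.io.serializer.Deserializer. Below is a minimal sketch of that generic pattern, not apex-malhar's actual operator-setup code; splitClass (as saved via setInputSplitClass), conf, and an input stream in that replays the operator's outstream bytes are all assumed to be available:

// Obtain a Deserializer for the recorded split class and replay the serialized bytes.
SerializationFactory factory = new SerializationFactory(conf);
Deserializer deserializer = factory.getDeserializer(splitClass);
deserializer.open(in);
InputSplit split = (InputSplit) deserializer.deserialize(null); // null: let the deserializer allocate a fresh instance
deserializer.close();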
Use of org.apache.hadoop.io.serializer.Serializer in project elephant-bird by twitter.
The class SplitUtil, method serializeInputSplit:
public static void serializeInputSplit(Configuration conf, DataOutputStream out, InputSplit split) throws IOException {
    Class<? extends InputSplit> clazz = split.getClass().asSubclass(InputSplit.class);
    Text.writeString(out, clazz.getName());
    SerializationFactory factory = new SerializationFactory(conf);
    Serializer serializer = factory.getSerializer(clazz);
    // Wrap the stream so the serializer cannot close the caller's underlying stream.
    serializer.open(out instanceof UncloseableDataOutputStream ? out : new UncloseableDataOutputStream(out));
    serializer.serialize(split);
}
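SplitUtil pairs this write path with a read path. The following is a hedged sketch of what such a counterpart looks like, reconstructed from the write path above rather than copied from the elephant-bird source:

public static InputSplit deserializeInputSplit(Configuration conf, DataInputStream in) throws IOException {
    // Read back the class name written by Text.writeString above.
    String className = Text.readString(in);
    Class<? extends InputSplit> clazz;
    try {
        clazz = conf.getClassByName(className).asSubclass(InputSplit.class);
    } catch (ClassNotFoundException e) {
        throw new IOException("split class " + className + " not found", e);
    }
    SerializationFactory factory = new SerializationFactory(conf);
    Deserializer deserializer = factory.getDeserializer(clazz);
    deserializer.open(in);
    return (InputSplit) deserializer.deserialize(null);
}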
Use of org.apache.hadoop.io.serializer.Serializer in project goldenorb by jzachr.
The class InputSplitAllocator, method assignInputSplits:
/**
 * This method gets the raw splits and calls another method to assign them.
 *
 * @return a Map from each OrbPartitionMember to the List of RawSplits assigned to it
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
    try {
        input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        DataOutputBuffer buffer = new DataOutputBuffer();
        SerializationFactory factory = new SerializationFactory(orbConf);
        Serializer serializer = factory.getSerializer(splits.get(0).getClass());
        // One serializer and one buffer are reused across all splits;
        // the buffer is reset before each split is serialized into it.
        serializer.open(buffer);
        for (int i = 0; i < splits.size(); i++) {
            buffer.reset();
            serializer.serialize(splits.get(i));
            RawSplit rawSplit = new RawSplit();
            rawSplit.setClassName(splits.get(i).getClass().getName());
            rawSplit.setDataLength(splits.get(i).getLength());
            rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
            rawSplit.setLocations(splits.get(i).getLocations());
            rawSplits.add(rawSplit);
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}
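On the consuming side, a partition member can rehydrate the original split from a RawSplit. A minimal sketch follows, assuming RawSplit exposes getClassName() and getBytes() accessors mirroring the setters used above (hypothetical names, not verified against the goldenorb source); DataInputBuffer is the standard org.apache.hadoop.io counterpart of DataOutputBuffer:

// Rebuild an InputSplit from RawSplit bytes; the DataInputBuffer mirrors the
// DataOutputBuffer used on the write side.
Class<?> clazz = orbConf.getClassByName(rawSplit.getClassName()); // hypothetical accessor
SerializationFactory factory = new SerializationFactory(orbConf);
Deserializer deserializer = factory.getDeserializer(clazz);
DataInputBuffer buffer = new DataInputBuffer();
byte[] data = rawSplit.getBytes(); // hypothetical accessor; assumed to return the serialized bytes
buffer.reset(data, data.length);
deserializer.open(buffer);
org.apache.hadoop.mapreduce.InputSplit split =
    (org.apache.hadoop.mapreduce.InputSplit) deserializer.deserialize(null);
deserializer.close();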
Use of org.apache.hadoop.io.serializer.Serializer in project angel by Tencent.
The class SerdeUtils, method serilizeSplit (org.apache.hadoop.mapred overload):
@SuppressWarnings({ "unchecked", "rawtypes" })
public static SplitInfo serilizeSplit(org.apache.hadoop.mapred.InputSplit split, Configuration conf) throws IOException {
    if (factory == null) {
        factory = new SerializationFactory(conf);
    }
    DataOutputBuffer out = new DataOutputBuffer(1024);
    try {
        Serializer serializer = factory.getSerializer(split.getClass());
        serializer.open(out);
        serializer.serialize(split);
        SplitInfo ret = new SplitInfo(split.getClass().getName(), out.getData());
        return ret;
    } finally {
        out.close();
    }
}
Use of org.apache.hadoop.io.serializer.Serializer in project angel by Tencent.
The class SerdeUtils, method serilizeSplit (org.apache.hadoop.mapreduce overload):
@SuppressWarnings({ "unchecked", "rawtypes" })
public static SplitInfo serilizeSplit(org.apache.hadoop.mapreduce.InputSplit split, Configuration conf) throws IOException {
    if (factory == null) {
        factory = new SerializationFactory(conf);
    }
    DataOutputBuffer out = new DataOutputBuffer(1024);
    try {
        Serializer serializer = factory.getSerializer(split.getClass());
        serializer.open(out);
        serializer.serialize(split);
        SplitInfo ret = new SplitInfo(split.getClass().getName(), out.getData());
        return ret;
    } finally {
        out.close();
    }
}
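One detail worth noting in both overloads: DataOutputBuffer.getData() returns the entire backing array, whose contents are only valid up to getLength(), so the SplitInfo constructed above may retain trailing padding bytes. A minimal sketch of trimming to the valid prefix first (Arrays.copyOf is standard java.util; the SplitInfo constructor is used exactly as above):

// getData() exposes the internal buffer; only the first getLength() bytes are valid.
byte[] valid = Arrays.copyOf(out.getData(), out.getLength());
SplitInfo ret = new SplitInfo(split.getClass().getName(), valid);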