Search in sources:

Example 6 with Serializer

Use of org.apache.hadoop.io.serializer.Serializer in project apex-malhar by apache.

In class MapOperator, the method definePartitions:

@SuppressWarnings("rawtypes")
@Override
public Collection<Partition<MapOperator<K1, V1, K2, V2>>> definePartitions(Collection<Partition<MapOperator<K1, V1, K2, V2>>> partitions, PartitioningContext context) {
    int tempPartitionCount = partitionCount;
    // The incoming collection already carries the right element type; no raw-type detour needed.
    Collection<Partition<MapOperator<K1, V1, K2, V2>>> operatorPartitions = partitions;
    // Use the first existing partition as a template for locating the input directory.
    Partition<MapOperator<K1, V1, K2, V2>> template = operatorPartitions.iterator().next();
    Configuration conf = new Configuration();
    SerializationFactory serializationFactory = new SerializationFactory(conf);
    // Only (re)partition when no splits have been serialized yet.
    if (outstream.size() == 0) {
        InputSplit[] splits;
        try {
            splits = getSplits(new JobConf(conf), tempPartitionCount, template.getPartitionedInstance().getDirName());
        } catch (Exception e1) {
            // Log with the full exception (not just the message) so the stack trace is preserved.
            logger.error("can't get splits", e1);
            throw new RuntimeException(e1);
        }
        Collection<Partition<MapOperator<K1, V1, K2, V2>>> operList = new ArrayList<Partition<MapOperator<K1, V1, K2, V2>>>();
        if (splits.length == 0) {
            // No input splits: nothing to assign; also avoids splits[0] below on an empty array.
            return operList;
        }
        Iterator<Partition<MapOperator<K1, V1, K2, V2>>> itr = operatorPartitions.iterator();
        int size = splits.length;
        // NOTE(review): assumes every split is the same class as splits[0] — confirm with getSplits().
        Serializer keySerializer = serializationFactory.getSerializer(splits[0].getClass());
        // First reuse the existing partitions, assigning one split to each.
        while (size > 0 && itr.hasNext()) {
            Partition<MapOperator<K1, V1, K2, V2>> p = itr.next();
            configureOperator(p.getPartitionedInstance(), keySerializer, splits[size - 1]);
            size--;
            operList.add(p);
        }
        // Then create fresh operators for any splits left over.
        while (size > 0) {
            MapOperator<K1, V1, K2, V2> opr = new MapOperator<K1, V1, K2, V2>();
            configureOperator(opr, keySerializer, splits[size - 1]);
            size--;
            operList.add(new DefaultPartition<MapOperator<K1, V1, K2, V2>>(opr));
        }
        try {
            keySerializer.close();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return operList;
    }
    return null;
}

/**
 * Configures one map operator (input format, map/combine classes, config file) and
 * serializes its assigned input split into the operator's output stream.
 *
 * @throws RuntimeException if the split cannot be serialized — a partition without its
 *         serialized split is unusable, so fail fast instead of silently continuing.
 */
@SuppressWarnings({ "rawtypes", "unchecked" })
private void configureOperator(MapOperator<K1, V1, K2, V2> opr, Serializer keySerializer, InputSplit split) {
    opr.setInputFormatClass(inputFormatClass);
    opr.setMapClass(mapClass);
    opr.setCombineClass(combineClass);
    opr.setConfigFile(configFile);
    try {
        keySerializer.open(opr.getOutstream());
        keySerializer.serialize(split);
        opr.setInputSplitClass(split.getClass());
    } catch (IOException e) {
        logger.error("error while serializing split", e);
        throw new RuntimeException(e);
    }
}
Also used : DefaultPartition(com.datatorrent.api.DefaultPartition) Configuration(org.apache.hadoop.conf.Configuration) ArrayList(java.util.ArrayList) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) IOException(java.io.IOException) IOException(java.io.IOException) Collection(java.util.Collection) InputSplit(org.apache.hadoop.mapred.InputSplit) JobConf(org.apache.hadoop.mapred.JobConf) Serializer(org.apache.hadoop.io.serializer.Serializer)

Example 7 with Serializer

Use of org.apache.hadoop.io.serializer.Serializer in project elephant-bird by twitter.

In class SplitUtil, the method serializeInputSplit:

public static void serializeInputSplit(Configuration conf, DataOutputStream out, InputSplit split) throws IOException {
    // Record the concrete split class first so the reader knows how to deserialize it.
    Class<? extends InputSplit> clazz = split.getClass().asSubclass(InputSplit.class);
    Text.writeString(out, clazz.getName());
    // Guard the caller's stream: a serializer that closes its sink must not close `out`.
    DataOutputStream sink;
    if (out instanceof UncloseableDataOutputStream) {
        sink = out;
    } else {
        sink = new UncloseableDataOutputStream(out);
    }
    Serializer serializer = new SerializationFactory(conf).getSerializer(clazz);
    serializer.open(sink);
    serializer.serialize(split);
}
Also used : SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) Serializer(org.apache.hadoop.io.serializer.Serializer)

Example 8 with Serializer

Use of org.apache.hadoop.io.serializer.Serializer in project goldenorb by jzachr.

In class InputSplitAllocator, the method assignInputSplits:

/**
 * Gets the raw input splits for the current job and delegates their assignment
 * to the partition members.
 *
 * @return a map from each partition member to the list of raw splits assigned to it
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    try {
        org.apache.hadoop.mapreduce.InputFormat<?, ?> input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        // Guard against an empty split list — splits.get(0) below would otherwise throw.
        if (!splits.isEmpty()) {
            DataOutputBuffer buffer = new DataOutputBuffer();
            SerializationFactory factory = new SerializationFactory(orbConf);
            // NOTE(review): assumes all splits share the class of splits.get(0) — confirm for this input format.
            Serializer serializer = factory.getSerializer(splits.get(0).getClass());
            serializer.open(buffer);
            try {
                for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
                    // reset() rewinds the buffer so each split is serialized from offset 0.
                    buffer.reset();
                    serializer.serialize(split);
                    RawSplit rawSplit = new RawSplit();
                    rawSplit.setClassName(split.getClass().getName());
                    rawSplit.setDataLength(split.getLength());
                    rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
                    rawSplit.setLocations(split.getLocations());
                    rawSplits.add(rawSplit);
                }
            } finally {
                // Release serializer resources even when serialization fails mid-loop.
                serializer.close();
            }
        }
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers up the stack can observe it.
        Thread.currentThread().interrupt();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}
Also used : RawSplit(org.goldenorb.io.input.RawSplit) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) IOException(java.io.IOException) DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) JobContext(org.apache.hadoop.mapreduce.JobContext) JobConf(org.apache.hadoop.mapred.JobConf) JobID(org.apache.hadoop.mapreduce.JobID) Serializer(org.apache.hadoop.io.serializer.Serializer)

Example 9 with Serializer

Use of org.apache.hadoop.io.serializer.Serializer in project angel by Tencent.

In class SerdeUtils, the method serilizeSplit (mapred variant):

@SuppressWarnings({ "unchecked", "rawtypes" })
public static SplitInfo serilizeSplit(org.apache.hadoop.mapred.InputSplit split, Configuration conf) throws IOException {
    // Lazily create the shared serialization factory on first use.
    if (factory == null) {
        factory = new SerializationFactory(conf);
    }
    DataOutputBuffer buffer = new DataOutputBuffer(1024);
    try {
        Serializer splitSerializer = factory.getSerializer(split.getClass());
        splitSerializer.open(buffer);
        splitSerializer.serialize(split);
        // NOTE(review): getData() returns the raw backing array, possibly longer than the
        // serialized length — presumably SplitInfo tolerates the slack; confirm its contract.
        return new SplitInfo(split.getClass().getName(), buffer.getData());
    } finally {
        buffer.close();
    }
}
Also used : DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) SplitInfo(com.tencent.angel.split.SplitInfo) Serializer(org.apache.hadoop.io.serializer.Serializer)

Example 10 with Serializer

Use of org.apache.hadoop.io.serializer.Serializer in project angel by Tencent.

In class SerdeUtils, the method serilizeSplit (mapreduce variant):

@SuppressWarnings({ "unchecked", "rawtypes" })
public static SplitInfo serilizeSplit(org.apache.hadoop.mapreduce.InputSplit split, Configuration conf) throws IOException {
    // The factory is shared across calls; build it only once.
    if (factory == null) {
        factory = new SerializationFactory(conf);
    }
    DataOutputBuffer splitBuffer = new DataOutputBuffer(1024);
    try {
        Serializer ser = factory.getSerializer(split.getClass());
        ser.open(splitBuffer);
        ser.serialize(split);
        SplitInfo info = new SplitInfo(split.getClass().getName(), splitBuffer.getData());
        return info;
    } finally {
        splitBuffer.close();
    }
}
Also used : DataOutputBuffer(org.apache.hadoop.io.DataOutputBuffer) SerializationFactory(org.apache.hadoop.io.serializer.SerializationFactory) SplitInfo(com.tencent.angel.split.SplitInfo) Serializer(org.apache.hadoop.io.serializer.Serializer)

Aggregations

SerializationFactory (org.apache.hadoop.io.serializer.SerializationFactory)11 Serializer (org.apache.hadoop.io.serializer.Serializer)11 DataOutputBuffer (org.apache.hadoop.io.DataOutputBuffer)4 Configuration (org.apache.hadoop.conf.Configuration)3 JobConf (org.apache.hadoop.mapred.JobConf)3 SplitInfo (com.tencent.angel.split.SplitInfo)2 IOException (java.io.IOException)2 Map (java.util.Map)2 InputSplit (org.apache.hadoop.mapred.InputSplit)2 DefaultPartition (com.datatorrent.api.DefaultPartition)1 ArrayList (java.util.ArrayList)1 Collection (java.util.Collection)1 LinkedList (java.util.LinkedList)1 TreeMap (java.util.TreeMap)1 CollectorTestSink (org.apache.apex.malhar.lib.testbench.CollectorTestSink)1 LocalDirAllocator (org.apache.hadoop.fs.LocalDirAllocator)1 Path (org.apache.hadoop.fs.Path)1 BoundedByteArrayOutputStream (org.apache.hadoop.io.BoundedByteArrayOutputStream)1 BytesWritable (org.apache.hadoop.io.BytesWritable)1 DataInputBuffer (org.apache.hadoop.io.DataInputBuffer)1