Use of org.apache.crunch.impl.mr.run.CrunchRuntimeException in project crunch by cloudera.
The HBaseTarget class, configureForMapReduce method.
@Override
public void configureForMapReduce(Job job, PType<?> ptype, Path outputPath, String name) {
  Configuration conf = job.getConfiguration();
  HBaseConfiguration.addHbaseResources(conf);
  job.setOutputFormatClass(TableOutputFormat.class);
  conf.set(TableOutputFormat.OUTPUT_TABLE, table);
  try {
    TableMapReduceUtil.addDependencyJars(job);
  } catch (IOException e) {
    throw new CrunchRuntimeException(e);
  }
}
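For context, a hedged sketch of how a target like this might be used from a Crunch pipeline: when the job is built, the planner calls configureForMapReduce() on the target. The class name below is illustrative, and it assumes HBaseTarget lives in org.apache.crunch.io.hbase and exposes a single-argument String constructor for the table field seen above.

import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.io.hbase.HBaseTarget;
import org.apache.hadoop.hbase.client.Put;

public class HBaseWriteExample {
  // Writes a PCollection of HBase Puts to a table. When the pipeline runs,
  // the planner invokes configureForMapReduce() to set TableOutputFormat
  // and the output table name on the MapReduce job.
  public static void writePuts(Pipeline pipeline, PCollection<Put> puts, String tableName) {
    pipeline.write(puts, new HBaseTarget(tableName)); // assumed HBaseTarget(String) constructor
    pipeline.done();
  }
}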
Use of org.apache.crunch.impl.mr.run.CrunchRuntimeException in project crunch by cloudera.
The DistCache class, read method.
public static Object read(Configuration conf, Path path) throws IOException {
  URI target = null;
  for (URI uri : DistributedCache.getCacheFiles(conf)) {
    if (uri.toString().equals(path.toString())) {
      target = uri;
      break;
    }
  }
  Object value = null;
  if (target != null) {
    Path targetPath = new Path(target.toString());
    ObjectInputStream ois = new ObjectInputStream(targetPath.getFileSystem(conf).open(targetPath));
    try {
      value = ois.readObject();
    } catch (ClassNotFoundException e) {
      throw new CrunchRuntimeException(e);
    }
    ois.close();
  }
  return value;
}
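To illustrate the other half of the round trip, here is a minimal sketch (not the library's own API; the class and method names are hypothetical) of how a value could be serialized to a path and registered in the distributed cache so that the read() method above can find and deserialize it.

import java.io.IOException;
import java.io.ObjectOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DistCacheWriteSketch {
  // Serializes a value to the given path and adds that file to the
  // distributed cache, so a later read(conf, path) can match the cached
  // URI against the path and deserialize the object.
  public static void write(Configuration conf, Object value, Path path) throws IOException {
    ObjectOutputStream oos = new ObjectOutputStream(FileSystem.get(conf).create(path));
    try {
      oos.writeObject(value);
    } finally {
      oos.close();
    }
    DistributedCache.addCacheFile(path.toUri(), conf);
  }
}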
Use of org.apache.crunch.impl.mr.run.CrunchRuntimeException in project crunch by cloudera.
The MapsideJoin class, join method.
/**
 * Join two tables using a map-side join. The right-side table will be loaded
 * fully in memory, so this method should only be used if the right-side
 * table's contents can fit in the memory allocated to mappers. The join
 * performed by this method is an inner join.
 *
 * @param left
 *          The left-side table of the join
 * @param right
 *          The right-side table of the join, whose contents will be fully
 *          read into memory
 * @return A table keyed on the join key, containing pairs of joined values
 */
public static <K, U, V> PTable<K, Pair<U, V>> join(PTable<K, U> left, PTable<K, V> right) {
  if (!(right.getPipeline() instanceof MRPipeline)) {
    throw new CrunchRuntimeException("Map-side join is only supported within a MapReduce context");
  }
  MRPipeline pipeline = (MRPipeline) right.getPipeline();
  pipeline.materialize(right);
  // TODO Move necessary logic to MRPipeline so that we can theoretically
  // optimize this by running the setup of multiple map-side joins concurrently
  pipeline.run();
  ReadableSourceTarget<Pair<K, V>> readableSourceTarget = pipeline.getMaterializeSourceTarget(right);
  if (!(readableSourceTarget instanceof SourcePathTargetImpl)) {
    throw new CrunchRuntimeException("Right-side contents can't be read from a path");
  }
  // Suppress warnings because we've just checked this cast via instanceof
  @SuppressWarnings("unchecked")
  SourcePathTargetImpl<Pair<K, V>> sourcePathTarget = (SourcePathTargetImpl<Pair<K, V>>) readableSourceTarget;
  Path path = sourcePathTarget.getPath();
  DistributedCache.addCacheFile(path.toUri(), pipeline.getConfiguration());
  MapsideJoinDoFn<K, U, V> mapJoinDoFn = new MapsideJoinDoFn<K, U, V>(path.toString(), right.getPType());
  PTypeFamily typeFamily = left.getTypeFamily();
  return left.parallelDo("mapjoin", mapJoinDoFn,
      typeFamily.tableOf(left.getKeyType(), typeFamily.pairs(left.getValueType(), right.getValueType())));
}
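A hedged usage sketch, assuming MapsideJoin is reachable as org.apache.crunch.lib.join.MapsideJoin: the right-side argument should be the small lookup side, since it is materialized and read fully into mapper memory. The class and method names below are illustrative.

import org.apache.crunch.PTable;
import org.apache.crunch.Pair;
import org.apache.crunch.lib.join.MapsideJoin;

public class MapsideJoinExample {
  // Inner-joins a (potentially large) left-side table with a small
  // right-side lookup table that fits in mapper memory.
  public static PTable<String, Pair<String, String>> joinWithLookup(
      PTable<String, String> events, PTable<String, String> lookup) {
    return MapsideJoin.join(events, lookup);
  }
}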
Use of org.apache.crunch.impl.mr.run.CrunchRuntimeException in project crunch by cloudera.
The AvroDeepCopier class, deepCopy method.
/**
 * Create a deep copy of an Avro value.
 *
 * @param source
 *          The value to be copied
 * @return The deep copy of the value
 */
public T deepCopy(T source) {
  ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
  binaryEncoder = EncoderFactory.get().binaryEncoder(byteOutStream, binaryEncoder);
  T target = createCopyTarget();
  try {
    datumWriter.write(source, binaryEncoder);
    binaryEncoder.flush();
    binaryDecoder = DecoderFactory.get().binaryDecoder(byteOutStream.toByteArray(), binaryDecoder);
    datumReader.read(target, binaryDecoder);
  } catch (Exception e) {
    throw new CrunchRuntimeException("Error while deep copying avro value " + source, e);
  }
  return target;
}
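The same binary round trip can be reproduced with Avro's public API alone. The sketch below deep-copies a GenericRecord the way deepCopy() does, serializing it to bytes and reading it back into a fresh record; the class name is illustrative.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;

public class AvroBinaryCopySketch {
  // Deep-copies a GenericRecord via an in-memory binary serialize/deserialize
  // round trip, the same technique AvroDeepCopier.deepCopy uses.
  public static GenericRecord copy(Schema schema, GenericRecord source) throws IOException {
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(schema).write(source, encoder);
    encoder.flush();
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
    return new GenericDatumReader<GenericRecord>(schema).read(null, decoder);
  }
}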
Use of org.apache.crunch.impl.mr.run.CrunchRuntimeException in project crunch by cloudera.
The OutputEmitter class, emit method.
public void emit(T emitted) {
  try {
    K key = converter.outputKey(emitted);
    V value = converter.outputValue(emitted);
    this.context.write(key, value);
  } catch (IOException e) {
    throw new CrunchRuntimeException(e);
  } catch (InterruptedException e) {
    throw new CrunchRuntimeException(e);
  }
}
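The emit() method above converts each emitted value to a key/value pair and writes it to the MapReduce context. From user code, values reach an Emitter through a DoFn; a minimal illustrative DoFn (the class name is hypothetical) is shown below.

import org.apache.crunch.DoFn;
import org.apache.crunch.Emitter;

public class ToUpperFn extends DoFn<String, String> {
  // Each value passed to emitter.emit() is eventually handled by the
  // runtime's emitter implementation, such as the emit() method above.
  @Override
  public void process(String input, Emitter<String> emitter) {
    emitter.emit(input.toUpperCase());
  }
}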