Use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.
From the class LindenMapper, method map.
/**
 * Turns one raw input record into a single-document {@code IntermediateForm} and emits it
 * under the shard selected for that record.
 *
 * <p>The record text ({@code value.toString()}) is parsed against the configured schema and must
 * be an {@code INDEX} request. If no shard can be computed for the request, the record is logged
 * and dropped without emitting anything.
 *
 * @param key     input key; not used by this method
 * @param value   raw record whose string form is parsed into an index request
 * @param context task context that receives the (shard, form) pair
 * @throws IllegalStateException if the record cannot be parsed or is not an INDEX request
 * @throws IOException           propagated from document parsing, form building, or the write
 * @throws InterruptedException  propagated from {@code context.write}
 */
@Override
public void map(Object key, Object value, Context context) throws IOException, InterruptedException {
  LindenIndexRequest request;
  try {
    request = LindenIndexRequestParser.parse(lindenConfig.getSchema(), value.toString());
  } catch (Exception parseFailure) {
    ExceptionUtils.printRootCauseStackTrace(parseFailure);
    throw new IllegalStateException("LindenIndexRequestParser parsing error", parseFailure);
  }

  if (request.getType() != IndexRequestType.INDEX) {
    throw new IllegalStateException("Index request type error");
  }

  Document luceneDoc = LindenDocParser.parse(request.getDoc(), lindenConfig);

  // At this point both the uid and the Lucene document are available.
  IntermediateForm singleDocForm = new IntermediateForm();
  Term uidTerm = new Term(lindenConfig.getSchema().getId(), request.getDoc().getId());
  singleDocForm.process(uidTerm, luceneDoc, lindenConfig.getIndexAnalyzerInstance(), facetsConfig);
  singleDocForm.closeWriter();

  int shardIndex = DefaultShardingStrategy.calculateShard(shards.length, request);
  if (shardIndex < 0) {
    // A negative index means no shard could be chosen; the record is skipped.
    logger.error("calculateShard failed for " + value.toString());
    return;
  }

  // Route the form to exactly one shard.
  context.write(shards[shardIndex], singleDocForm);
}
Use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.
From the class LindenCombiner, method createForm.
/**
 * Logs that a new form is being built and returns a fresh, empty {@code IntermediateForm}.
 *
 * @param message text appended to the log line; it does not affect the returned form
 * @return a newly constructed form
 * @throws IOException declared for callers; nothing visible in this method raises it directly
 */
private IntermediateForm createForm(String message) throws IOException {
  logger.info("Construct a form writer for " + message);
  return new IntermediateForm();
}
Use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.
From the class LindenReducer, method reduce.
/**
 * Applies every intermediate form routed to {@code key} to a {@code ShardWriter}, then optimizes
 * and closes the writer and emits a {@code DONE} record for the shard.
 *
 * <p>The writer is created over a temporary path built from the configured temp dir, the shard's
 * flat string and the current time in milliseconds.
 *
 * <p>While the writer is being optimized and closed, a helper thread reports progress to the
 * framework about once a second so that a long-running close is not mistaken for a hung task.
 *
 * @param key     shard whose index is being written
 * @param values  intermediate forms destined for this shard
 * @param context task context; used for progress reporting and for the final write
 * @throws IOException          propagated from the file system, the shard writer, or the write
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void reduce(Shard key, Iterable<IntermediateForm> values, Context context) throws IOException, InterruptedException {
  logger.info("Construct a shard writer for " + key);
  FileSystem fs = FileSystem.get(conf);
  // debug:
  logger.info("filesystem is: " + fs.getUri());
  String temp = mapreduceuceTempDir + Path.SEPARATOR + "shard_" + key.toFlatString() + "_" + System.currentTimeMillis();
  logger.info("mapreduceuceTempDir is: " + mapreduceuceTempDir);
  final ShardWriter writer = new ShardWriter(fs, key, temp, conf);

  // update the shard
  for (IntermediateForm form : values) {
    writer.process(form, facetsConfig);
  }

  // Final alias so the anonymous heartbeat thread can capture the context on any Java version.
  final Context heartbeatContext = context;

  // close the shard
  new Closeable() {
    // Written by the closing thread, read by the heartbeat thread; volatile for visibility.
    volatile boolean closed = false;

    @Override
    public void close() throws IOException {
      // spawn a thread to give progress heartbeats
      Thread prog = new Thread() {
        @Override
        public void run() {
          while (!closed) {
            try {
              // Report liveness; without this call the thread would only sleep and the
              // framework would receive no heartbeat during optimize/close.
              heartbeatContext.progress();
              Thread.sleep(1000);
            } catch (InterruptedException e) {
              // Re-check the closed flag immediately.
              continue;
            } catch (Throwable e) {
              // Heartbeats are best-effort; stop reporting rather than fail the close.
              return;
            }
          }
        }
      };
      try {
        prog.start();
        // added this option to optimize after all the docs have been added;
        writer.optimize();
        writer.close();
      } finally {
        // Always stop the heartbeat loop, even if optimize/close failed.
        closed = true;
      }
    }
  }.close();

  logger.info("Closed the shard writer for " + key + ", writer = " + writer);
  context.write(key, DONE);
}
Use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.
From the class LindenCombiner, method reduce.
/**
 * Combines the forms for one shard into fewer, larger forms before they leave the map side.
 *
 * <p>Incoming forms are merged into a running form. When adding the next form would push the
 * running form past {@code maxSizeInBytes}, the running form is closed and emitted first. An
 * incoming form whose size is at least {@code nearMaxSizeInBytes} is emitted as-is when no
 * running form is open. Any running form left at the end is closed and emitted.
 *
 * @param key     shard the forms belong to; its string form is used in log messages
 * @param values  forms to combine
 * @param context task context that receives the combined forms
 * @throws IOException          propagated from form handling or the write
 * @throws InterruptedException propagated from {@code context.write}
 */
@Override
protected void reduce(Shard key, Iterable<IntermediateForm> values, Context context) throws IOException, InterruptedException {
  String message = key.toString();
  IntermediateForm merged = null;

  for (IntermediateForm incoming : values) {
    long mergedBytes = 0;
    if (merged != null) {
      mergedBytes = merged.totalSizeInBytes();
    }
    long incomingBytes = incoming.totalSizeInBytes();

    // Flush the running form if the incoming one would not fit.
    boolean wouldOverflow = mergedBytes + incomingBytes > maxSizeInBytes;
    if (merged != null && wouldOverflow) {
      closeForm(merged, message);
      context.write(key, merged);
      merged = null;
    }

    if (merged == null) {
      if (incomingBytes >= nearMaxSizeInBytes) {
        // Already close to the limit on its own: forward it untouched.
        context.write(key, incoming);
        continue;
      }
      merged = createForm(message);
    }
    merged.process(incoming, facetsConfig);
  }

  // Emit whatever is still being accumulated.
  if (merged != null) {
    closeForm(merged, message);
    context.write(key, merged);
  }
}
Aggregations