Search in sources:

Example 1 with IntermediateForm

use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.

The method map of the class LindenMapper.

/**
 * Turns one input record into a single-document {@code IntermediateForm} and emits it under
 * the shard chosen for that record.
 *
 * <p>The record text ({@code value.toString()}) is parsed into a {@code LindenIndexRequest};
 * only requests of type {@code INDEX} are accepted. If no shard can be calculated
 * (a negative shard index), the record is logged and nothing is written.
 *
 * @param key the input key; not used by this method
 * @param value the input record; its string form is parsed as an index request
 * @param context the task context the (shard, form) pair is written to
 * @throws IllegalStateException if the record cannot be parsed or is not an INDEX request
 * @throws IOException propagated from document processing or from writing to the context
 * @throws InterruptedException propagated from writing to the context
 */
@Override
public void map(Object key, Object value, Context context) throws IOException, InterruptedException {
    final LindenIndexRequest request;
    try {
        request = LindenIndexRequestParser.parse(lindenConfig.getSchema(), value.toString());
    } catch (Exception parseFailure) {
        ExceptionUtils.printRootCauseStackTrace(parseFailure);
        throw new IllegalStateException("LindenIndexRequestParser parsing error", parseFailure);
    }
    if (IndexRequestType.INDEX != request.getType()) {
        throw new IllegalStateException("Index request type error");
    }

    Document luceneDoc = LindenDocParser.parse(request.getDoc(), lindenConfig);

    // Wrap the document, keyed by a term built from the schema id field and the document id.
    IntermediateForm singleDocForm = new IntermediateForm();
    Term idTerm = new Term(lindenConfig.getSchema().getId(), request.getDoc().getId());
    singleDocForm.process(idTerm, luceneDoc, lindenConfig.getIndexAnalyzerInstance(), facetsConfig);
    singleDocForm.closeWriter();

    int shardIndex = DefaultShardingStrategy.calculateShard(shards.length, request);
    if (shardIndex < 0) {
        // No shard could be determined: report the record and drop it.
        logger.error("calculateShard failed for " + value.toString());
        return;
    }
    // insert into one shard
    context.write(shards[shardIndex], singleDocForm);
}
Also used : IntermediateForm(com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm) LindenIndexRequest(com.xiaomi.linden.thrift.common.LindenIndexRequest) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) IOException(java.io.IOException)

Example 2 with IntermediateForm

use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.

The method createForm of the class LindenCombiner.

/**
 * Logs that a form is being constructed and returns a newly created {@code IntermediateForm}.
 *
 * @param message text appended to the log line
 * @return a new {@code IntermediateForm} instance
 * @throws IOException declared in the signature; nothing in this body visibly throws it
 */
private IntermediateForm createForm(String message) throws IOException {
    logger.info("Construct a form writer for " + message);
    return new IntermediateForm();
}
Also used : IntermediateForm(com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm)

Example 3 with IntermediateForm

use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.

The method reduce of the class LindenReducer.

/**
 * Builds one shard from all of its intermediate forms and emits a completion marker.
 *
 * <p>Each form in {@code values} is passed to a {@code ShardWriter} created over a temporary
 * path derived from {@code mapreduceuceTempDir}, the shard's flat name and the current time.
 * The writer is then optimized and closed while a helper thread calls
 * {@code context.progress()} about once per second. Finally {@code (key, DONE)} is written.
 *
 * <p>If processing a form fails, the exception propagates and the writer is intentionally
 * left untouched here: what {@code ShardWriter.close()} does with partial output is not
 * visible from this method.
 *
 * @param key the shard being written
 * @param values the intermediate forms belonging to that shard
 * @param context the task context used for progress reports and for the final output record
 * @throws IOException propagated from the file system, the shard writer or the context
 * @throws InterruptedException propagated from writing to the context
 */
@Override
protected void reduce(Shard key, Iterable<IntermediateForm> values, Context context) throws IOException, InterruptedException {
    logger.info("Construct a shard writer for " + key);
    FileSystem fs = FileSystem.get(conf);
    // debug:
    logger.info("filesystem is: " + fs.getUri());
    String temp = mapreduceuceTempDir + Path.SEPARATOR + "shard_" + key.toFlatString() + "_" + System.currentTimeMillis();
    logger.info("mapreduceuceTempDir is: " + mapreduceuceTempDir);
    final ShardWriter writer = new ShardWriter(fs, key, temp, conf);
    // update the shard
    for (IntermediateForm form : values) {
        writer.process(form, facetsConfig);
    }
    // Final alias so the anonymous heartbeat thread below can reach the task context.
    final Context heartbeatContext = context;
    // close the shard
    new Closeable() {

        volatile boolean closed = false;

        @Override
        public void close() throws IOException {
            // spawn a thread to give progress heartbeats while optimize/close run
            // (presumably these can take a long time on a large shard)
            Thread prog = new Thread() {

                @Override
                public void run() {
                    while (!closed) {
                        try {
                            // Report liveness; the loop previously only slept and never
                            // told the framework anything.
                            heartbeatContext.progress();
                            Thread.sleep(1000);
                        } catch (InterruptedException e) {
                            // Woken up early: re-check the closed flag.
                            continue;
                        } catch (Throwable e) {
                            return;
                        }
                    }
                }
            };
            // A pure helper thread must never keep the JVM alive on its own.
            prog.setDaemon(true);
            try {
                prog.start();
                // added this option to optimize after all the docs have been added;
                writer.optimize();
                writer.close();
            } finally {
                closed = true;
                // Cut the current sleep short so the helper exits promptly.
                prog.interrupt();
            }
        }
    }.close();
    logger.info("Closed the shard writer for " + key + ", writer = " + writer);
    context.write(key, DONE);
}
Also used : IntermediateForm(com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm) FileSystem(org.apache.hadoop.fs.FileSystem) Closeable(java.io.Closeable) IOException(java.io.IOException)

Example 4 with IntermediateForm

use of com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm in project linden by XiaoMi.

The method reduce of the class LindenCombiner.

/**
 * Merges the single-document forms of one shard into larger batch forms before they are
 * written back to the context.
 *
 * <p>Forms are accumulated into a batch via {@code form.process(...)}. When adding the next
 * form would push the batch beyond {@code maxSizeInBytes}, the batch is closed and written
 * first. A form whose own size is at least {@code nearMaxSizeInBytes} is written as-is when
 * no batch is currently open. Any batch still open after the last form is closed and written.
 *
 * @param key the shard the forms belong to; also used (as a string) in log messages
 * @param values the forms to combine
 * @param context the task context the combined forms are written to
 * @throws IOException propagated from form processing or from writing to the context
 * @throws InterruptedException propagated from writing to the context
 */
@Override
protected void reduce(Shard key, Iterable<IntermediateForm> values, Context context) throws IOException, InterruptedException {
    final String message = key.toString();
    IntermediateForm batch = null;
    for (IntermediateForm incoming : values) {
        long batchBytes = (batch != null) ? batch.totalSizeInBytes() : 0;
        long incomingBytes = incoming.totalSizeInBytes();

        // Flush the open batch first if this form would not fit into it.
        boolean wouldOverflow = batch != null && batchBytes + incomingBytes > maxSizeInBytes;
        if (wouldOverflow) {
            closeForm(batch, message);
            context.write(key, batch);
            batch = null;
        }

        // With no open batch, a form that is already near the limit is passed straight through.
        if (batch == null && incomingBytes >= nearMaxSizeInBytes) {
            context.write(key, incoming);
            continue;
        }

        if (batch == null) {
            batch = createForm(message);
        }
        batch.process(incoming, facetsConfig);
    }

    // Emit whatever is left in the last open batch.
    if (batch != null) {
        closeForm(batch, message);
        context.write(key, batch);
    }
}
Also used : IntermediateForm(com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm)

Aggregations

IntermediateForm (com.xiaomi.linden.hadoop.indexing.keyvalueformat.IntermediateForm): 4, IOException (java.io.IOException): 2, LindenIndexRequest (com.xiaomi.linden.thrift.common.LindenIndexRequest): 1, Closeable (java.io.Closeable): 1, FileSystem (org.apache.hadoop.fs.FileSystem): 1, Document (org.apache.lucene.document.Document): 1, Term (org.apache.lucene.index.Term): 1