Search in sources :

Example 11 with IndexArgs

use of io.anserini.index.IndexArgs in project Anserini by castorini.

the class TweetEndToEndTest method getIndexArgs.

// Note that in the test cases, we have:
// {... "id":1,"id_str":"1","text":"RT This is a Retweet and will NOT NOT be indexed!" ... }
// {... "id":10,"id_str":"10","text":"This tweet won't be indexed since the maxId is 9" ... }
// 
// src/test/resources/sample_docs/tweets/tweets1: 5 JSON objects, 2 deletes
// src/test/resources/sample_docs/tweets/tweets2: 4 JSON objects, 1 delete
// 
// Thus, there should be a total of 4 documents indexed: 9 objects - 5 skipped
@Override
protected IndexArgs getIndexArgs() {
    IndexArgs indexArgs = createDefaultIndexArgs();
    indexArgs.input = "src/test/resources/sample_docs/tweets/collection1";
    indexArgs.collectionClass = TweetCollection.class.getSimpleName();
    indexArgs.generatorClass = TweetGenerator.class.getSimpleName();
    indexArgs.tweetMaxId = 9L;
    return indexArgs;
}
Also used : TweetGenerator(io.anserini.index.generator.TweetGenerator) IndexArgs(io.anserini.index.IndexArgs) TweetCollection(io.anserini.collection.TweetCollection)

Example 12 with IndexArgs

use of io.anserini.index.IndexArgs in project Anserini by castorini.

the class GeoGeneratorTest method riverSetUp.

/**
 * JUnit {@code @Before} fixture: builds a JSON object describing a single
 * river record, wraps it in a {@link JsonCollection.Document} stored in
 * {@code geoDoc}, and stores the result of running it through a
 * {@link GeoGenerator} in {@code doc}.
 */
@Before
public void riverSetUp() {
    // Field name/value pairs, listed in the exact order they are added to the JSON object.
    final String[][] riverFields = {
        {"HYRIV_ID", "90000003"},
        {"NEXT_DOWN", "0"},
        {"MAIN_RIV", "90000003"},
        {"LENGTH_KM", "3.02"},
        {"DIST_DN_KM", "0.0"},
        {"DIST_UP_KM", "35.3"},
        {"CATCH_SKM", "12.24"},
        {"UPLAND_SKM", "12.2"},
        {"ENDORHEIC", "0"},
        {"DIS_AV_CMS", "0.03"},
        {"ORD_STRA", "1"},
        {"ORD_CLAS", "1"},
        {"ORD_FLOW", "8"},
        {"HYBAS_L12", "9120016580"},
        {"geometry", "LINESTRING (-29.737500000000722 83.54583333333295, -29.731250000000642 83.55208333333294, -29.731250000000642 83.57291666666629)"},
        {"id", "90000003"},
    };

    final ObjectNode riverJson = new ObjectMapper().createObjectNode();
    for (String[] field : riverFields) {
        // Every value is stored as a text node, including the numeric-looking ones.
        riverJson.set(field[0], TextNode.valueOf(field[1]));
    }

    geoDoc = new JsonCollection.Document(riverJson);
    doc = new GeoGenerator(new IndexArgs()).createDocument(geoDoc);
}
Also used : ObjectNode(com.fasterxml.jackson.databind.node.ObjectNode) IndexArgs(io.anserini.index.IndexArgs) JsonCollection(io.anserini.collection.JsonCollection) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Before(org.junit.Before)

Example 13 with IndexArgs

use of io.anserini.index.IndexArgs in project Anserini by castorini.

the class BibtexEndToEndTest method getIndexArgs.

/**
 * Supplies the indexing arguments for the BibTeX end-to-end test.
 *
 * <p>Starts from the default arguments and overrides the input path and the
 * collection and generator class names.
 *
 * @return the configured {@link IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
    final IndexArgs bibArgs = createDefaultIndexArgs();
    // The assignments below are independent of one another.
    bibArgs.generatorClass = BibtexGenerator.class.getSimpleName();
    bibArgs.collectionClass = BibtexCollection.class.getSimpleName();
    bibArgs.input = "src/test/resources/sample_docs/bib/acl";
    return bibArgs;
}
Also used : BibtexGenerator(io.anserini.index.generator.BibtexGenerator) IndexArgs(io.anserini.index.IndexArgs) BibtexCollection(io.anserini.collection.BibtexCollection)

Example 14 with IndexArgs

use of io.anserini.index.IndexArgs in project Anserini by castorini.

the class CoreEndToEndTest method getIndexArgs.

/**
 * Supplies the indexing arguments for the CORE end-to-end test.
 *
 * <p>Starts from the default arguments and overrides the input path and the
 * collection and generator class names.
 *
 * @return the configured {@link IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
    final IndexArgs coreArgs = createDefaultIndexArgs();
    // The assignments below are independent of one another.
    coreArgs.generatorClass = CoreGenerator.class.getSimpleName();
    coreArgs.collectionClass = CoreCollection.class.getSimpleName();
    coreArgs.input = "src/test/resources/sample_docs/core";
    return coreArgs;
}
Also used : CoreCollection(io.anserini.collection.CoreCollection) IndexArgs(io.anserini.index.IndexArgs) CoreGenerator(io.anserini.index.generator.CoreGenerator)

Example 15 with IndexArgs

use of io.anserini.index.IndexArgs in project Anserini by castorini.

the class TrecEndToEndTest method getIndexArgs.

/**
 * Supplies the indexing arguments for the TREC end-to-end test.
 *
 * <p>Starts from the default arguments and overrides only the input path and
 * the collection class name.
 *
 * @return the configured {@link IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
    final IndexArgs trecArgs = createDefaultIndexArgs();
    // The assignments below are independent of one another.
    trecArgs.collectionClass = TrecCollection.class.getSimpleName();
    trecArgs.input = "src/test/resources/sample_docs/trec/collection2";
    return trecArgs;
}
Also used : TrecCollection(io.anserini.collection.TrecCollection) IndexArgs(io.anserini.index.IndexArgs)

Aggregations

IndexArgs (io.anserini.index.IndexArgs)22 TrecCollection (io.anserini.collection.TrecCollection)6 CoreCollection (io.anserini.collection.CoreCollection)3 JsonCollection (io.anserini.collection.JsonCollection)3 DefaultLuceneDocumentGenerator (io.anserini.index.generator.DefaultLuceneDocumentGenerator)3 Before (org.junit.Before)3 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)2 ObjectNode (com.fasterxml.jackson.databind.node.ObjectNode)2 AclAnthology (io.anserini.collection.AclAnthology)2 AclAnthologyGenerator (io.anserini.index.generator.AclAnthologyGenerator)2 CoreGenerator (io.anserini.index.generator.CoreGenerator)2 BibtexCollection (io.anserini.collection.BibtexCollection)1 C4Collection (io.anserini.collection.C4Collection)1 JsonVectorCollection (io.anserini.collection.JsonVectorCollection)1 TweetCollection (io.anserini.collection.TweetCollection)1 IndexCollection (io.anserini.index.IndexCollection)1 BibtexGenerator (io.anserini.index.generator.BibtexGenerator)1 C4Generator (io.anserini.index.generator.C4Generator)1 TweetGenerator (io.anserini.index.generator.TweetGenerator)1 SearchSolr (io.anserini.search.SearchSolr)1