use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class CoreGeneratorTest method setUp.
/**
 * Prepares the fixture used by the tests in this class.
 *
 * <p>A JSON record is assembled by hand, wrapped in a {@code CoreCollection.Document}, and
 * handed to a {@code CoreGenerator} constructed with a default {@code IndexArgs}. The wrapped
 * record is kept in {@code coreDoc} and the generator's output in {@code doc}; both are
 * declared outside this method.
 *
 * @throws Exception if building the document or running the generator fails
 */
@Before
public void setUp() throws Exception {
  ObjectMapper json = new ObjectMapper();
  NullNode absent = NullNode.getInstance();
  ObjectNode record = json.createObjectNode();

  // Identifiers and basic bibliographic text.
  record.set("coreId", TextNode.valueOf("id_text"));
  record.set("doi", TextNode.valueOf("doi_text"));
  record.set("oai", TextNode.valueOf("oai_text"));
  record.set("title", TextNode.valueOf("every startup ever"));
  record.set("abstract", TextNode.valueOf("machine learning blockchain quantum vr"));
  record.set("year", IntNode.valueOf(2020));

  // People and publication details; some values are deliberately JSON null or empty.
  record.set("authors", json.createArrayNode().add("Elon Musk").add("Mark Zuckerberg"));
  record.set("contributors", json.createArrayNode());
  record.set("publisher", absent);
  record.set("datePublished", TextNode.valueOf("2020-01-01"));
  record.set("pdfHashValue", TextNode.valueOf("abc"));
  record.set("downloadUrl", absent);

  // Multi-valued classification fields.
  record.set("topics", json.createArrayNode().add("Machine Learning").add("Blockchain"));
  record.set("subjects", json.createArrayNode().add("Quantum").add("VR"));
  record.set("journals", json.createArrayNode().add("journal"));
  record.set("identifiers", json.createArrayNode());
  record.set("language", json.createObjectNode());

  // Nested object with a single text entry.
  ObjectNode relations = json.createObjectNode();
  relations.set("sample", TextNode.valueOf("text"));
  record.set("relations", relations);

  record.set("fullTextIdentifier", absent);

  // "enrichments" holds an empty "references" array and a "documentType" object whose
  // "type" and "confidence" entries are both JSON null.
  ObjectNode documentType = json.createObjectNode();
  documentType.set("type", absent);
  documentType.set("confidence", absent);
  ObjectNode enrichments = json.createObjectNode();
  enrichments.set("references", json.createArrayNode());
  enrichments.set("documentType", documentType);
  record.set("enrichments", enrichments);

  coreDoc = new CoreCollection.Document(record);
  CoreGenerator coreGenerator = new CoreGenerator(new IndexArgs());
  doc = coreGenerator.createDocument(coreDoc);
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class C4EndToEndTest method getIndexArgs.
/**
 * Supplies the indexing settings for this test: the defaults from
 * {@code createDefaultIndexArgs()}, pointed at the sample C4 documents, using the C4
 * collection and generator classes, with {@code shardCount} set to 2 and
 * {@code shardCurrent} set to 1.
 *
 * @return the configured {@code IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
  IndexArgs c4Args = createDefaultIndexArgs();

  // Collection and generator are referred to by their simple class names.
  c4Args.collectionClass = C4Collection.class.getSimpleName();
  c4Args.generatorClass = C4Generator.class.getSimpleName();
  c4Args.input = "src/test/resources/sample_docs/c4";

  // Sharding configuration.
  c4Args.shardCurrent = 1;
  c4Args.shardCount = 2;

  return c4Args;
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class EndToEndTest method createDefaultIndexArgs.
/**
 * Creates a new {@code IndexArgs} with the baseline settings used by the end-to-end tests:
 * positions, document vectors, contents and raw documents are all stored, and the
 * {@code optimize} and {@code quiet} flags are switched on.
 *
 * @return a freshly created {@code IndexArgs} carrying those settings
 */
protected IndexArgs createDefaultIndexArgs() {
  IndexArgs defaults = new IndexArgs();

  // Run-mode flags.
  defaults.quiet = true;
  defaults.optimize = true;

  // What gets stored in the index.
  defaults.storeRaw = true;
  defaults.storeContents = true;
  defaults.storeDocvectors = true;
  defaults.storePositions = true;

  return defaults;
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class EndToEndTest method setUp.
/**
 * Builds a fresh index before every test.
 *
 * <p>The settings returned by {@code getIndexArgs()} (the hook subclasses override) are
 * translated into command-line arguments and passed to {@code IndexCollection.main}, so the
 * indexer is exercised through its command-line entry point. The index is written to a
 * path of the form {@code test-index<N>}, where N is drawn from {@code RANDOM}.
 *
 * @throws Exception if the superclass setup or the indexing run fails
 */
@Override
@Before
public void setUp() throws Exception {
  Locale.setDefault(Locale.US);

  // A new index is built for each test.
  super.setUp();
  indexPath = "test-index" + RANDOM.nextInt(100000);
  cleanup.clear();

  // Subclasses provide their own settings through getIndexArgs().
  IndexArgs cfg = getIndexArgs();

  // Always-present options, in the order they are handed to main.
  List<String> cmd = new ArrayList<>();
  cmd.addAll(List.of("-index", indexPath));
  cmd.addAll(List.of("-input", cfg.input));
  cmd.addAll(List.of("-threads", "2"));
  cmd.addAll(List.of("-language", cfg.language));
  cmd.addAll(List.of("-collection", cfg.collectionClass));
  cmd.addAll(List.of("-generator", cfg.generatorClass));

  // Options carrying a value are emitted only when they differ from "unset".
  if (cfg.tweetMaxId != Long.MAX_VALUE) {
    cmd.addAll(List.of("-tweet.maxId", String.valueOf(cfg.tweetMaxId)));
  }
  if (cfg.whitelist != null) {
    cmd.addAll(List.of("-whitelist", cfg.whitelist));
  }

  // Storage switches.
  if (cfg.storePositions) {
    cmd.add("-storePositions");
  }
  if (cfg.storeDocvectors) {
    cmd.add("-storeDocvectors");
  }
  if (cfg.storeContents) {
    cmd.add("-storeContents");
  }
  if (cfg.storeRaw) {
    cmd.add("-storeRaw");
  }

  // Stopword handling.
  if (cfg.keepStopwords) {
    cmd.add("-keepStopwords");
  }
  if (cfg.stopwords != null) {
    cmd.addAll(List.of("-stopwords", cfg.stopwords));
  }

  if (cfg.optimize) {
    cmd.add("-optimize");
  }
  if (cfg.quiet) {
    cmd.add("-quiet");
  }

  // Shard options are passed only when more than one shard is requested.
  if (cfg.shardCount > 1) {
    cmd.addAll(List.of(
        "-shard.count", String.valueOf(cfg.shardCount),
        "-shard.current", String.valueOf(cfg.shardCurrent)));
  }

  if (cfg.pretokenized) {
    cmd.add("-pretokenized");
  }

  IndexCollection.main(cmd.toArray(new String[0]));
}
use of io.anserini.index.IndexArgs in project Anserini by castorini.
the class TrecEndToEndPassageTest method getIndexArgs.
/**
 * Supplies the indexing settings for this test: the defaults from
 * {@code createDefaultIndexArgs()}, using the TREC collection class and reading from the
 * {@code collection3} sample directory.
 *
 * @return the configured {@code IndexArgs}
 */
@Override
protected IndexArgs getIndexArgs() {
  IndexArgs trecArgs = createDefaultIndexArgs();
  // The collection is referred to by its simple class name.
  trecArgs.collectionClass = TrecCollection.class.getSimpleName();
  trecArgs.input = "src/test/resources/sample_docs/trec/collection3";
  return trecArgs;
}
Aggregations