Search in sources :

Example 26 with TextField

use of org.apache.lucene.document.TextField in project OpenGrok by OpenGrok.

the class AnalyzerGuru method populateDocument.

/**
 * Fills a Lucene document with the index fields describing one file.
 *
 * <p>Fields are added in this order: the uid built from {@code path} and the
 * file's last-modified time, the absolute path (indexed field plus sorted doc
 * value), the history reader if one is available, the date (indexed field plus
 * sorted doc value), the path and owning project when {@code path} is not
 * {@code null}, and finally whatever the analyzer contributes together with
 * its file type name when {@code fa} is not {@code null}.
 *
 * @param doc The document to populate
 * @param file The file to index
 * @param path Where the file is located (from source root)
 * @param fa The analyzer to use on the file (skipped when {@code null})
 * @param xrefOut Where to write the xref (possibly {@code null})
 * @throws IOException If an exception occurs while collecting the data
 */
public void populateDocument(Document doc, File file, String path, FileAnalyzer fa, Writer xrefOut) throws IOException {
    final String lastModified = DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND);
    doc.add(new Field(QueryBuilder.U, Util.path2uid(path, lastModified), string_ft_stored_nanalyzed_norms));

    // The absolute path feeds both the indexed field and the sortable doc value.
    final String absolutePath = file.getAbsolutePath();
    doc.add(new Field(QueryBuilder.FULLPATH, absolutePath, string_ft_nstored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.FULLPATH, new BytesRef(absolutePath)));

    try {
        HistoryReader history = HistoryGuru.getInstance().getHistoryReader(file);
        if (history != null) {
            doc.add(new TextField(QueryBuilder.HIST, history));
            // RFE: the date could be taken from history.getLastCommentDate() instead.
        }
    } catch (HistoryException e) {
        // A history failure is logged and the rest of the document is still populated.
        LOGGER.log(Level.WARNING, "An error occurred while reading history: ", e);
    }

    doc.add(new Field(QueryBuilder.DATE, lastModified, string_ft_stored_nanalyzed_norms));
    doc.add(new SortedDocValuesField(QueryBuilder.DATE, new BytesRef(lastModified)));

    if (path != null) {
        doc.add(new TextField(QueryBuilder.PATH, path, Store.YES));
        Project owner = Project.getProject(path);
        if (owner != null) {
            doc.add(new TextField(QueryBuilder.PROJECT, owner.getPath(), Store.YES));
        }
    }

    if (fa == null) {
        return;
    }

    // Only these three genres get their type name recorded in the T field.
    Genre genre = fa.getGenre();
    boolean recordGenre = genre == Genre.PLAIN || genre == Genre.XREFABLE || genre == Genre.HTML;
    if (recordGenre) {
        doc.add(new Field(QueryBuilder.T, genre.typeName(), string_ft_stored_nanalyzed_norms));
    }
    fa.analyze(doc, StreamSource.fromFile(file), xrefOut);
    doc.add(new StringField(QueryBuilder.TYPE, fa.getFileTypeName(), Store.YES));
}
Also used : SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) StringField(org.apache.lucene.document.StringField) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) Project(org.opensolaris.opengrok.configuration.Project) HistoryException(org.opensolaris.opengrok.history.HistoryException) StringField(org.apache.lucene.document.StringField) SortedDocValuesField(org.apache.lucene.document.SortedDocValuesField) TextField(org.apache.lucene.document.TextField) Genre(org.opensolaris.opengrok.analysis.FileAnalyzer.Genre) BytesRef(org.apache.lucene.util.BytesRef) HistoryReader(org.opensolaris.opengrok.history.HistoryReader)

Example 27 with TextField

use of org.apache.lucene.document.TextField in project OpenGrok by OpenGrok.

the class TarAnalyzer method analyze.

/**
 * Indexes the entry names of a tar archive.
 *
 * <p>Every entry name is collected for the {@code "full"} field and, when
 * {@code xrefOut} is not {@code null}, also written to it in htmlized form
 * followed by a line break.
 *
 * @param doc the document that receives the {@code "full"} field
 * @param src the source of the tar stream
 * @param xrefOut where to write the xref (possibly {@code null})
 * @throws IOException if reading the archive or writing the xref fails
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    ArrayList<String> entryNames = new ArrayList<>();
    try (TarInputStream tarIn = new TarInputStream(src.getStream())) {
        for (TarEntry entry = tarIn.getNextEntry(); entry != null; entry = tarIn.getNextEntry()) {
            String entryName = entry.getName();
            entryNames.add(entryName);
            if (xrefOut == null) {
                continue;
            }
            Util.htmlize(entryName, xrefOut);
            xrefOut.append("<br/>");
        }
    }
    // The field is added only after the whole archive has been read.
    doc.add(new TextField("full", new IteratorReader(entryNames)));
}
Also used : IteratorReader(org.opensolaris.opengrok.analysis.IteratorReader) TarInputStream(org.apache.tools.tar.TarInputStream) ArrayList(java.util.ArrayList) TextField(org.apache.lucene.document.TextField) TarEntry(org.apache.tools.tar.TarEntry)

Example 28 with TextField

use of org.apache.lucene.document.TextField in project OpenGrok by OpenGrok.

the class TroffAnalyzer method analyze.

/**
 * Adds the full-text field for a troff source and optionally writes its xref.
 *
 * @param doc the document that receives the {@code QueryBuilder.FULL} field
 * @param src the source to read; its stream is opened once for the tokenizer
 *            and a second time for the xref when one is requested
 * @param xrefOut where to write the xref (possibly {@code null})
 * @throws IOException if reading the source or writing the xref fails
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    // Pass this analyzer's own token stream to the field explicitly: a
    // workaround for #1376, where symbol search behaved like full text search.
    this.SymbolTokenizer.setReader(getReader(src.getStream()));
    doc.add(new TextField(QueryBuilder.FULL, SymbolTokenizer));

    if (xrefOut == null) {
        return;
    }
    try (Reader xrefSource = getReader(src.getStream())) {
        writeXref(xrefSource, xrefOut);
    }
}
Also used : TextField(org.apache.lucene.document.TextField) Reader(java.io.Reader)

Example 29 with TextField

use of org.apache.lucene.document.TextField in project OpenGrok by OpenGrok.

the class JarAnalyzer method analyze.

/**
 * Indexes the entries of a jar archive.
 *
 * <p>Each entry name is added to the {@code "full"} field and, when an xref is
 * requested, written to it in bold. Entries for which {@code AnalyzerGuru}
 * returns a {@code JavaClassAnalyzerFactory} are additionally passed to a
 * {@code JavaClassAnalyzer}, which reads the entry from the zip stream.
 *
 * @param doc the document to add fields to
 * @param src the source of the jar stream
 * @param xrefOut where to write the xref (possibly {@code null})
 * @throws IOException if reading the archive or writing the xref fails
 */
@Override
public void analyze(Document doc, StreamSource src, Writer xrefOut) throws IOException {
    final boolean emitXref = xrefOut != null;
    try (ZipInputStream jarIn = new ZipInputStream(src.getStream())) {
        for (ZipEntry entry = jarIn.getNextEntry(); entry != null; entry = jarIn.getNextEntry()) {
            String entryName = entry.getName();
            if (emitXref) {
                xrefOut.append("<br/><b>");
                Util.htmlize(entryName, xrefOut);
                xrefOut.append("</b>");
            }
            doc.add(new TextField("full", entryName, Store.NO));

            FileAnalyzerFactory factory = AnalyzerGuru.find(entryName);
            if (!(factory instanceof JavaClassAnalyzerFactory)) {
                continue;
            }
            if (emitXref) {
                xrefOut.append("<br/>");
            }
            JavaClassAnalyzer classAnalyzer = (JavaClassAnalyzer) factory.getAnalyzer();
            classAnalyzer.analyze(doc, new BufferedInputStream(jarIn), xrefOut);
        }
    }
}
Also used : ZipInputStream(java.util.zip.ZipInputStream) BufferedInputStream(java.io.BufferedInputStream) FileAnalyzerFactory(org.opensolaris.opengrok.analysis.FileAnalyzerFactory) ZipEntry(java.util.zip.ZipEntry) TextField(org.apache.lucene.document.TextField)

Example 30 with TextField

use of org.apache.lucene.document.TextField in project OpenGrok by OpenGrok.

the class JavaClassAnalyzer method analyze.

/**
 * Parses a class file from a stream and adds its content to the document.
 *
 * <p>The parsed class is handed to {@code getContent}, which fills an xref
 * buffer, a full-text buffer and three string lists. The xref text is appended
 * to {@code xrefOut} when it is not {@code null}. The lists and the full text
 * become the {@code "defs"}, {@code "refs"} and {@code "full"} fields.
 *
 * @param doc the document to add fields to; its {@code "path"} value is passed
 *            to the class parser
 * @param in the stream holding the class file
 * @param xrefOut where to write the xref (possibly {@code null})
 * @throws IOException if parsing the class or appending the xref fails
 */
void analyze(Document doc, InputStream in, Writer xrefOut) throws IOException {
    List<String> definitions = new ArrayList<>();
    List<String> references = new ArrayList<>();
    List<String> fullEntries = new ArrayList<>();
    ClassParser parser = new ClassParser(in, doc.get("path"));
    StringWriter xrefBuffer = new StringWriter();
    StringWriter fullBuffer = new StringWriter();
    getContent(xrefBuffer, fullBuffer, parser.parse(), definitions, references, fullEntries);
    String fullText = fullBuffer.toString();

    if (xrefOut != null) {
        xrefOut.append(xrefBuffer.toString());
        try {
            xrefOut.flush();
        } catch (IOException ex) {
            // A failed flush is only logged; analysis continues.
            LOGGER.log(Level.WARNING, "Couldn't flush xref, will retry once added to doc", ex);
        }
    }

    // Join the collected entries into one newline-terminated block of text.
    StringBuilder joinedEntries = new StringBuilder();
    for (String entryText : fullEntries) {
        joinedEntries.append(entryText).append('\n');
    }

    doc.add(new TextField("defs", new IteratorReader(definitions)));
    doc.add(new TextField("refs", new IteratorReader(references)));
    doc.add(new TextField("full", new StringReader(fullText)));
    doc.add(new TextField("full", joinedEntries.toString(), Store.NO));
}
Also used : StringWriter(java.io.StringWriter) IteratorReader(org.opensolaris.opengrok.analysis.IteratorReader) ArrayList(java.util.ArrayList) StringReader(java.io.StringReader) TextField(org.apache.lucene.document.TextField) ConstantString(org.apache.bcel.classfile.ConstantString) IOException(java.io.IOException) ClassParser(org.apache.bcel.classfile.ClassParser)

Aggregations

TextField (org.apache.lucene.document.TextField)192 Document (org.apache.lucene.document.Document)171 Directory (org.apache.lucene.store.Directory)99 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)61 Term (org.apache.lucene.index.Term)61 IndexWriter (org.apache.lucene.index.IndexWriter)58 IndexSearcher (org.apache.lucene.search.IndexSearcher)55 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)52 Field (org.apache.lucene.document.Field)50 StringField (org.apache.lucene.document.StringField)48 BytesRef (org.apache.lucene.util.BytesRef)48 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)44 IndexReader (org.apache.lucene.index.IndexReader)43 TermQuery (org.apache.lucene.search.TermQuery)41 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)31 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)30 TopDocs (org.apache.lucene.search.TopDocs)29 RAMDirectory (org.apache.lucene.store.RAMDirectory)29 FieldType (org.apache.lucene.document.FieldType)23 Query (org.apache.lucene.search.Query)23