Add File
This commit is contained in:
@@ -0,0 +1,212 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023-2025, Agents-Flex (fuhai999@gmail.com).
|
||||||
|
* <p>
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package com.agentsflex.search.engine.lucene;
|
||||||
|
|
||||||
|
import com.agentsflex.core.document.Document;
|
||||||
|
import com.agentsflex.search.engine.service.DocumentSearcher;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.document.StringField;
|
||||||
|
import org.apache.lucene.document.TextField;
|
||||||
|
import org.apache.lucene.index.*;
|
||||||
|
import org.apache.lucene.queryparser.classic.ParseException;
|
||||||
|
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||||
|
import org.apache.lucene.search.*;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
import org.jetbrains.annotations.NotNull;
|
||||||
|
import org.lionsoul.jcseg.ISegment;
|
||||||
|
import org.lionsoul.jcseg.analyzer.JcsegAnalyzer;
|
||||||
|
import org.lionsoul.jcseg.dic.DictionaryFactory;
|
||||||
|
import org.lionsoul.jcseg.segmenter.SegmenterConfig;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
public class LuceneSearcher implements DocumentSearcher {
|
||||||
|
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(LuceneSearcher.class);
|
||||||
|
|
||||||
|
private Directory directory;
|
||||||
|
|
||||||
|
public LuceneSearcher(LuceneConfig config) {
|
||||||
|
Objects.requireNonNull(config, "LuceneConfig 不能为 null");
|
||||||
|
try {
|
||||||
|
String indexDirPath = config.getIndexDirPath(); // 索引目录路径
|
||||||
|
File indexDir = new File(indexDirPath);
|
||||||
|
if (!indexDir.exists() && !indexDir.mkdirs()) {
|
||||||
|
throw new IllegalStateException("can not mkdirs for path: " + indexDirPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
this.directory = FSDirectory.open(indexDir.toPath());
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("初始化 Lucene 索引失败", e);
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean addDocument(Document document) {
|
||||||
|
if (document == null || document.getContent() == null) return false;
|
||||||
|
|
||||||
|
IndexWriter indexWriter = null;
|
||||||
|
try {
|
||||||
|
indexWriter = createIndexWriter();
|
||||||
|
|
||||||
|
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
|
||||||
|
luceneDoc.add(new StringField("id", document.getId().toString(), Field.Store.YES));
|
||||||
|
luceneDoc.add(new TextField("content", document.getContent(), Field.Store.YES));
|
||||||
|
|
||||||
|
if (document.getTitle() != null) {
|
||||||
|
luceneDoc.add(new TextField("title", document.getTitle(), Field.Store.YES));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
indexWriter.addDocument(luceneDoc);
|
||||||
|
indexWriter.commit();
|
||||||
|
return true;
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("添加文档失败", e);
|
||||||
|
return false;
|
||||||
|
} finally {
|
||||||
|
close(indexWriter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean deleteDocument(Object id) {
|
||||||
|
if (id == null) return false;
|
||||||
|
|
||||||
|
IndexWriter indexWriter = null;
|
||||||
|
try {
|
||||||
|
indexWriter = createIndexWriter();
|
||||||
|
Term term = new Term("id", id.toString());
|
||||||
|
indexWriter.deleteDocuments(term);
|
||||||
|
indexWriter.commit();
|
||||||
|
return true;
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("删除文档失败", e);
|
||||||
|
return false;
|
||||||
|
} finally {
|
||||||
|
close(indexWriter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean updateDocument(Document document) {
|
||||||
|
if (document == null || document.getId() == null) return false;
|
||||||
|
|
||||||
|
IndexWriter indexWriter = null;
|
||||||
|
try {
|
||||||
|
indexWriter = createIndexWriter();
|
||||||
|
Term term = new Term("id", document.getId().toString());
|
||||||
|
|
||||||
|
org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
|
||||||
|
luceneDoc.add(new StringField("id", document.getId().toString(), Field.Store.YES));
|
||||||
|
luceneDoc.add(new TextField("content", document.getContent(), Field.Store.YES));
|
||||||
|
|
||||||
|
if (document.getTitle() != null) {
|
||||||
|
luceneDoc.add(new TextField("title", document.getTitle(), Field.Store.YES));
|
||||||
|
}
|
||||||
|
|
||||||
|
indexWriter.updateDocument(term, luceneDoc);
|
||||||
|
indexWriter.commit();
|
||||||
|
return true;
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("更新文档失败", e);
|
||||||
|
return false;
|
||||||
|
} finally {
|
||||||
|
close(indexWriter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<Document> searchDocuments(String keyword, int count) {
|
||||||
|
List<Document> results = new ArrayList<>();
|
||||||
|
try (IndexReader reader = DirectoryReader.open(directory)) {
|
||||||
|
IndexSearcher searcher = new IndexSearcher(reader);
|
||||||
|
Query query = buildQuery(keyword);
|
||||||
|
TopDocs topDocs = searcher.search(query, count);
|
||||||
|
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||||
|
org.apache.lucene.document.Document doc = searcher.doc(scoreDoc.doc);
|
||||||
|
Document resultDoc = new Document();
|
||||||
|
resultDoc.setId(doc.get("id"));
|
||||||
|
resultDoc.setContent(doc.get("content"));
|
||||||
|
resultDoc.setTitle(doc.get("title"));
|
||||||
|
|
||||||
|
resultDoc.setScore((double) scoreDoc.score);
|
||||||
|
|
||||||
|
results.add(resultDoc);
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.error("搜索文档失败", e);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Query buildQuery(String keyword) {
|
||||||
|
try {
|
||||||
|
Analyzer analyzer = createAnalyzer();
|
||||||
|
|
||||||
|
QueryParser titleQueryParser = new QueryParser("title", analyzer);
|
||||||
|
Query titleQuery = titleQueryParser.parse(keyword);
|
||||||
|
BooleanClause titleBooleanClause = new BooleanClause(titleQuery, BooleanClause.Occur.SHOULD);
|
||||||
|
|
||||||
|
QueryParser contentQueryParser = new QueryParser("content", analyzer);
|
||||||
|
Query contentQuery = contentQueryParser.parse(keyword);
|
||||||
|
BooleanClause contentBooleanClause = new BooleanClause(contentQuery, BooleanClause.Occur.SHOULD);
|
||||||
|
|
||||||
|
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||||
|
builder.add(titleBooleanClause)
|
||||||
|
.add(contentBooleanClause);
|
||||||
|
return builder.build();
|
||||||
|
} catch (ParseException e) {
|
||||||
|
LOG.error(e.toString(), e);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@NotNull
|
||||||
|
private IndexWriter createIndexWriter() throws IOException {
|
||||||
|
Analyzer analyzer = createAnalyzer();
|
||||||
|
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
|
||||||
|
return new IndexWriter(directory, indexWriterConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private static Analyzer createAnalyzer() {
|
||||||
|
SegmenterConfig config = new SegmenterConfig(true);
|
||||||
|
return new JcsegAnalyzer(ISegment.Type.NLP, config, DictionaryFactory.createSingletonDictionary(config));
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close(IndexWriter indexWriter) {
|
||||||
|
try {
|
||||||
|
if (indexWriter != null) {
|
||||||
|
indexWriter.close();
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("关闭 Lucene 失败", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user