2021SC@SDUSC
/**
 * Constructs a new IndexWriter per the settings given in <code>conf</code>.
 * If you want to make "live" changes to this writer instance, use
 * {@link #getConfig()}.
 *
 * <p>
 * <b>NOTE:</b> after this writer is created, the given configuration instance
 * cannot be passed to another writer.
 *
 * @param d
 *          the index directory. The index is either created or appended
 *          according to <code>conf.getOpenMode()</code>.
 * @param conf
 *          the configuration settings according to which IndexWriter should
 *          be initialized.
 * @throws IOException
 *           if the directory cannot be read/written to, or if it does not
 *           exist and <code>conf.getOpenMode()</code> is
 *           <code>OpenMode.APPEND</code> or if there is any other low-level
 *           IO error
 * @throws IllegalArgumentException
 *           if an index commit is configured together with index creation, if
 *           the commit or its reader belongs to a different directory, if the
 *           reader has no initial commit or is stale, or if the configured
 *           index sort is incompatible with an existing segment
 */
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException {
  enableTestPoints = isEnableTestPoints();
  conf.setIndexWriter(this); // prevent reuse by other instances
  config = conf;
  infoStream = config.getInfoStream();
  softDeletesEnabled = config.getSoftDeletesField() != null;
  // Obtain the write.lock. If the user configured a timeout,
  // we wrap with a sleeper and this might take some time.
  writeLock = d.obtainLock(WRITE_LOCK_NAME);

  boolean success = false;
  try {
    directoryOrig = d;
    directory = new LockValidatingDirectoryWrapper(d, writeLock);
    mergeScheduler = config.getMergeScheduler();
    mergeScheduler.initialize(infoStream, directoryOrig);
    OpenMode mode = config.getOpenMode();
    final boolean indexExists;
    final boolean create;
    if (mode == OpenMode.CREATE) {
      indexExists = DirectoryReader.indexExists(directory);
      create = true;
    } else if (mode == OpenMode.APPEND) {
      indexExists = true;
      create = false;
    } else {
      // Any other mode: create only if an index does not exist yet.
      indexExists = DirectoryReader.indexExists(directory);
      create = !indexExists;
    }

    // If the index is too old, reading the segments will throw
    // IndexFormatTooOldException.
    String[] files = directory.listAll();

    // Set up our initial SegmentInfos:
    IndexCommit commit = config.getIndexCommit();

    // The commit may carry an already opened reader to initialize from:
    StandardDirectoryReader reader;
    if (commit == null) {
      reader = null;
    } else {
      reader = commit.getReader();
    }

    if (create) {
      if (config.getIndexCommit() != null) {
        // We cannot both open from a commit point and create:
        if (mode == OpenMode.CREATE) {
          throw new IllegalArgumentException("cannot use IndexWriterConfig.setIndexCommit() with OpenMode.CREATE");
        } else {
          throw new IllegalArgumentException("cannot use IndexWriterConfig.setIndexCommit() when index has no commit");
        }
      }

      // Try to read first. This is to allow create
      // against an index that's currently open for
      // searching. In this case we write the next
      // segments_N file with no segments:
      final SegmentInfos sis = new SegmentInfos(config.getIndexCreatedVersionMajor());
      if (indexExists) {
        final SegmentInfos previous = SegmentInfos.readLatestCommit(directory);
        sis.updateGenerationVersionAndCounter(previous);
      }
      segmentInfos = sis;
      rollbackSegments = segmentInfos.createBackupSegmentInfos();

      // Record that we have a change (zero out all
      // segments) pending:
      changed();

    } else if (reader != null) {
      // Init from an existing already opened NRT or non-NRT reader:
      if (reader.directory() != commit.getDirectory()) {
        throw new IllegalArgumentException("IndexCommit's reader must have the same directory as the IndexCommit");
      }

      if (reader.directory() != directoryOrig) {
        throw new IllegalArgumentException("IndexCommit's reader must have the same directory passed to IndexWriter");
      }

      if (reader.segmentInfos.getLastGeneration() == 0) {
        // TODO: maybe we could allow this? It's tricky...
        throw new IllegalArgumentException("index must already have an initial commit to open from reader");
      }

      // Must clone because we don't want the incoming NRT reader to "see" any changes this writer now makes:
      segmentInfos = reader.segmentInfos.clone();

      SegmentInfos lastCommit;
      try {
        lastCommit = SegmentInfos.readCommit(directoryOrig, segmentInfos.getSegmentsFileName());
      } catch (IOException ioe) {
        // Keep the underlying I/O failure attached as the cause so the real reason stays diagnosable.
        throw new IllegalArgumentException("the provided reader is stale: its prior commit file \"" + segmentInfos.getSegmentsFileName() + "\" is missing from index", ioe);
      }

      if (reader.writer != null) {
        // The reader's previous writer is expected to be closed by now:
        assert reader.writer.closed;

        // Update SIS metadata so we remain write-once:
        segmentInfos.updateGenerationVersionAndCounter(reader.writer.segmentInfos);
        lastCommit.updateGenerationVersionAndCounter(reader.writer.segmentInfos);
      }

      rollbackSegments = lastCommit.createBackupSegmentInfos();
    } else {
      // Init from either the latest commit point, or an explicit prior commit point:
      String lastSegmentsFile = SegmentInfos.getLastCommitSegmentsFileName(files);
      if (lastSegmentsFile == null) {
        throw new IndexNotFoundException("no segments* file found in " + directory + ": files: " + Arrays.toString(files));
      }

      // Do not use SegmentInfos.read(Directory) since the
      // retrying it does is not necessary here (we hold the write lock):
      segmentInfos = SegmentInfos.readCommit(directoryOrig, lastSegmentsFile);

      if (commit != null) {
        // Swap out all segments, but keep metadata in
        // SegmentInfos, like version & generation, to
        // preserve write-once. This is important if
        // readers are open against the future commit
        // points.
        if (commit.getDirectory() != directoryOrig) {
          throw new IllegalArgumentException("IndexCommit's directory doesn't match my directory, expected=" + directoryOrig + ", got=" + commit.getDirectory());
        }

        SegmentInfos oldInfos = SegmentInfos.readCommit(directoryOrig, commit.getSegmentsFileName());
        segmentInfos.replace(oldInfos);
        changed();

        if (infoStream.isEnabled("IW")) {
          infoStream.message("IW", "init: loaded commit \"" + commit.getSegmentsFileName() + "\"");
        }
      }

      rollbackSegments = segmentInfos.createBackupSegmentInfos();
    }

    commitUserData = new HashMap<>(segmentInfos.getUserData()).entrySet();

    pendingNumDocs.set(segmentInfos.totalMaxDoc());

    // Start with previous field numbers, but new FieldInfos.
    // NOTE: this is correct even for an NRT reader because we'll pull FieldInfos even for the un-committed segments:
    globalFieldNumberMap = getFieldNumberMap();

    validateIndexSort();

    config.getFlushPolicy().init(config);
    bufferedUpdatesStream = new BufferedUpdatesStream(infoStream);
    docWriter = new DocumentsWriter(flushNotifications, segmentInfos.getIndexCreatedVersionMajor(), pendingNumDocs,
        enableTestPoints, this::newSegmentName,
        config, directoryOrig, directory, globalFieldNumberMap);
    readerPool = new ReaderPool(directory, directoryOrig, segmentInfos, globalFieldNumberMap,
        bufferedUpdatesStream::getCompletedDelGen, infoStream, conf.getSoftDeletesField(), reader);
    if (config.getReaderPooling()) {
      readerPool.enableReaderPooling();
    }

    // The deletion policy handed to the deleter is taken from the config.
    // Sync'd is silly here, but IFD asserts we sync'd on the IW instance:
    synchronized(this) {
      deleter = new IndexFileDeleter(files, directoryOrig, directory,
                                     config.getIndexDeletionPolicy(),
                                     segmentInfos, infoStream, this,
                                     indexExists, reader != null);

      // We incRef all files when we return an NRT reader from IW, so all files must exist even in the NRT case:
      assert create || filesExist(segmentInfos);
    }

    if (deleter.startingCommitDeleted) {
      // Deletion policy deleted the "head" commit point.
      // We have to mark ourself as changed so that if we
      // are closed w/o any further changes we write a new
      // segments_N file.
      changed();
    }

    if (reader != null) {
      // We always assume we are carrying over incoming changes when opening from reader:
      segmentInfos.changed();
      changed();
    }

    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "init: create=" + create + " reader=" + reader);
      messageState();
    }

    success = true;

  } finally {
    if (!success) {
      // Initialization failed: release the write lock so the directory is not left locked.
      if (infoStream.isEnabled("IW")) {
        infoStream.message("IW", "init: hit exception on init; releasing write lock");
      }
      IOUtils.closeWhileHandlingException(writeLock);
      writeLock = null;
    }
  }
}
/**
 * Checks that the index sort requested by the config (if any) is compatible with the
 * index sort recorded on every existing segment.
 *
 * @throws IllegalArgumentException if a sort is configured and some segment either has no
 *         index sort or has one that the configured sort is not congruent with
 */
private void validateIndexSort() {
  final Sort configuredSort = config.getIndexSort();
  if (configuredSort == null) {
    // No sort requested: nothing to validate.
    return;
  }

  for (SegmentCommitInfo segment : segmentInfos) {
    final Sort existingSort = segment.info.getIndexSort();
    // Short-circuit keeps isCongruentSort from ever seeing a null segment sort.
    final boolean compatible = existingSort != null && isCongruentSort(configuredSort, existingSort);
    if (compatible == false) {
      throw new IllegalArgumentException("cannot change previous indexSort=" + existingSort + " (from segment=" + segment + ") to new indexSort=" + configuredSort);
    }
  }
}
/**
 * Tells whether the sort fields of <code>indexSort</code> form a prefix of the sort
 * fields of <code>otherSort</code>.
 *
 * @param indexSort the sort whose fields must all appear, in order, at the start of the other
 * @param otherSort the sort to compare against
 * @return true if <code>indexSort</code> is a prefix of <code>otherSort</code>
 **/
static boolean isCongruentSort(Sort indexSort, Sort otherSort) {
  final SortField[] prefix = indexSort.getSort();
  final SortField[] candidate = otherSort.getSort();
  final int prefixLength = prefix.length;
  if (prefixLength > candidate.length) {
    // A longer sort can never be a prefix of a shorter one.
    return false;
  }

  for (int i = 0; i < prefixLength; i++) {
    final SortField expected = prefix[i];
    final SortField actual = candidate[i];
    // Null-safe element comparison, matching list-style equality.
    final boolean same = (expected == null) ? (actual == null) : expected.equals(actual);
    if (same == false) {
      return false;
    }
  }
  return true;
}
// Reads the latest field infos for the commit.
// This is used on IW init and addIndexes(Dir) to create/update the global field map.
// TODO: fix tests abusing this method!
static FieldInfos readFieldInfos(SegmentCommitInfo si) throws IOException {
  final Codec segmentCodec = si.info.getCodec();
  final FieldInfosFormat format = segmentCodec.fieldInfosFormat();

  if (si.hasFieldUpdates()) {
    // There are updates: read the latest generation (always outside of the CFS),
    // addressed by the field-infos generation encoded in the maximum radix.
    final String generationSuffix = Long.toString(si.getFieldInfosGen(), Character.MAX_RADIX);
    return format.read(si.info.dir, si.info, generationSuffix, IOContext.READONCE);
  }

  if (si.info.getUseCompoundFile() == false) {
    // No compound file: read straight from the segment's directory.
    return format.read(si.info.dir, si.info, "", IOContext.READONCE);
  }

  // Compound file: open it as a directory just long enough to read the field infos.
  try (Directory compoundDir = segmentCodec.compoundFormat().getCompoundReader(si.info.dir, si.info, IOContext.DEFAULT)) {
    return format.read(compoundDir, si.info, "", IOContext.READONCE);
  }
}
/**
 * Builds the global field number map for the current {@link SegmentInfos} by registering
 * every field of every segment. If there are no segments (or no fields), the returned
 * instance is empty.
 *
 * @return a freshly built field number map
 * @throws IOException if reading a segment's field infos fails
 */
private FieldNumbers getFieldNumberMap() throws IOException {
  final FieldNumbers fieldNumbers = new FieldNumbers(config.softDeletesField);
  for (SegmentCommitInfo segment : segmentInfos) {
    for (FieldInfo field : readFieldInfos(segment)) {
      fieldNumbers.addOrGet(
          field.name,
          field.number,
          field.getIndexOptions(),
          field.getDocValuesType(),
          field.getPointDimensionCount(),
          field.getPointIndexDimensionCount(),
          field.getPointNumBytes(),
          field.isSoftDeletesField());
    }
  }
  return fieldNumbers;
}
/**
 * Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter's
 * current settings, as well as to modify "live" ones.
 *
 * @return the configuration this writer was constructed with
 */
public LiveIndexWriterConfig getConfig() {
  // NOTE(review): ensureOpen is defined outside this excerpt; presumably it rejects
  // calls on a closed writer -- confirm the meaning of the 'false' argument there.
  ensureOpen(false);
  return config;
}
/**
 * Writes a one-time summary of this writer's state (directory, segments, version, config
 * and MMapDirectory unmap support) to the "IW" info stream. Does nothing when the "IW"
 * component is disabled or the summary has already been emitted.
 */
private void messageState() {
  if (!infoStream.isEnabled("IW") || didMessageState) {
    return;
  }
  // Latch first so the summary is only ever written once.
  didMessageState = true;

  final String stateSummary = "\ndir=" + directoryOrig + "\n" +
      "index=" + segString() + "\n" +
      "version=" + Version.LATEST.toString() + "\n" +
      config.toString();
  infoStream.message("IW", stateSummary);

  // Report whether unmapping is supported and, if not, the reason why.
  String unmapSupport = Boolean.toString(MMapDirectory.UNMAP_SUPPORTED);
  if (MMapDirectory.UNMAP_SUPPORTED == false) {
    unmapSupport = unmapSupport + " (" + MMapDirectory.UNMAP_NOT_SUPPORTED_REASON + ")";
  }
  infoStream.message("IW", "MMapDirectory.UNMAP_SUPPORTED=" + unmapSupport);
}
/**
 * Gracefully closes (commits, waits for merges), but calls rollback
 * if there's an exception so the IndexWriter is always closed. This is called
 * from {@link #close} when {@link IndexWriterConfig#commitOnClose} is
 * {@code true}.
 *
 * @throws IllegalStateException if prepareCommit was called without a matching commit
 * @throws IOException declared for failures propagated from the flush/commit/rollback steps
 */
private void shutdown() throws IOException {
  if (pendingCommit != null) {
    throw new IllegalStateException("cannot close: prepareCommit was already called with no corresponding call to commit");
  }

  // Ensure that only one thread actually gets to do the closing.
  if (shouldClose(true) == false) {
    return;
  }

  try {
    if (infoStream.isEnabled("IW")) {
      infoStream.message("IW", "now flush at close");
    }
    flush(true, true);
    waitForMerges();
    commitInternal(config.getMergePolicy());
  } catch (Throwable failure) {
    // Be certain to close the index on any exception; a secondary rollback
    // failure is attached to the primary one rather than replacing it.
    try {
      rollbackInternal();
    } catch (Throwable rollbackFailure) {
      failure.addSuppressed(rollbackFailure);
    }
    throw failure;
  }

  // The graceful path succeeded: rollback now performs the actual close.
  rollbackInternal();
}
409

被折叠的 条评论
为什么被折叠?



