class Version { public: struct GetStats { FileMetaData* seek_file; int seek_file_level; }; ... ... // DOC: next_ 和 prev_ 用于双向链表 VersionSet* vset_; // VersionSet to which this Version belongs Version* next_; // Next version in linked list Version* prev_; // Previous version in linked list // DOC: refs_ 记录了该 Version 当前被引用的次数,保证仍被引用的 Version 及其对应的文件不会被删除 int refs_; // Number of live refs to this version
class VersionSet { ... ... Env* const env_; const std::string dbname_; const Options* const options_; TableCache* const table_cache_; const InternalKeyComparator icmp_; uint64_t next_file_number_; uint64_t manifest_file_number_; uint64_t last_sequence_; uint64_t log_number_; uint64_t prev_log_number_; // 0 or backing store for memtable being compacted
// Opened lazily WritableFile* descriptor_file_; log::Writer* descriptor_log_; Version dummy_versions_; // Head of circular doubly-linked list of versions. Version* current_; // == dummy_versions_.prev_
// Per-level key at which the next compaction at that level should start. // Either an empty string, or a valid InternalKey. std::string compact_pointer_[config::kNumLevels]; };
VersionSet 是一个 Version 构成的循环双向链表,这些 Version 按时间顺序先后产生,记录了当时的元信息,链表尾部是当前最新的 Version;每个 Version 自己会维护引用计数,当其被引用时不会被删除,其对应的 sstable 也得以保留;通过这种方式,使得 leveldb 可以在任意一个稳定的快照视图上(即任意一个未被删除的 Version 上)访问文件。
如何从 Version_i 升级到 Version_i+1
相邻 Version 之间的不同仅仅是一些文件被创建和另一些文件被删除。也就是说将文件变动应用在旧的 Version 上可以得到新的 Version,这也就是 Version 产生的方式。leveldb 用 VersionEdit 来表示这种相邻 Version 的差值。
// Save the contents of the memtable as a new Table VersionEdit edit; Version* base = versions_->current(); base->Ref(); Status s = WriteLevel0Table(imm_, &edit, base); base->Unref(); ... ... // Replace immutable memtable with the generated Table if (s.ok()) { edit.SetPrevLogNumber(0); edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed s = versions_->LogAndApply(&edit, &mutex_); }
if (s.ok()) { // Commit to the new state imm_->Unref(); imm_ = nullptr; has_imm_.store(false, std::memory_order_release); RemoveObsoleteFiles(); } else { RecordBackgroundError(s); } }
Compaction* VersionSet::PickCompaction(){ Compaction* c; int level;
// We prefer compactions triggered by too much data in a level over // the compactions triggered by seeks. const bool size_compaction = (current_->compaction_score_ >= 1); const bool seek_compaction = (current_->file_to_compact_ != nullptr); if (size_compaction) { level = current_->compaction_level_; assert(level >= 0); assert(level + 1 < config::kNumLevels); c = new Compaction(options_, level);
// Pick the first file that comes after compact_pointer_[level] for (size_t i = 0; i < current_->files_[level].size(); i++) { FileMetaData* f = current_->files_[level][i]; // DOC: 从 level i 层选择输入文件 if (compact_pointer_[level].empty() || // DOC: 如果 level i 层第一次做 compaction icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) { // DOC: 如果 sstable 的最大的 key > compact_pointer_ c->inputs_[0].push_back(f); break; } } if (c->inputs_[0].empty()) { // Wrap-around to the beginning of the key space c->inputs_[0].push_back(current_->files_[level][0]); } } else if (seek_compaction) { level = current_->file_to_compact_level_; c = new Compaction(options_, level); c->inputs_[0].push_back(current_->file_to_compact_); } else { return nullptr; }
// Files in level 0 may overlap each other, so pick up all overlapping ones // DOC: level 0 层可能出现 sstable 之间 overlap 的情况,因此在 level 0 层就可以扩大输入文件集合 if (level == 0) { InternalKey smallest, largest; GetRange(c->inputs_[0], &smallest, &largest); // Note that the next call will discard the file we placed in // c->inputs_[0] earlier and replace it with an overlapping set // which will include the picked file. current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]); assert(!c->inputs_[0].empty()); }
// DOC: 在 level i + 1 层扩大输入文件集合 SetupOtherInputs(c);