Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,17 @@ CXXFLAGS = -std=c++17 -Wall -Wextra -O2 -I.
BUILD_DIR = build

DBMS_OBJS = $(BUILD_DIR)/main.o $(BUILD_DIR)/src/parser.o
TEST_OBJS = $(BUILD_DIR)/tests/test_parser.o $(BUILD_DIR)/src/parser.o
TEST_OBJS = $(BUILD_DIR)/tests/test_parser.o \
$(BUILD_DIR)/tests/storage/test_disk_manager.o \
$(BUILD_DIR)/tests/storage/test_buffer_pool.o \
$(BUILD_DIR)/tests/storage/test_slotted_page.o \
$(BUILD_DIR)/tests/storage/test_heap_file.o \
$(BUILD_DIR)/tests/storage/test_integration.o \
$(BUILD_DIR)/src/parser.o \
$(BUILD_DIR)/src/storage/disk_manager.o \
$(BUILD_DIR)/src/storage/buffer_pool.o \
$(BUILD_DIR)/src/storage/slotted_page.o \
$(BUILD_DIR)/src/storage/heap_file.o

dbms: $(DBMS_OBJS)
$(CXX) $(CXXFLAGS) -o $@ $^
Expand Down
64 changes: 64 additions & 0 deletions src/storage/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Storage

Four-layer stack. Each layer exposes a small surface to the one above and
hides everything below.

```
┌──────────────────────────────────────────────────────────────────┐
│ heap_file.h HeapFile table abstraction; insert / get / scan │
├──────────────────────────────────────────────────────────────────┤
│ slotted_page.h SlottedPage byte layout inside one page │
├──────────────────────────────────────────────────────────────────┤
│ buffer_pool.h BufferPool page cache with LRU eviction + pinning │
├──────────────────────────────────────────────────────────────────┤
│ disk_manager.h DiskManager read/write fixed-size pages on a file │
└──────────────────────────────────────────────────────────────────┘
```

## What each layer owns

- **DiskManager** — one file, page id ↔ byte offset. Has no opinion on what's
in a page.
- **BufferPool** — fixed array of `Frame`s caching pages from disk. `pin()` and
`pinNew()` pin a frame and return it wrapped in a `PageGuard` (RAII; calls
`unpinPage` on scope exit). Evicts the LRU unpinned frame on a miss; writes
dirty frames back when evicted or on `flushPage()`/`flushAll()`.
- **SlottedPage** — wraps a `char*` of `PAGE_SIZE` bytes. Header + slot array
growing forward, tuple bytes packed at the high end. Slot ids are stable
across compaction; `remove` tombstones; insert auto-compacts when needed.
The 12-byte header includes a `next_page_id` field used by HeapFile to
chain pages.
- **HeapFile** — unordered collection of tuples spread across a chain of
slotted pages. Allocates and links new pages as the chain fills. RIDs
(`{page_id, slot_id}`) are stable for the life of a tuple. Provides a
forward iterator that yields every live tuple in (page, slot) order.

## Insert flow

```
HeapFile::insert(bytes, len)
└─> walk page chain via SlottedPage::nextPageId()
└─> BufferPool::pin(page) → Frame
└─> SlottedPage::insert(bytes, len) → SlotId
↑ if no page has room: BufferPool::pinNew()
and link via setNextPageId()
```

A miss inside `pin()` triggers `DiskManager::readPage()`; an eviction of a
dirty frame triggers `DiskManager::writePage()`.

## Read flow

```
HeapFile::get(rid) HeapFile::Iterator::operator++
BufferPool::pin(rid.page) BufferPool::pin(cur_page)
SlottedPage::get(rid.slot) SlottedPage::get(cur_slot)
→ if exhausted: cur_page = nextPageId()
```

## Tests

Each layer has its own test file under `tests/storage/`. The end-to-end
persistence test lives in `tests/storage/test_integration.cpp` (loads 1000
rows, simulates a program restart by destroying every storage object, then
scans the rows back).
174 changes: 174 additions & 0 deletions src/storage/buffer_pool.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#include "src/storage/buffer_pool.h"

#include <cstring>
#include <stdexcept>
#include <string>

/// Builds a pool of `num_frames` frames backed by `disk`.
///
/// Every frame starts out holding no page (INVALID_PAGE_ID), clean, unpinned
/// and zero-filled, and is appended to the LRU list in index order so that
/// all frames are immediately available as victims.
///
/// @param num_frames number of frames in the pool; must be non-zero.
/// @param disk       disk manager used for page reads/writes; must be non-null.
/// @throws std::runtime_error if `num_frames` is 0 or `disk` is null.
BufferPool::BufferPool(size_t num_frames, DiskManager* disk)
    : disk_(disk), frames_(num_frames) {
    if (num_frames == 0) {
        throw std::runtime_error("BufferPool: num_frames must be > 0");
    }
    if (!disk_) {
        throw std::runtime_error("BufferPool: disk must not be null");
    }

    size_t slot = 0;
    for (Frame& frame : frames_) {
        std::memset(frame.data, 0, PAGE_SIZE);
        frame.pin_count = 0;
        frame.is_dirty = false;
        frame.page_id = INVALID_PAGE_ID;
        addToLRU(slot);
        ++slot;
    }
}

/// Takes frame `frame_idx` off the LRU list so it cannot be chosen as a
/// victim. Does nothing if the frame is not currently on the list.
void BufferPool::removeFromLRU(size_t frame_idx) {
    const auto pos = lru_pos_.find(frame_idx);
    if (pos != lru_pos_.end()) {
        lru_.erase(pos->second);  // unlink from the list first...
        lru_pos_.erase(pos);      // ...then drop the index entry
    }
}

/// Appends frame `frame_idx` to the back of the LRU list (the end that
/// pickVictim() looks at last) and records its list position in `lru_pos_`.
void BufferPool::addToLRU(size_t frame_idx) {
    // insert() at end() is a push_back that also hands back the new
    // element's iterator, which is exactly what the position map stores.
    lru_pos_[frame_idx] = lru_.insert(lru_.end(), frame_idx);
}

/// Returns the index of the frame at the front of the LRU list, i.e. the
/// one that has been unpinned the longest. The frame is NOT removed from the
/// list; callers do that themselves.
///
/// @throws std::runtime_error if the LRU list is empty (every frame pinned).
size_t BufferPool::pickVictim() {
    if (!lru_.empty()) {
        return lru_.front();
    }
    throw std::runtime_error("BufferPool: all frames pinned, no victim available");
}

void BufferPool::evict(size_t frame_idx) {
Frame& f = frames_[frame_idx];
if (f.page_id == INVALID_PAGE_ID) return; // empty slot, nothing to evict
if (f.is_dirty) {
disk_->writePage(f.page_id, f.data);
f.is_dirty = false;
}
page_table_.erase(f.page_id);
f.page_id = INVALID_PAGE_ID;
}

/// Pins page `page_id` and returns the frame holding it.
///
/// On a cache hit the frame's pin count is incremented (and, if it was
/// unpinned, it is taken off the LRU list so it cannot be evicted while in
/// use). On a miss the LRU victim is evicted, the page is read from disk
/// into that frame, and the frame is returned with a pin count of 1.
///
/// Exception safety: every step that can throw (victim write-back, disk
/// read, page-table insert) runs while the victim frame is still on the LRU
/// list. If any of them fails, the frame stays reachable as a future victim
/// instead of being lost to the pool.
///
/// @param page_id id of the page to pin.
/// @return pointer to the pinned frame; the caller must balance this with
///         unpinPage().
/// @throws std::runtime_error if every frame is pinned (from pickVictim());
///         also propagates whatever the DiskManager read/write calls throw.
Frame* BufferPool::fetchPage(PageId page_id) {
    const auto hit = page_table_.find(page_id);
    if (hit != page_table_.end()) {
        // Cache hit. If currently unpinned, pull it out of the LRU list so
        // it can't be evicted while pinned.
        const size_t idx = hit->second;
        Frame& f = frames_[idx];
        if (f.pin_count == 0) {
            removeFromLRU(idx);
        }
        ++f.pin_count;
        return &f;
    }

    // Miss: pick a victim and load the page into its slot.
    const size_t idx = pickVictim();
    Frame& f = frames_[idx];

    // Throwing operations first, with the frame still on the LRU list.
    // If evict() throws, the frame keeps its old page and mapping. If
    // readPage() or the map insert throws, the frame is simply an empty
    // slot at the front of the LRU list.
    evict(idx);
    disk_->readPage(page_id, f.data);
    page_table_[page_id] = idx;

    // From here on nothing throws: commit the frame to the new page.
    removeFromLRU(idx);
    f.page_id = page_id;
    f.is_dirty = false;
    f.pin_count = 1;
    return &f;
}

/// Releases one pin on page `page_id`.
///
/// If `was_modified` is true the frame is marked dirty; passing false never
/// clears an existing dirty flag. When the last pin is released the frame is
/// appended to the LRU list and becomes eligible for eviction.
///
/// @param page_id      page to unpin; must currently be in the pool.
/// @param was_modified whether the caller changed the page contents.
/// @throws std::runtime_error if the page is not in the pool or is not
///         currently pinned.
void BufferPool::unpinPage(PageId page_id, bool was_modified) {
    const auto entry = page_table_.find(page_id);
    if (entry == page_table_.end()) {
        throw std::runtime_error("BufferPool::unpinPage: page " +
                                 std::to_string(page_id) + " not in pool");
    }

    const size_t slot = entry->second;
    Frame& frame = frames_[slot];
    if (frame.pin_count <= 0) {
        throw std::runtime_error("BufferPool::unpinPage: page " +
                                 std::to_string(page_id) + " not pinned");
    }

    if (was_modified) {
        frame.is_dirty = true;
    }
    frame.pin_count -= 1;
    if (frame.pin_count == 0) {
        addToLRU(slot);  // last pin gone: frame may be evicted again
    }
}

/// Allocates a fresh page on disk and pins it in a zero-filled frame.
///
/// The victim frame is chosen and evicted BEFORE the disk page is allocated,
/// so a fully pinned pool (or a failed write-back) does not leave behind an
/// allocated page that nobody references. All throwing steps run while the
/// victim is still on the LRU list, so a failure never strands the frame.
///
/// @param out_page_id optional; if non-null, receives the new page's id.
///                    Only written once the page has been set up.
/// @return pointer to the pinned frame (pin count 1, clean, all zeroes); the
///         caller must balance this with unpinPage().
/// @throws std::runtime_error if every frame is pinned (from pickVictim());
///         also propagates whatever the DiskManager calls throw.
Frame* BufferPool::newPage(PageId* out_page_id) {
    const size_t idx = pickVictim();
    Frame& f = frames_[idx];

    // Throwing operations first, with the frame still on the LRU list.
    evict(idx);
    const PageId new_id = disk_->allocatePage();
    page_table_[new_id] = idx;

    // From here on nothing throws: commit the frame to the new page.
    removeFromLRU(idx);
    std::memset(f.data, 0, PAGE_SIZE);
    f.page_id = new_id;
    f.is_dirty = false;
    f.pin_count = 1;

    if (out_page_id) *out_page_id = new_id;
    return &f;
}

/// Writes page `page_id` back to disk if it is cached and dirty, then clears
/// its dirty flag. A page that is not in the pool, or is clean, is a no-op.
/// The page stays cached and its pin count is not touched.
void BufferPool::flushPage(PageId page_id) {
    const auto entry = page_table_.find(page_id);
    if (entry == page_table_.end()) {
        return;
    }

    Frame& frame = frames_[entry->second];
    if (!frame.is_dirty) {
        return;
    }
    disk_->writePage(frame.page_id, frame.data);
    frame.is_dirty = false;
}

void BufferPool::flushAll() {
for (auto& f : frames_) {
if (f.page_id != INVALID_PAGE_ID && f.is_dirty) {
disk_->writePage(f.page_id, f.data);
f.is_dirty = false;
}
}
}

/// Pins page `page_id` via fetchPage() and wraps the frame in a PageGuard
/// tied to this pool, whose destructor releases the pin.
PageGuard BufferPool::pin(PageId page_id) {
    return PageGuard(this, fetchPage(page_id));
}

/// Allocates and pins a brand-new page via newPage() and wraps the frame in
/// a PageGuard tied to this pool, whose destructor releases the pin.
///
/// The new page's id is not returned separately; it is available through
/// the frame's `page_id` field.
PageGuard BufferPool::pinNew() {
    // newPage() treats its out-parameter as optional, so no scratch
    // variable is needed just to discard the id.
    return PageGuard(this, newPage(nullptr));
}

/// Releases the pin held by this guard, if it still holds one.
///
/// NOTE(review): unpinPage() can throw; unless this destructor is declared
/// noexcept(false) in the header, such a throw would call std::terminate.
PageGuard::~PageGuard() {
    // A moved-from guard has both pointers nulled and owns nothing.
    if (frame_ == nullptr || bp_ == nullptr) {
        return;
    }
    // Pass `false`: unpinPage() only ever sets the dirty flag, so whatever
    // Frame::is_dirty already holds (set by markDirty(), if at all) is
    // preserved and no additional bit needs to be OR-ed in here.
    bp_->unpinPage(frame_->page_id, false);
}

/// Move constructor: takes over `other`'s pin without touching the pin
/// count, and leaves `other` empty so its destructor does not unpin.
PageGuard::PageGuard(PageGuard&& other) noexcept
    : bp_(other.bp_), frame_(other.frame_) {
    // Null out the source so exactly one guard owns the pin.
    other.frame_ = nullptr;
    other.bp_ = nullptr;
}

/// Move assignment: releases the pin this guard currently holds (if any),
/// then takes over `other`'s pin and leaves `other` empty. Self-assignment
/// is a no-op.
///
/// Note: unpinPage() can throw, and because this operator is noexcept such a
/// throw would call std::terminate.
PageGuard& PageGuard::operator=(PageGuard&& other) noexcept {
    if (this == &other) {
        return *this;
    }

    // Drop our own pin first; `false` leaves the frame's dirty flag as is.
    if (frame_ && bp_) {
        bp_->unpinPage(frame_->page_id, false);
    }

    // Steal the source's pin and empty the source.
    frame_ = other.frame_;
    bp_ = other.bp_;
    other.frame_ = nullptr;
    other.bp_ = nullptr;
    return *this;
}
Loading
Loading