Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -647,13 +647,13 @@ func mergeStoredAndRemap(segments []*SegmentBase, drops []*roaring.Bitmap,
return 0, nil, seg.ErrClosed
}
// get the edgeList for this segment
edgeList := segment.EdgeList()
edgeList := segment.edgeList()
// if no edgeList, nothing to do
if edgeList == nil {
continue
}
newSegDocNums := rv[segI]
edgeList.Iterate(func(oldChild uint64, oldParent uint64) bool {
edgeList.iterate(func(oldChild uint64, oldParent uint64) bool {
newParent := newSegDocNums[oldParent]
newChild := newSegDocNums[oldChild]
if newParent != docDropped &&
Expand Down
187 changes: 136 additions & 51 deletions nested_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,12 @@ func (nc *nestedIndexCache) initialize(numDocs uint64, edgeListOffset uint64, me
return fmt.Errorf("error reading number of edges in nested edge list")
}
pos += uint64(read)
// if no documents or edges/nested documents, return
if numDocs == 0 || numEdges == 0 {
// if no documents or edges/nested documents or invalid state, return
if numDocs == 0 || numEdges == 0 || numDocs <= numEdges {
return nil
}
edgeList := NewEdgeList(numDocs, numEdges)
// create and cache our edge list
edgeList := newEdgeList(numDocs, numEdges)
for i := uint64(0); i < numEdges; i++ {
child, read := binary.Uvarint(mem[pos : pos+binary.MaxVarintLen64])
if read <= 0 {
Expand All @@ -68,17 +69,49 @@ func (nc *nestedIndexCache) initialize(numDocs uint64, edgeListOffset uint64, me
return fmt.Errorf("error reading parent doc id in nested edge list")
}
pos += uint64(read)
edgeList.AddEdge(child, parent)
edgeList.addEdge(child, parent)
}
// create and cache our descendant store
numRoots := numDocs - numEdges
descendantStore := newDescendantStore(numRoots)
// populate the descendant store using the following invariants:
// Invariant: child docNums is always > parent docNums
// Invariant: descendants of root docNum R is always a
// contiguous range of docNums [R + 1 : R + 1 + N)
// where N is the number of descendants of R
roots := make([]uint64, 0, numRoots)
for docNum := uint64(0); docNum < numDocs; docNum++ {
if _, ok := edgeList.parent(docNum); !ok {
roots = append(roots, docNum)
}
}
// descendants of each root are the contiguous range of
// docNums between it and the next root
for idx, root := range roots {
start := root + 1
end := numDocs
nextIdx := idx + 1
if nextIdx < len(roots) {
end = roots[nextIdx]
}
numDescendants := end - start
if numDescendants > 0 {
bitmap := roaring.New()
bitmap.AddRange(start, end)
descendantStore.add(root, bitmap)
}
}

nc.cache = &nestedCacheEntry{
el: edgeList,
ds: descendantStore,
}
return nil
}

type nestedCacheEntry struct {
// edgeList[child] = parent
el EdgeList
el edgeList
ds descendantStore
}

func (nc *nestedIndexCache) ancestry(docNum uint64, prealloc []index.AncestorID) []index.AncestorID {
Expand All @@ -90,7 +123,7 @@ func (nc *nestedIndexCache) ancestry(docNum uint64, prealloc []index.AncestorID)
}
current := docNum
for {
parent, ok := cache.el.Parent(current)
parent, ok := cache.el.parent(current)
if !ok {
break
}
Expand All @@ -100,44 +133,63 @@ func (nc *nestedIndexCache) ancestry(docNum uint64, prealloc []index.AncestorID)
return prealloc
}

func (nc *nestedIndexCache) edgeList() EdgeList {
func (nc *nestedIndexCache) descendants(root uint64) (*roaring.Bitmap, bool) {
cache := nc.cache
if cache == nil || cache.ds == nil {
return nil, false
}
return cache.ds.descendants(root)
}

func (nc *nestedIndexCache) edgeList() edgeList {
cache := nc.cache
if cache == nil || cache.el == nil {
return nil
}
return cache.el
}

func (nc *nestedIndexCache) descendantStore() descendantStore {
cache := nc.cache
if cache == nil || cache.ds == nil {
return nil
}
return cache.ds
}

func (nc *nestedIndexCache) countNested() uint64 {
cache := nc.cache
if cache == nil || cache.el == nil {
return 0
}
return cache.el.Count()
return cache.el.count()
}

// countRoot returns the number of root documents in the given bitmap
func (nc *nestedIndexCache) countRoot(bm *roaring.Bitmap) uint64 {
var totalDocs uint64
if bm == nil {
// if bitmap is empty, return 0
// countRootDeleted returns the number of root documents in the given bitmap that are deleted
func (nc *nestedIndexCache) countRootDeleted(bm *roaring.Bitmap) uint64 {
// empty bitmap means no root documents
if bm == nil || bm.IsEmpty() {
return 0
}
totalDocs := bm.GetCardinality()
// if no nested documents, all documents in the bitmap are root documents
if nc.countNested() == 0 {
return totalDocs
}
totalDocs = bm.GetCardinality()
cache := nc.cache
if cache == nil || cache.el == nil {
// if cache is nil, no nested docs, so all docs are root docs
// so just return the cardinality of the bitmap
// get the edgeList for this segment
el := nc.edgeList()
if el == nil {
return totalDocs
}
// count nested documents in the bitmap, a nested doc is one that has a parent in the edge list
var nestedDocCount uint64
bm.Iterate(func(docNum uint32) bool {
if _, ok := cache.el.Parent(uint64(docNum)); ok {
bmItr := bm.Iterator()
for bmItr.HasNext() {
docNum := bmItr.Next()
if _, ok := el.parent(uint64(docNum)); ok {
nestedDocCount++
Comment thread
CascadingRadium marked this conversation as resolved.
}
return true
})
}
// root docs = total docs - nested docs
if totalDocs < nestedDocCount {
// should not happen, but just in case
Expand All @@ -148,21 +200,21 @@ func (nc *nestedIndexCache) countRoot(bm *roaring.Bitmap) uint64 {

// -------------------------------------------------------

// EdgeList provides an interface to access parent of a child document
type EdgeList interface {
// Parent returns the parent of the given child document ID,
// edgeList provides an interface to access parent of a child document
type edgeList interface {
// parent returns the parent of the given child document ID,
// and a boolean indicating if the parent exists.
Parent(child uint64) (uint64, bool)
parent(child uint64) (uint64, bool)

// AddEdge adds an edge from child to parent in the edge list.
AddEdge(child uint64, parent uint64)
// addEdge adds an edge from child to parent in the edge list.
addEdge(child uint64, parent uint64)

// Count returns the number of edges in the edge list.
Count() uint64
// count returns the number of edges in the edge list.
count() uint64

// Iterate iterates over all edges in the edge list, calling the provided function
// iterate iterates over all edges in the edge list, calling the provided function
// with each child-parent pair. If the function returns false, iteration stops.
Iterate(func(child uint64, parent uint64) bool)
iterate(func(child uint64, parent uint64) bool)
}

type edgeListMap struct {
Expand All @@ -175,20 +227,20 @@ func newEdgeListMap(numEdges uint64) *edgeListMap {
}
}

func (elm *edgeListMap) Parent(child uint64) (uint64, bool) {
func (elm *edgeListMap) parent(child uint64) (uint64, bool) {
parent, ok := elm.edges[child]
return parent, ok
}

func (elm *edgeListMap) AddEdge(child uint64, parent uint64) {
func (elm *edgeListMap) addEdge(child uint64, parent uint64) {
elm.edges[child] = parent
}

func (elm *edgeListMap) Count() uint64 {
func (elm *edgeListMap) count() uint64 {
return uint64(len(elm.edges))
}

func (elm *edgeListMap) Iterate(f func(child uint64, parent uint64) bool) {
func (elm *edgeListMap) iterate(f func(child uint64, parent uint64) bool) {
for child, parent := range elm.edges {
if !f(child, parent) {
return
Expand All @@ -197,7 +249,7 @@ func (elm *edgeListMap) Iterate(f func(child uint64, parent uint64) bool) {
}

type edgeListSlice struct {
count uint64
numEdges uint64
sentinel uint64
edges []uint64
}
Expand All @@ -209,13 +261,13 @@ func newEdgeListSlice(numDocs uint64, numEdges uint64) *edgeListSlice {
edges[i] = sentinel
}
return &edgeListSlice{
count: numEdges,
numEdges: numEdges,
sentinel: sentinel,
edges: edges,
}
}

func (els *edgeListSlice) Parent(child uint64) (uint64, bool) {
func (els *edgeListSlice) parent(child uint64) (uint64, bool) {
if child >= uint64(len(els.edges)) {
return 0, false
}
Expand All @@ -226,29 +278,29 @@ func (els *edgeListSlice) Parent(child uint64) (uint64, bool) {
return parent, true
}

func (el *edgeListSlice) AddEdge(child uint64, parent uint64) {
if child >= uint64(len(el.edges)) {
func (els *edgeListSlice) addEdge(child uint64, parent uint64) {
if child >= uint64(len(els.edges)) {
// out of bounds, ignore as this should not happen
return
}
el.edges[child] = parent
els.edges[child] = parent
}

func (el *edgeListSlice) Count() uint64 {
return el.count
func (els *edgeListSlice) count() uint64 {
return els.numEdges
}

func (el *edgeListSlice) Iterate(f func(child uint64, parent uint64) bool) {
for child, parent := range el.edges {
if parent != el.sentinel {
func (els *edgeListSlice) iterate(f func(child uint64, parent uint64) bool) {
for child, parent := range els.edges {
if parent != els.sentinel {
if !f(uint64(child), parent) {
return
}
}
}
}

// nestedCacheRatio defines the threshold ratio of nested documents to total documents.
// edgeListMapThreshold defines the threshold ratio of nested documents to total documents.
// It is derived using the following reasoning:
//
// Let N = number of nested documents (i.e., edges in the edge list)
Expand All @@ -273,9 +325,9 @@ func (el *edgeListSlice) Iterate(f func(child uint64, parent uint64) bool) {
// we use a map for the edge list; otherwise, we use a slice.
var edgeListMapThreshold = 8.0 / 30.0

// NewEdgeList creates a new EdgeList instance based on the provided
// newEdgeList creates a new edgeList instance based on the provided
// constants, the total number of documents and the number of nested documents/edges.
func NewEdgeList(numDocs uint64, numEdges uint64) EdgeList {
func newEdgeList(numDocs uint64, numEdges uint64) edgeList {
if numDocs == 0 || numEdges == 0 {
// no edges, return nil
return nil
Expand All @@ -288,3 +340,36 @@ func NewEdgeList(numDocs uint64, numEdges uint64) EdgeList {
// use slice representation
return newEdgeListSlice(numDocs, numEdges)
}

// -------------------------------------------------------

// descendantStore provides an interface to access precomputed descendant bitmaps for root documents
type descendantStore interface {
// add a descendant bitmap for a root document
add(root uint64, descendants *roaring.Bitmap)
// returns the descendant bitmap for a root document, with an indication of its existence
descendants(root uint64) (*roaring.Bitmap, bool)
}

type descendantStoreMap struct {
m map[uint64]*roaring.Bitmap
}

func newDescendantStoreMap(numRoots uint64) *descendantStoreMap {
return &descendantStoreMap{
m: make(map[uint64]*roaring.Bitmap, numRoots),
}
}

func (dsm *descendantStoreMap) add(root uint64, descendants *roaring.Bitmap) {
dsm.m[root] = descendants
}

func (dsm *descendantStoreMap) descendants(root uint64) (*roaring.Bitmap, bool) {
bm, ok := dsm.m[root]
return bm, ok
}

func newDescendantStore(numRoots uint64) descendantStore {
return newDescendantStoreMap(numRoots)
}
Loading
Loading