filter empty paths from details entries

repository.BackupDetails now filters its results
so that they contain no meta or dirmeta items and
no empty folders. A folder counts as empty when it
holds zero items once all meta items have been
removed.
This commit is contained in:
ryanfkeepers 2023-02-28 10:54:55 -07:00
parent 916f649e97
commit 8dc4b0d4cd
3 changed files with 179 additions and 18 deletions

View File

@ -305,6 +305,84 @@ func (d *Details) addFolder(folder folderEntry) {
})
}
// FilterMetaFiles builds a fresh Details around the result of calling
// FilterMetaFiles on the receiver's DetailsModel. That model-level filter is
// the one responsible for dropping .meta and .dirmeta files; this method only
// wraps its output in a new Details and leaves the receiver's model field
// assignment untouched.
func (d *Details) FilterMetaFiles() *Details {
	filtered := d.DetailsModel.FilterMetaFiles()

	return &Details{DetailsModel: filtered}
}
// FilterEmptyContainers returns a new Details struct with all empty folders
// stripped out. A folder is empty when no item sits in it or in any folder
// below it. If meta files have not been filtered out already, they continue
// to count as items and keep their containing folders "populated".
//
// The returned entries are ordered deterministically: every item first, in its
// original order, followed by the surviving folders in their original order.
// Items whose ParentRef is empty, or does not match the ShortRef of any folder
// entry, are still returned; they simply do not mark any folder as populated.
// The receiver's Entries slice is only read, never modified.
func (d *Details) FilterEmptyContainers() *Details {
	var (
		// folder shortRef -> index of that folder's entry in d.Entries.
		// If a shortRef appears more than once, the last entry wins.
		folderIdx = map[string]int{}
		// set of folder shortRefs that hold one or more items, either
		// directly or through a descendant folder.
		populated = map[string]bool{}
		// non-nil even when there is nothing to return.
		kept = make([]DetailsEntry, 0, len(d.Entries))
	)

	// split the entries into items and folders.
	// items are kept unconditionally; folders are indexed for lookup.
	for i, ent := range d.Entries {
		if ent.Folder == nil {
			kept = append(kept, ent)
		} else {
			folderIdx[ent.ShortRef] = i
		}
	}

	// for every item, walk the tree upward by parent ref, marking each
	// folder on the way as populated.
	// this assumes item parentRef == folder shortRef.
	for _, item := range kept {
		ref := item.ParentRef

		// a folder that is already marked had all of its ancestors marked
		// by the walk that marked it, so the walk can stop there. This also
		// guarantees termination if the parent refs ever form a cycle.
		for len(ref) > 0 && !populated[ref] {
			idx, ok := folderIdx[ref]
			if !ok {
				// the ref points at a folder with no entry of its own.
				// Stop here instead of inventing a blank entry for it.
				break
			}

			populated[ref] = true
			ref = d.Entries[idx].ParentRef
		}
	}

	// append every populated folder after the items. Ranging over the
	// original entries (not a map) keeps the output order stable.
	for i, ent := range d.Entries {
		if ent.Folder == nil {
			continue
		}

		if populated[ent.ShortRef] && folderIdx[ent.ShortRef] == i {
			kept = append(kept, ent)
		}
	}

	return &Details{
		DetailsModel: DetailsModel{Entries: kept},
	}
}
// --------------------------------------------------------------------------------
// Entry
// --------------------------------------------------------------------------------

View File

@ -995,3 +995,103 @@ func (suite *DetailsUnitSuite) TestFolderEntriesForPath() {
})
}
}
// TestDetails_FilterMetaFiles feeds three OneDrive entries through
// Details.FilterMetaFiles: a ".data" entry with IsMeta unset, a ".meta" entry
// with IsMeta unset, and a ".meta" entry with IsMeta set. It expects exactly
// one entry to survive in the filtered copy, and the source Details to still
// hold all three.
func (suite *DetailsUnitSuite) TestDetails_FilterMetaFiles() {
	t := suite.T()

	// builds a OneDrive-backed entry with the given repo ref and meta flag.
	oneDriveEntry := func(repoRef string, isMeta bool) DetailsEntry {
		return DetailsEntry{
			RepoRef: repoRef,
			ItemInfo: ItemInfo{
				OneDrive: &OneDriveInfo{IsMeta: isMeta},
			},
		}
	}

	source := &Details{
		DetailsModel: DetailsModel{
			Entries: []DetailsEntry{
				oneDriveEntry("a.data", false),
				oneDriveEntry("b.meta", false),
				oneDriveEntry("c.meta", true),
			},
		},
	}

	filtered := source.FilterMetaFiles()

	// the filtered copy shrinks; the source keeps its full entry list.
	assert.Len(t, filtered.DetailsModel.Entries, 1)
	assert.Len(t, source.DetailsModel.Entries, 3)
}
// toDetails converts a slice of folderEntry values into a Details whose
// Entries hold one DetailsEntry per input, in the same order. The RepoRef,
// ShortRef, ParentRef and LocationRef fields are copied across, and the
// folder's Info becomes the entry's ItemInfo.
func toDetails(fes []folderEntry) *Details {
	entries := make([]DetailsEntry, 0, len(fes))

	for _, fe := range fes {
		entries = append(entries, DetailsEntry{
			RepoRef:     fe.RepoRef,
			ShortRef:    fe.ShortRef,
			ParentRef:   fe.ParentRef,
			LocationRef: fe.LocationRef,
			ItemInfo:    fe.Info,
		})
	}

	return &Details{
		DetailsModel: DetailsModel{Entries: entries},
	}
}
// TestDetails_FilterEmptyContainers builds a Details from the folder entries
// for basePath/populated/empty, adds a single item under basePath/populated,
// and runs FilterEmptyContainers. It expects the result to be one entry
// shorter than the input, to hold no entry whose RepoRef is the "empty" path,
// and to otherwise match the input element for element in any order.
func (suite *DetailsUnitSuite) TestDetails_FilterEmptyContainers() {
	t := suite.T()

	emptyPath := basePath.Append("populated", "empty")
	deets := toDetails(FolderEntriesForPath(emptyPath, emptyPath))
	itemPath := basePath.Append("populated", "item")

	// the only item lives next to the empty folder, not inside it.
	deets.DetailsModel.Entries = append(deets.DetailsModel.Entries, DetailsEntry{
		RepoRef:     itemPath.String(),
		ShortRef:    itemPath.ShortRef(),
		ParentRef:   itemPath.Dir().ShortRef(),
		LocationRef: "todo - not currently needed",
		ItemInfo: ItemInfo{
			OneDrive: &OneDriveInfo{
				ItemName: "item",
			},
		},
	})

	before := deets.DetailsModel.Entries
	after := deets.FilterEmptyContainers().DetailsModel.Entries

	assert.Equal(t, len(before)-1, len(after), "one empty folder should have been removed")

	for _, got := range after {
		assert.NotEqual(t, emptyPath.String(), got.RepoRef, "the empty path should have been removed")
	}

	// expected result: every input entry except the empty folder.
	expected := make([]DetailsEntry, 0, len(before)-1)

	for _, ent := range before {
		if ent.RepoRef == emptyPath.String() {
			continue
		}

		expected = append(expected, ent)
	}

	assert.ElementsMatch(t, expected, after, "all other paths should be present")
}

View File

@ -2,7 +2,6 @@ package repository
import (
"context"
"strings"
"time"
"github.com/alcionai/clues"
@ -10,14 +9,12 @@ import (
"github.com/pkg/errors"
"github.com/alcionai/corso/src/internal/common/crash"
"github.com/alcionai/corso/src/internal/connector/onedrive"
"github.com/alcionai/corso/src/internal/events"
"github.com/alcionai/corso/src/internal/kopia"
"github.com/alcionai/corso/src/internal/model"
"github.com/alcionai/corso/src/internal/observe"
"github.com/alcionai/corso/src/internal/operations"
"github.com/alcionai/corso/src/internal/streamstore"
"github.com/alcionai/corso/src/internal/version"
"github.com/alcionai/corso/src/pkg/account"
"github.com/alcionai/corso/src/pkg/backup"
"github.com/alcionai/corso/src/pkg/backup/details"
@ -361,21 +358,7 @@ func (r repository) BackupDetails(
return nil, nil, errs.Fail(err)
}
// Retroactively fill in isMeta information for items in older
// backup versions without that info
// version.Restore2 introduces the IsMeta flag, so only v1 needs a check.
if b.Version >= version.OneDrive1DataAndMetaFiles && b.Version < version.OneDrive3IsMetaMarker {
for _, d := range deets.Entries {
if d.OneDrive != nil {
if strings.HasSuffix(d.RepoRef, onedrive.MetaFileSuffix) ||
strings.HasSuffix(d.RepoRef, onedrive.DirMetaFileSuffix) {
d.OneDrive.IsMeta = true
}
}
}
}
return deets, b, errs
return deets.FilterMetaFiles().FilterEmptyContainers(), b, errs
}
// DeleteBackup removes the backup from both the model store and the backup storage.