Exclude metadata files from the set that we merge during incrementals (#2650)
Metadata files don't need to appear in backup details, which means they also shouldn't be merged during incremental backups. If we expect them to be merged, we'll fail the item count check for the merge process.

This change excludes items with metadata extensions from merging by not adding them to the list of items to merge. It also ensures that metadata items don't create backup details entries in the first place.

Manually tested to ensure restore(backup(onedrive)) still works as expected.

---

#### Does this PR need a docs update or release note?

- [ ] ✅ Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x] ⛔ No

#### Type of change

- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

* closes #2498

#### Test Plan

- [ ] 💪 Manual
- [x] ⚡ Unit test
- [ ] 💚 E2E
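In sketch form, the gating this fix introduces works like the following. This is a minimal, self-contained illustration, not the actual implementation: the `mergeCandidates` helper and the literal `.meta`/`.dirmeta` suffix values are assumptions for the example (only `DataFileSuffix = ".data"` is shown explicitly in this diff), while `IsMetaFile` mirrors the helper the diff adds to the onedrive package.

```go
package main

import (
	"fmt"
	"strings"
)

// Illustrative suffix values; only DataFileSuffix (".data") appears
// explicitly in this diff. The metadata suffixes are assumptions.
const (
	DataFileSuffix    = ".data"
	MetaFileSuffix    = ".meta"
	DirMetaFileSuffix = ".dirmeta"
)

// IsMetaFile mirrors the helper added to the onedrive package: an item is
// metadata if its name carries one of the metadata suffixes.
func IsMetaFile(name string) bool {
	return strings.HasSuffix(name, MetaFileSuffix) ||
		strings.HasSuffix(name, DirMetaFileSuffix)
}

// mergeCandidates is a hypothetical stand-in for the merge-list builder:
// skipping metadata items keeps the merge item count aligned with the
// entries that actually appear in backup details.
func mergeCandidates(items []string) []string {
	out := make([]string, 0, len(items))

	for _, item := range items {
		if IsMetaFile(item) {
			// Excluded from merging, just as metadata items no longer
			// produce backup details entries.
			continue
		}

		out = append(out, item)
	}

	return out
}

func main() {
	items := []string{
		"report" + DataFileSuffix,
		"report" + MetaFileSuffix,
		"docs" + DirMetaFileSuffix,
	}
	fmt.Println(mergeCandidates(items)) // [report.data]
}
```

With this gating in place, the merge process's item count check only counts entries that have matching backup details.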
This commit is contained in: parent e42e3f253b, commit 83fe83a367
**src/internal/connector/graph/metadata/metadata.go** (new file, 16 lines)
```go
package metadata

import (
	"github.com/alcionai/corso/src/internal/connector/onedrive"
	"github.com/alcionai/corso/src/pkg/path"
)

func IsMetadataFile(p path.Path) bool {
	switch p.Service() {
	case path.OneDriveService:
		return onedrive.IsMetaFile(p.Item())

	default:
		return false
	}
}
```
**src/internal/connector/graph/metadata/metadata_test.go** (new file, 155 lines)
```go
package metadata_test

import (
	"fmt"
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/stretchr/testify/suite"

	"github.com/alcionai/corso/src/internal/connector/graph/metadata"
	"github.com/alcionai/corso/src/internal/connector/onedrive"
	"github.com/alcionai/corso/src/internal/tester"
	"github.com/alcionai/corso/src/pkg/path"
)

type boolfAssertionFunc func(assert.TestingT, bool, string, ...interface{}) bool

type testCase struct {
	service  path.ServiceType
	category path.CategoryType
	expected boolfAssertionFunc
}

var (
	tenant = "a-tenant"
	user   = "a-user"

	notMetaSuffixes = []string{
		"",
		onedrive.DataFileSuffix,
	}

	metaSuffixes = []string{
		onedrive.MetaFileSuffix,
		onedrive.DirMetaFileSuffix,
	}

	cases = []testCase{
		{
			service:  path.ExchangeService,
			category: path.EmailCategory,
			expected: assert.Falsef,
		},
		{
			service:  path.ExchangeService,
			category: path.ContactsCategory,
			expected: assert.Falsef,
		},
		{
			service:  path.ExchangeService,
			category: path.EventsCategory,
			expected: assert.Falsef,
		},
		{
			service:  path.OneDriveService,
			category: path.FilesCategory,
			expected: assert.Truef,
		},
		{
			service:  path.SharePointService,
			category: path.LibrariesCategory,
			expected: assert.Falsef,
		},
		{
			service:  path.SharePointService,
			category: path.ListsCategory,
			expected: assert.Falsef,
		},
		{
			service:  path.SharePointService,
			category: path.PagesCategory,
			expected: assert.Falsef,
		},
	}
)

type MetadataUnitSuite struct {
	tester.Suite
}

func TestMetadataUnitSuite(t *testing.T) {
	suite.Run(t, &MetadataUnitSuite{Suite: tester.NewUnitSuite(t)})
}

func (suite *MetadataUnitSuite) TestIsMetadataFile_Files_MetaSuffixes() {
	for _, test := range cases {
		for _, ext := range metaSuffixes {
			suite.Run(fmt.Sprintf("%s %s %s", test.service, test.category, ext), func() {
				t := suite.T()

				p, err := path.Builder{}.
					Append("file" + ext).
					ToDataLayerPath(
						tenant,
						user,
						test.service,
						test.category,
						true,
					)
				require.NoError(t, err)

				test.expected(t, metadata.IsMetadataFile(p), "extension %s", ext)
			})
		}
	}
}

func (suite *MetadataUnitSuite) TestIsMetadataFile_Files_NotMetaSuffixes() {
	for _, test := range cases {
		for _, ext := range notMetaSuffixes {
			suite.Run(fmt.Sprintf("%s %s %s", test.service, test.category, ext), func() {
				t := suite.T()

				p, err := path.Builder{}.
					Append("file" + ext).
					ToDataLayerPath(
						tenant,
						user,
						test.service,
						test.category,
						true,
					)
				require.NoError(t, err)

				assert.Falsef(t, metadata.IsMetadataFile(p), "extension %s", ext)
			})
		}
	}
}

func (suite *MetadataUnitSuite) TestIsMetadataFile_Directories() {
	suffixes := append(append([]string{}, notMetaSuffixes...), metaSuffixes...)

	for _, test := range cases {
		for _, ext := range suffixes {
			suite.Run(fmt.Sprintf("%s %s %s", test.service, test.category, ext), func() {
				t := suite.T()

				p, err := path.Builder{}.
					Append("file" + ext).
					ToDataLayerPath(
						tenant,
						user,
						test.service,
						test.category,
						false,
					)
				require.NoError(t, err)

				assert.Falsef(t, metadata.IsMetadataFile(p), "extension %s", ext)
			})
		}
	}
}
```
Changes in the onedrive package (collection implementation):

```diff
@@ -5,6 +5,7 @@ import (
 	"context"
 	"io"
 	"net/http"
+	"strings"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -39,11 +40,17 @@ const (
 	DataFileSuffix = ".data"
 )
 
+func IsMetaFile(name string) bool {
+	return strings.HasSuffix(name, MetaFileSuffix) || strings.HasSuffix(name, DirMetaFileSuffix)
+}
+
 var (
 	_ data.BackupCollection = &Collection{}
 	_ data.Stream           = &Item{}
 	_ data.StreamInfo       = &Item{}
 	_ data.StreamModTime    = &Item{}
+	_ data.Stream           = &metadataItem{}
+	_ data.StreamModTime    = &metadataItem{}
 )
 
 // Collection represents a set of OneDrive objects retrieved from M365
@@ -213,9 +220,6 @@ type Item struct {
 	id   string
 	data io.ReadCloser
 	info details.ItemInfo
-
-	// true if the item was marked by graph as deleted.
-	deleted bool
 }
 
 func (od *Item) UUID() string {
@@ -226,9 +230,11 @@ func (od *Item) ToReader() io.ReadCloser {
 	return od.data
 }
 
-// TODO(ashmrtn): Fill in once delta tokens return deleted items.
+// Deleted implements an interface function. However, OneDrive items are marked
+// as deleted by adding them to the exclude list so this can always return
+// false.
 func (od Item) Deleted() bool {
-	return od.deleted
+	return false
 }
 
 func (od *Item) Info() details.ItemInfo {
@@ -239,6 +245,31 @@ func (od *Item) ModTime() time.Time {
 	return od.info.Modified()
 }
 
+type metadataItem struct {
+	id      string
+	data    io.ReadCloser
+	modTime time.Time
+}
+
+func (od *metadataItem) UUID() string {
+	return od.id
+}
+
+func (od *metadataItem) ToReader() io.ReadCloser {
+	return od.data
+}
+
+// Deleted implements an interface function. However, OneDrive items are marked
+// as deleted by adding them to the exclude list so this can always return
+// false.
+func (od metadataItem) Deleted() bool {
+	return false
+}
+
+func (od *metadataItem) ModTime() time.Time {
+	return od.modTime
+}
+
 // populateItems iterates through items added to the collection
 // and uses the collection `itemReader` to read the item
 func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
@@ -412,28 +443,10 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
 			return progReader, nil
 		})
 
-		// TODO(meain): Remove this once we change to always
-		// backing up permissions. Until then we cannot rely
-		// on whether the previous data is what we need as the
-		// user might have not backup up permissions in the
-		// previous run.
-		metaItemInfo := details.ItemInfo{}
-		metaItemInfo.OneDrive = &details.OneDriveInfo{
-			Created:    itemInfo.OneDrive.Created,
-			ItemName:   itemInfo.OneDrive.ItemName,
-			DriveName:  itemInfo.OneDrive.DriveName,
-			ItemType:   itemInfo.OneDrive.ItemType,
-			IsMeta:     true,
-			Modified:   time.Now(), // set to current time to always refresh
-			Owner:      itemInfo.OneDrive.Owner,
-			ParentPath: itemInfo.OneDrive.ParentPath,
-			Size:       itemInfo.OneDrive.Size,
-		}
-
-		oc.data <- &Item{
-			id:   itemName + metaSuffix,
-			data: metaReader,
-			info: metaItemInfo,
+		oc.data <- &metadataItem{
+			id:      itemName + metaSuffix,
+			data:    metaReader,
+			modTime: time.Now(),
 		}
 	}
```
Changes in the backup upload code (streamBaseEntries):

```diff
@@ -22,6 +22,7 @@ import (
 	"github.com/kopia/kopia/snapshot/snapshotfs"
 	"github.com/pkg/errors"
 
+	"github.com/alcionai/corso/src/internal/connector/graph/metadata"
 	"github.com/alcionai/corso/src/internal/data"
 	D "github.com/alcionai/corso/src/internal/diagnostics"
 	"github.com/alcionai/corso/src/pkg/backup/details"
@@ -448,16 +449,25 @@ func streamBaseEntries(
 		return errors.Wrap(err, "getting previous full item path for base entry")
 	}
 
-	// All items have item info in the base backup. However, we need to make
-	// sure we have enough metadata to find those entries. To do that we add the
-	// item to progress and having progress aggregate everything for later.
-	d := &itemDetails{
-		info:         nil,
-		repoPath:     itemPath,
-		prevPath:     prevItemPath,
-		locationPath: locationPath,
+	// Meta files aren't in backup details since it's the set of items the user
+	// sees.
+	//
+	// TODO(ashmrtn): We may eventually want to make this a function that is
+	// passed in so that we can more easily switch it between different external
+	// service provider implementations.
+	if !metadata.IsMetadataFile(itemPath) {
+		// All items have item info in the base backup. However, we need to make
+		// sure we have enough metadata to find those entries. To do that we add
+		// the item to progress and having progress aggregate everything for
+		// later.
+		d := &itemDetails{
+			info:         nil,
+			repoPath:     itemPath,
+			prevPath:     prevItemPath,
+			locationPath: locationPath,
+		}
+		progress.put(encodeAsPath(itemPath.PopFront().Elements()...), d)
 	}
-	progress.put(encodeAsPath(itemPath.PopFront().Elements()...), d)
 
 	if err := cb(ctx, entry); err != nil {
 		return errors.Wrapf(err, "executing callback on item %q", itemPath)
```
Changes in the KopiaIntegrationSuite tests:

```diff
@@ -17,6 +17,7 @@ import (
 	"golang.org/x/exp/maps"
 
 	"github.com/alcionai/corso/src/internal/connector/mockconnector"
+	"github.com/alcionai/corso/src/internal/connector/onedrive"
 	"github.com/alcionai/corso/src/internal/data"
 	"github.com/alcionai/corso/src/internal/tester"
 	"github.com/alcionai/corso/src/pkg/fault"
@@ -324,6 +325,166 @@ func (suite *KopiaIntegrationSuite) TestBackupCollections() {
 	}
 }
 
+func (suite *KopiaIntegrationSuite) TestBackupCollections_NoDetailsForMeta() {
+	tmp, err := path.Builder{}.Append(testInboxDir).ToDataLayerPath(
+		testTenant,
+		testUser,
+		path.OneDriveService,
+		path.FilesCategory,
+		false)
+	require.NoError(suite.T(), err)
+
+	storePath := tmp
+	locPath := tmp
+
+	// tags that are supplied by the caller. This includes basic tags to support
+	// lookups and extra tags the caller may want to apply.
+	tags := map[string]string{
+		"fnords":    "smarf",
+		"brunhilda": "",
+	}
+
+	reasons := []Reason{
+		{
+			ResourceOwner: storePath.ResourceOwner(),
+			Service:       storePath.Service(),
+			Category:      storePath.Category(),
+		},
+	}
+
+	for _, r := range reasons {
+		for _, k := range r.TagKeys() {
+			tags[k] = ""
+		}
+	}
+
+	expectedTags := map[string]string{}
+
+	maps.Copy(expectedTags, normalizeTagKVs(tags))
+
+	table := []struct {
+		name                  string
+		expectedUploadedFiles int
+		expectedCachedFiles   int
+		numDeetsEntries       int
+		hasMetaDeets          bool
+		cols                  func() []data.BackupCollection
+	}{
+		{
+			name:                  "Uncached",
+			expectedUploadedFiles: 3,
+			expectedCachedFiles:   0,
+			// MockStream implements item info even though OneDrive doesn't.
+			numDeetsEntries: 3,
+			hasMetaDeets:    true,
+			cols: func() []data.BackupCollection {
+				mc := mockconnector.NewMockExchangeCollection(
+					storePath,
+					locPath,
+					3)
+				mc.Names[0] = testFileName
+				mc.Names[1] = testFileName + onedrive.MetaFileSuffix
+				mc.Names[2] = storePath.Folders()[0] + onedrive.DirMetaFileSuffix
+
+				return []data.BackupCollection{mc}
+			},
+		},
+		{
+			name:                  "Cached",
+			expectedUploadedFiles: 1,
+			expectedCachedFiles:   2,
+			// Meta entries are filtered out.
+			numDeetsEntries: 1,
+			hasMetaDeets:    false,
+			cols: func() []data.BackupCollection {
+				mc := mockconnector.NewMockExchangeCollection(
+					storePath,
+					locPath,
+					1)
+				mc.Names[0] = testFileName
+				mc.ColState = data.NotMovedState
+
+				return []data.BackupCollection{mc}
+			},
+		},
+	}
+
+	prevSnaps := []IncrementalBase{}
+
+	for _, test := range table {
+		suite.Run(test.name, func() {
+			t := suite.T()
+			collections := test.cols()
+
+			stats, deets, prevShortRefs, err := suite.w.BackupCollections(
+				suite.ctx,
+				prevSnaps,
+				collections,
+				nil,
+				tags,
+				true,
+				fault.New(true))
+			assert.NoError(t, err)
+
+			assert.Equal(t, test.expectedUploadedFiles, stats.TotalFileCount, "total files")
+			assert.Equal(t, test.expectedUploadedFiles, stats.UncachedFileCount, "uncached files")
+			assert.Equal(t, test.expectedCachedFiles, stats.CachedFileCount, "cached files")
+			assert.Equal(t, 5, stats.TotalDirectoryCount)
+			assert.Equal(t, 0, stats.IgnoredErrorCount)
+			assert.Equal(t, 0, stats.ErrorCount)
+			assert.False(t, stats.Incomplete)
+
+			// numDeetsEntries file entries and 5 folder entries.
+			details := deets.Details().Entries
+			assert.Len(
+				t,
+				details,
+				test.numDeetsEntries+5,
+			)
+
+			for _, entry := range details {
+				assert.True(t, entry.Updated)
+
+				if test.hasMetaDeets {
+					continue
+				}
+
+				assert.False(t, onedrive.IsMetaFile(entry.RepoRef), "metadata entry in details")
+			}
+
+			assert.Len(t, prevShortRefs, 0)
+			for _, prevRef := range prevShortRefs {
+				assert.False(
+					t,
+					onedrive.IsMetaFile(prevRef.Repo.String()),
+					"metadata entry in base details")
+			}
+
+			checkSnapshotTags(
+				t,
+				suite.ctx,
+				suite.w.c,
+				expectedTags,
+				stats.SnapshotID,
+			)
+
+			snap, err := snapshot.LoadSnapshot(
+				suite.ctx,
+				suite.w.c,
+				manifest.ID(stats.SnapshotID),
+			)
+			require.NoError(t, err)
+
+			prevSnaps = append(prevSnaps, IncrementalBase{
+				Manifest: snap,
+				SubtreePaths: []*path.Builder{
+					storePath.ToBuilder().Dir(),
+				},
+			})
+		})
+	}
+}
+
 func (suite *KopiaIntegrationSuite) TestRestoreAfterCompressionChange() {
 	t := suite.T()
 	ctx, flush := tester.NewContext()
```
Changes in the repository package (BackupDetails):

```diff
@@ -2,7 +2,6 @@ package repository
 
 import (
 	"context"
-	"strings"
 	"time"
 
 	"github.com/alcionai/clues"
@@ -367,10 +366,7 @@ func (r repository) BackupDetails(
 	if b.Version >= version.OneDrive1DataAndMetaFiles && b.Version < version.OneDrive3IsMetaMarker {
 		for _, d := range deets.Entries {
 			if d.OneDrive != nil {
-				if strings.HasSuffix(d.RepoRef, onedrive.MetaFileSuffix) ||
-					strings.HasSuffix(d.RepoRef, onedrive.DirMetaFileSuffix) {
-					d.OneDrive.IsMeta = true
-				}
+				d.OneDrive.IsMeta = onedrive.IsMetaFile(d.RepoRef)
 			}
 		}
 	}
```