Add Fetch() to RestoreCollection (#2434)

## Description

Add a function to fetch a file from the collection synchronously. This will help avoid data dependencies on the restore path created by splitting item information across multiple kopia files

Fetch function is currently unoptimized, though deeper analysis of memory footprint should be done before changing

Viewing by commit will help reduce chaff from updating tests to comply with the new interface

## Does this PR need a docs update or release note?

- [ ]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [x]  No 

## Type of change

- [x] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

## Issue(s)

* #1535

## Test Plan

- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
ashmrtn 2023-02-08 14:05:38 -08:00 committed by GitHub
parent 47f5ca1d95
commit 129d6b0b0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 275 additions and 37 deletions

View File

@ -175,7 +175,7 @@ func buildCollections(
mc.Data[i] = c.items[i].data mc.Data[i] = c.items[i].data
} }
collections = append(collections, mc) collections = append(collections, data.NotFoundRestoreCollection{Collection: mc})
} }
return collections, nil return collections, nil

View File

@ -174,7 +174,9 @@ func (suite *DataCollectionsUnitSuite) TestParseMetadataCollections() {
) )
require.NoError(t, err) require.NoError(t, err)
cdps, err := parseMetadataCollections(ctx, []data.RestoreCollection{coll}) cdps, err := parseMetadataCollections(ctx, []data.RestoreCollection{
data.NotFoundRestoreCollection{Collection: coll},
})
test.expectError(t, err) test.expectError(t, err)
emails := cdps[path.EmailCategory] emails := cdps[path.EmailCategory]
@ -345,7 +347,9 @@ func (suite *DataCollectionsIntegrationSuite) TestDelta() {
require.NotNil(t, metadata, "collections contains a metadata collection") require.NotNil(t, metadata, "collections contains a metadata collection")
cdps, err := parseMetadataCollections(ctx, []data.RestoreCollection{metadata}) cdps, err := parseMetadataCollections(ctx, []data.RestoreCollection{
data.NotFoundRestoreCollection{Collection: metadata},
})
require.NoError(t, err) require.NoError(t, err)
dps := cdps[test.scope.Category().PathType()] dps := cdps[test.scope.Category().PathType()]

View File

@ -989,7 +989,9 @@ func collectionsForInfo(
} }
} }
collections = append(collections, c) collections = append(collections, data.NotFoundRestoreCollection{
Collection: c,
})
kopiaEntries += len(info.items) kopiaEntries += len(info.items)
} }
@ -1034,7 +1036,9 @@ func collectionsForInfoVersion0(
baseExpected[info.items[i].lookupKey] = info.items[i].data baseExpected[info.items[i].lookupKey] = info.items[i].data
} }
collections = append(collections, c) collections = append(collections, data.NotFoundRestoreCollection{
Collection: c,
})
totalItems += len(info.items) totalItems += len(info.items)
kopiaEntries += len(info.items) kopiaEntries += len(info.items)
} }

View File

@ -27,13 +27,10 @@ type MockExchangeDataCollection struct {
} }
var ( var (
// Needs to implement both backup and restore interfaces so we can use it in _ data.BackupCollection = &MockExchangeDataCollection{}
// integration tests. _ data.Stream = &MockExchangeData{}
_ data.BackupCollection = &MockExchangeDataCollection{} _ data.StreamInfo = &MockExchangeData{}
_ data.RestoreCollection = &MockExchangeDataCollection{} _ data.StreamSize = &MockExchangeData{}
_ data.Stream = &MockExchangeData{}
_ data.StreamInfo = &MockExchangeData{}
_ data.StreamSize = &MockExchangeData{}
) )
// NewMockExchangeDataCollection creates an data collection that will return the specified number of // NewMockExchangeDataCollection creates an data collection that will return the specified number of

View File

@ -996,7 +996,7 @@ func (suite *OneDriveCollectionsSuite) TestDeserializeMetadata() {
) )
require.NoError(t, err) require.NoError(t, err)
cols = append(cols, mc) cols = append(cols, data.NotFoundRestoreCollection{Collection: mc})
} }
deltas, paths, err := deserializeMetadata(ctx, cols) deltas, paths, err := deserializeMetadata(ctx, cols)
@ -1529,7 +1529,9 @@ func (suite *OneDriveCollectionsSuite) TestGet() {
for _, baseCol := range cols { for _, baseCol := range cols {
folderPath := baseCol.FullPath().String() folderPath := baseCol.FullPath().String()
if folderPath == metadataPath.String() { if folderPath == metadataPath.String() {
deltas, paths, err := deserializeMetadata(ctx, []data.RestoreCollection{baseCol}) deltas, paths, err := deserializeMetadata(ctx, []data.RestoreCollection{
data.NotFoundRestoreCollection{Collection: baseCol},
})
if !assert.NoError(t, err, "deserializing metadata") { if !assert.NoError(t, err, "deserializing metadata") {
continue continue
} }

View File

@ -68,6 +68,10 @@ type BackupCollection interface {
// RestoreCollection is an extension of Collection that is used during restores. // RestoreCollection is an extension of Collection that is used during restores.
type RestoreCollection interface { type RestoreCollection interface {
Collection Collection
// Fetch retrieves an item with the given name from the Collection if it
// exists. Items retrieved with Fetch may still appear in the channel returned
// by Items().
Fetch(ctx context.Context, name string) (Stream, error)
} }
// NotFoundRestoreCollection is a wrapper for a Collection that returns // NotFoundRestoreCollection is a wrapper for a Collection that returns

View File

@ -1,8 +1,12 @@
package kopia package kopia
import ( import (
"context"
"io" "io"
"github.com/alcionai/clues"
"github.com/kopia/kopia/fs"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/path"
) )
@ -13,8 +17,10 @@ var (
) )
type kopiaDataCollection struct { type kopiaDataCollection struct {
path path.Path path path.Path
streams []data.Stream streams []data.Stream
snapshotRoot fs.Entry
counter ByteCounter
} }
func (kdc *kopiaDataCollection) Items() <-chan data.Stream { func (kdc *kopiaDataCollection) Items() <-chan data.Stream {
@ -35,6 +41,25 @@ func (kdc kopiaDataCollection) FullPath() path.Path {
return kdc.path return kdc.path
} }
func (kdc kopiaDataCollection) Fetch(
ctx context.Context,
name string,
) (data.Stream, error) {
if kdc.snapshotRoot == nil {
return nil, clues.New("no snapshot root")
}
p, err := kdc.FullPath().Append(name, true)
if err != nil {
return nil, clues.Wrap(err, "creating item path")
}
// TODO(ashmrtn): We could possibly hold a reference to the folder this
// collection corresponds to, but that requires larger changes for the
// creation of these collections.
return getItemStream(ctx, p, kdc.snapshotRoot, kdc.counter)
}
type kopiaDataStream struct { type kopiaDataStream struct {
reader io.ReadCloser reader io.ReadCloser
uuid string uuid string

View File

@ -2,14 +2,20 @@ package kopia
import ( import (
"bytes" "bytes"
"context"
"errors"
"io" "io"
"testing" "testing"
"github.com/kopia/kopia/fs"
"github.com/kopia/kopia/fs/virtualfs"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"github.com/alcionai/corso/src/internal/connector/mockconnector"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/tester"
"github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/path"
) )
@ -113,3 +119,172 @@ func (suite *KopiaDataCollectionUnitSuite) TestReturnsStreams() {
}) })
} }
} }
// These types are needed because we check that a fs.File was returned.
// Unfortunately fs.StreamingFile and fs.File have different interfaces so we
// have to fake things.
type mockSeeker struct{}
func (s mockSeeker) Seek(offset int64, whence int) (int64, error) {
return 0, errors.New("not implemented")
}
type mockReader struct {
io.ReadCloser
mockSeeker
}
func (r mockReader) Entry() (fs.Entry, error) {
return nil, errors.New("not implemented")
}
type mockFile struct {
// Use for Entry interface.
fs.StreamingFile
r io.ReadCloser
}
func (f *mockFile) Open(ctx context.Context) (fs.Reader, error) {
return mockReader{ReadCloser: f.r}, nil
}
func (suite *KopiaDataCollectionUnitSuite) TestFetch() {
var (
tenant = "a-tenant"
user = "a-user"
service = path.ExchangeService.String()
category = path.EmailCategory
folder1 = "folder1"
folder2 = "folder2"
noErrFileName = "noError"
errFileName = "error"
noErrFileData = "foo bar baz"
errReader = &mockconnector.MockExchangeData{
ReadErr: assert.AnError,
}
)
// Needs to be a function so we can switch the serialization version as
// needed.
getLayout := func(serVersion uint32) fs.Entry {
return virtualfs.NewStaticDirectory(encodeAsPath(tenant), []fs.Entry{
virtualfs.NewStaticDirectory(encodeAsPath(service), []fs.Entry{
virtualfs.NewStaticDirectory(encodeAsPath(user), []fs.Entry{
virtualfs.NewStaticDirectory(encodeAsPath(category.String()), []fs.Entry{
virtualfs.NewStaticDirectory(encodeAsPath(folder1), []fs.Entry{
virtualfs.NewStaticDirectory(encodeAsPath(folder2), []fs.Entry{
&mockFile{
StreamingFile: virtualfs.StreamingFileFromReader(
encodeAsPath(noErrFileName),
nil,
),
r: newBackupStreamReader(
serVersion,
io.NopCloser(bytes.NewReader([]byte(noErrFileData))),
),
},
&mockFile{
StreamingFile: virtualfs.StreamingFileFromReader(
encodeAsPath(errFileName),
nil,
),
r: newBackupStreamReader(
serVersion,
errReader.ToReader(),
),
},
}),
}),
}),
}),
}),
})
}
b := path.Builder{}.Append(folder1, folder2)
pth, err := b.ToDataLayerExchangePathForCategory(
tenant,
user,
category,
false,
)
require.NoError(suite.T(), err)
table := []struct {
name string
inputName string
inputSerializationVersion uint32
expectedData []byte
lookupErr assert.ErrorAssertionFunc
readErr assert.ErrorAssertionFunc
notFoundErr bool
}{
{
name: "FileFound_NoError",
inputName: noErrFileName,
inputSerializationVersion: serializationVersion,
expectedData: []byte(noErrFileData),
lookupErr: assert.NoError,
readErr: assert.NoError,
},
{
name: "FileFound_ReadError",
inputName: errFileName,
inputSerializationVersion: serializationVersion,
lookupErr: assert.NoError,
readErr: assert.Error,
},
{
name: "FileFound_VersionError",
inputName: noErrFileName,
inputSerializationVersion: serializationVersion + 1,
lookupErr: assert.NoError,
readErr: assert.Error,
},
{
name: "FileNotFound",
inputName: "foo",
inputSerializationVersion: serializationVersion + 1,
lookupErr: assert.Error,
notFoundErr: true,
},
}
for _, test := range table {
suite.Run(test.name, func() {
ctx, flush := tester.NewContext()
defer flush()
t := suite.T()
root := getLayout(test.inputSerializationVersion)
c := &i64counter{}
col := &kopiaDataCollection{path: pth, snapshotRoot: root, counter: c}
s, err := col.Fetch(ctx, test.inputName)
test.lookupErr(t, err)
if err != nil {
if test.notFoundErr {
assert.ErrorIs(t, err, data.ErrNotFound)
}
return
}
fileData, err := io.ReadAll(s.ToReader())
test.readErr(t, err)
if err != nil {
return
}
assert.Equal(t, test.expectedData, fileData)
})
}
}

View File

@ -412,13 +412,19 @@ func (w Wrapper) RestoreMultipleItems(
c, ok := cols[parentPath.ShortRef()] c, ok := cols[parentPath.ShortRef()]
if !ok { if !ok {
cols[parentPath.ShortRef()] = &kopiaDataCollection{path: parentPath} cols[parentPath.ShortRef()] = &kopiaDataCollection{
path: parentPath,
snapshotRoot: snapshotRoot,
counter: bcounter,
}
c = cols[parentPath.ShortRef()] c = cols[parentPath.ShortRef()]
} }
c.streams = append(c.streams, ds) c.streams = append(c.streams, ds)
} }
// Can't use the maps package to extract the values because we need to convert
// from *kopiaDataCollection to data.RestoreCollection too.
res := make([]data.RestoreCollection, 0, len(cols)) res := make([]data.RestoreCollection, 0, len(cols))
for _, c := range cols { for _, c := range cols {
res = append(res, c) res = append(res, c)

View File

@ -410,7 +410,7 @@ func buildCollections(
mc.Data[i] = c.items[i].data mc.Data[i] = c.items[i].data
} }
collections = append(collections, mc) collections = append(collections, data.NotFoundRestoreCollection{Collection: mc})
} }
return collections return collections

View File

@ -435,7 +435,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() {
getMeta bool getMeta bool
assertErr assert.ErrorAssertionFunc assertErr assert.ErrorAssertionFunc
assertB assert.BoolAssertionFunc assertB assert.BoolAssertionFunc
expectDCS []data.RestoreCollection expectDCS []mockColl
expectNilMans bool expectNilMans bool
}{ }{
{ {
@ -539,7 +539,7 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() {
name: "man missing backup id", name: "man missing backup id",
mr: mockManifestRestorer{ mr: mockManifestRestorer{
mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{
"id": {mockColl{id: "id_coll"}}, "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}},
}}, }},
mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "id", "", "")}, mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "id", "", "")},
}, },
@ -566,8 +566,8 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() {
name: "one complete, one incomplete", name: "one complete, one incomplete",
mr: mockManifestRestorer{ mr: mockManifestRestorer{
mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{
"id": {mockColl{id: "id_coll"}}, "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}},
"incmpl_id": {mockColl{id: "incmpl_id_coll"}}, "incmpl_id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "incmpl_id_coll"}}},
}}, }},
mans: []*kopia.ManifestEntry{ mans: []*kopia.ManifestEntry{
makeMan(path.EmailCategory, "id", "", "bid"), makeMan(path.EmailCategory, "id", "", "bid"),
@ -579,13 +579,13 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() {
getMeta: true, getMeta: true,
assertErr: assert.NoError, assertErr: assert.NoError,
assertB: assert.True, assertB: assert.True,
expectDCS: []data.RestoreCollection{mockColl{id: "id_coll"}}, expectDCS: []mockColl{{id: "id_coll"}},
}, },
{ {
name: "single valid man", name: "single valid man",
mr: mockManifestRestorer{ mr: mockManifestRestorer{
mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{
"id": {mockColl{id: "id_coll"}}, "id": {data.NotFoundRestoreCollection{Collection: mockColl{id: "id_coll"}}},
}}, }},
mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "id", "", "bid")}, mans: []*kopia.ManifestEntry{makeMan(path.EmailCategory, "id", "", "bid")},
}, },
@ -594,14 +594,14 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() {
getMeta: true, getMeta: true,
assertErr: assert.NoError, assertErr: assert.NoError,
assertB: assert.True, assertB: assert.True,
expectDCS: []data.RestoreCollection{mockColl{id: "id_coll"}}, expectDCS: []mockColl{{id: "id_coll"}},
}, },
{ {
name: "multiple valid mans", name: "multiple valid mans",
mr: mockManifestRestorer{ mr: mockManifestRestorer{
mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{ mockRestorer: mockRestorer{collsByID: map[string][]data.RestoreCollection{
"mail": {mockColl{id: "mail_coll"}}, "mail": {data.NotFoundRestoreCollection{Collection: mockColl{id: "mail_coll"}}},
"contact": {mockColl{id: "contact_coll"}}, "contact": {data.NotFoundRestoreCollection{Collection: mockColl{id: "contact_coll"}}},
}}, }},
mans: []*kopia.ManifestEntry{ mans: []*kopia.ManifestEntry{
makeMan(path.EmailCategory, "mail", "", "bid"), makeMan(path.EmailCategory, "mail", "", "bid"),
@ -613,9 +613,9 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() {
getMeta: true, getMeta: true,
assertErr: assert.NoError, assertErr: assert.NoError,
assertB: assert.True, assertB: assert.True,
expectDCS: []data.RestoreCollection{ expectDCS: []mockColl{
mockColl{id: "mail_coll"}, {id: "mail_coll"},
mockColl{id: "contact_coll"}, {id: "contact_coll"},
}, },
}, },
{ {
@ -658,16 +658,33 @@ func (suite *OperationsManifestsUnitSuite) TestProduceManifestsAndMetadata() {
expect, got := []string{}, []string{} expect, got := []string{}, []string{}
for _, dc := range test.expectDCS { for _, dc := range test.expectDCS {
mc, ok := dc.(mockColl) expect = append(expect, dc.id)
assert.True(t, ok)
expect = append(expect, mc.id)
} }
for _, dc := range dcs { for _, dc := range dcs {
mc, ok := dc.(mockColl) if !assert.IsTypef(
assert.True(t, ok) t,
data.NotFoundRestoreCollection{},
dc,
"unexpected type returned [%T]",
dc,
) {
continue
}
tmp := dc.(data.NotFoundRestoreCollection)
if !assert.IsTypef(
t,
mockColl{},
tmp.Collection,
"unexpected type returned [%T]",
tmp.Collection,
) {
continue
}
mc := tmp.Collection.(mockColl)
got = append(got, mc.id) got = append(got, mc.id)
} }

View File

@ -62,7 +62,11 @@ func (suite *RestoreOpSuite) TestRestoreOperation_PersistResults() {
bytesRead: &stats.ByteCounter{ bytesRead: &stats.ByteCounter{
NumBytes: 42, NumBytes: 42,
}, },
cs: []data.RestoreCollection{&mockconnector.MockExchangeDataCollection{}}, cs: []data.RestoreCollection{
data.NotFoundRestoreCollection{
Collection: &mockconnector.MockExchangeDataCollection{},
},
},
gc: &support.ConnectorOperationStatus{ gc: &support.ConnectorOperationStatus{
ObjectCount: 1, ObjectCount: 1,
Successful: 1, Successful: 1,