Allow multiple items in DataCollection from kopia (#296)

Use a slice to back the data instead of sending items directly to the
channel, for two reasons (this may change in the future):
  * kopia loads all data about a directory at the same time
  * consumers of the DataCollection may pull items from the channel
    slowly, which could block sends to the channel. In multi-threaded
    scenarios, that could delay discovering other directories to
    traverse
This commit is contained in:
ashmrtn 2022-07-07 14:54:46 -07:00 committed by GitHub
parent 10f112452a
commit 1143a33ce6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 92 additions and 53 deletions

View File

@ -6,31 +6,29 @@ import (
"github.com/alcionai/corso/internal/connector" "github.com/alcionai/corso/internal/connector"
) )
var _ connector.DataCollection = &singleItemCollection{} var _ connector.DataCollection = &kopiaDataCollection{}
var _ connector.DataStream = &kopiaDataStream{} var _ connector.DataStream = &kopiaDataStream{}
// singleItemCollection implements DataCollection but only returns a single type kopiaDataCollection struct {
// DataStream. It is not safe for concurrent use. path []string
type singleItemCollection struct { streams []connector.DataStream
path []string
stream connector.DataStream
used bool
} }
func (sic *singleItemCollection) Items() <-chan connector.DataStream { func (kdc *kopiaDataCollection) Items() <-chan connector.DataStream {
if sic.used { res := make(chan connector.DataStream)
return nil go func() {
} defer close(res)
for _, s := range kdc.streams {
res <- s
}
}()
sic.used = true
res := make(chan connector.DataStream, 1)
res <- sic.stream
close(res)
return res return res
} }
func (sic singleItemCollection) FullPath() []string { func (kdc kopiaDataCollection) FullPath() []string {
return append([]string{}, sic.path...) return append([]string{}, kdc.path...)
} }
type kopiaDataStream struct { type kopiaDataStream struct {

View File

@ -9,57 +9,94 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite" "github.com/stretchr/testify/suite"
"github.com/alcionai/corso/internal/connector"
) )
// --------------- // ---------------
// unit tests // unit tests
// --------------- // ---------------
type SingleItemCollectionUnitSuite struct { type KopiaDataCollectionUnitSuite struct {
suite.Suite suite.Suite
} }
func TestSingleItemCollectionUnitSuite(t *testing.T) { func TestKopiaDataCollectionUnitSuite(t *testing.T) {
suite.Run(t, new(SingleItemCollectionUnitSuite)) suite.Run(t, new(KopiaDataCollectionUnitSuite))
} }
func (suite *SingleItemCollectionUnitSuite) TestReturnsPath() { func (suite *KopiaDataCollectionUnitSuite) TestReturnsPath() {
t := suite.T() t := suite.T()
path := []string{"some", "path", "for", "data"} path := []string{"some", "path", "for", "data"}
c := singleItemCollection{ c := kopiaDataCollection{
stream: kopiaDataStream{}, streams: []connector.DataStream{},
path: path, path: path,
} }
assert.Equal(t, c.FullPath(), path) assert.Equal(t, c.FullPath(), path)
} }
func (suite *SingleItemCollectionUnitSuite) TestReturnsOnlyOneItem() { func (suite *KopiaDataCollectionUnitSuite) TestReturnsStreams() {
t := suite.T() data := [][]byte{
[]byte("abcdefghijklmnopqrstuvwxyz"),
data := []byte("abcdefghijklmnopqrstuvwxyz") []byte("zyxwvutsrqponmlkjihgfedcba"),
uuid := "a-file"
stream := &kopiaDataStream{
reader: io.NopCloser(bytes.NewReader(data)),
uuid: uuid,
} }
c := singleItemCollection{ uuids := []string{
stream: stream, "a-file",
path: []string{}, "another-file",
} }
count := 0 table := []struct {
for returnedStream := range c.Items() { name string
assert.Equal(t, returnedStream.UUID(), uuid) streams []connector.DataStream
}{
buf, err := ioutil.ReadAll(returnedStream.ToReader()) {
require.NoError(t, err) name: "SingleStream",
assert.Equal(t, buf, data) streams: []connector.DataStream{
&kopiaDataStream{
count++ reader: io.NopCloser(bytes.NewReader(data[0])),
uuid: uuids[0],
},
},
},
{
name: "MultipleStreams",
streams: []connector.DataStream{
&kopiaDataStream{
reader: io.NopCloser(bytes.NewReader(data[0])),
uuid: uuids[0],
},
&kopiaDataStream{
reader: io.NopCloser(bytes.NewReader(data[1])),
uuid: uuids[1],
},
},
},
} }
assert.Equal(t, 1, count) for _, test := range table {
suite.T().Run(test.name, func(t *testing.T) {
c := kopiaDataCollection{
streams: test.streams,
path: []string{},
}
count := 0
for returnedStream := range c.Items() {
require.Less(t, count, len(test.streams))
assert.Equal(t, returnedStream.UUID(), uuids[count])
buf, err := ioutil.ReadAll(returnedStream.ToReader())
require.NoError(t, err)
assert.Equal(t, buf, data[count])
count++
}
assert.Equal(t, len(test.streams), count)
})
}
} }

View File

@ -329,10 +329,12 @@ func (w Wrapper) restoreSingleItem(
pathWithRoot := []string{rootDir.Name()} pathWithRoot := []string{rootDir.Name()}
pathWithRoot = append(pathWithRoot, itemPath[:len(itemPath)-1]...) pathWithRoot = append(pathWithRoot, itemPath[:len(itemPath)-1]...)
return &singleItemCollection{ return &kopiaDataCollection{
stream: kopiaDataStream{ streams: []connector.DataStream{
uuid: itemPath[len(itemPath)-1], &kopiaDataStream{
reader: r, uuid: f.Name(),
reader: r,
},
}, },
path: pathWithRoot, path: pathWithRoot,
}, nil }, nil

View File

@ -302,11 +302,13 @@ func (suite *KopiaSimpleRepoIntegrationSuite) SetupTest() {
suite.w = &Wrapper{c} suite.w = &Wrapper{c}
collections := []connector.DataCollection{ collections := []connector.DataCollection{
&singleItemCollection{ &kopiaDataCollection{
path: testPath, path: testPath,
stream: &kopiaDataStream{ streams: []connector.DataStream{
uuid: testFileUUID, &kopiaDataStream{
reader: io.NopCloser(bytes.NewReader(testFileData)), uuid: testFileUUID,
reader: io.NopCloser(bytes.NewReader(testFileData)),
},
}, },
}, },
} }