continued progress, setting aside

ryanfkeepers 2023-02-07 11:12:23 -07:00
parent 9059ad758b
commit c2d206c2a2
3 changed files with 253 additions and 229 deletions

View File

@@ -23,7 +23,7 @@ import (
const (
logGraphRequestsEnvKey = "LOG_GRAPH_REQUESTS"
numberOfRetries = 3
numberOfRetries = 1
)
// AllMetadataFileNames produces the standard set of filenames used to store graph
@@ -313,7 +313,7 @@ func RunWithRetry(run func() error) error {
}
if i < numberOfRetries {
time.Sleep(time.Duration(3*(i+2)) * time.Second)
time.Sleep(time.Duration(0*(i+2)) * time.Second)
}
}
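
This hunk drops numberOfRetries from 3 to 1 and zeroes the sleep multiplier, which disables the backoff entirely. For reference, a minimal sketch of the loop being tuned, reconstructed from the visible lines; the loop bounds and error plumbing are assumptions, since the diff does not show the full function body:

package graph

import "time"

const numberOfRetries = 3

// RunWithRetry retries run with a linear backoff between attempts.
func RunWithRetry(run func() error) error {
    var err error

    for i := 0; i <= numberOfRetries; i++ {
        if err = run(); err == nil {
            return nil
        }

        if i < numberOfRetries {
            // linear ramp: 6s, 9s, 12s for i = 0, 1, 2
            time.Sleep(time.Duration(3*(i+2)) * time.Second)
        }
    }

    return err
}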

View File

@@ -11,7 +11,6 @@ import (
"time"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/pkg/errors"
"github.com/spatialcurrent/go-lazy/pkg/lazy"
"github.com/alcionai/corso/src/internal/connector/graph"
@@ -216,7 +215,7 @@ func (oc *Collection) populateItems(ctx context.Context) {
// Retrieve the OneDrive folder path to set later in `details.OneDriveInfo`
parentPathString, err := path.GetDriveFolderPath(oc.folderPath)
if err != nil {
oc.reportAsCompleted(ctx, 0, 0, 0, err)
oc.reportAsCompleted(ctx, 0, 0, 0, 0, 0, err)
return
}
@@ -237,11 +236,11 @@ func (oc *Collection) populateItems(ctx context.Context) {
errs = support.WrapAndAppend(id, err, errs)
}
countUpdater = func(size, dirs, items, dReads, iReads int64) {
atomic.AddInt64(&dirsRead, dReads)
atomic.AddInt64(&itemsRead, iReads)
atomic.AddInt64(&byteCount, size)
atomic.AddInt64(&dirsFound, dirs)
atomic.AddInt64(&itemsFound, dirs)
atomic.AddInt64(&itemsFound, items)
atomic.AddInt64(&dirsRead, dReads)
atomic.AddInt64(&itemsRead, iReads)
}
)
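
The countUpdater closure is shared by the concurrent fetchers spawned below, so every counter is bumped through sync/atomic (this hunk also fixes the old itemsFound counter, which was incremented with dirs instead of items). A self-contained sketch of the pattern, with illustrative values:

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
)

func main() {
    var (
        byteCount, dirsFound, itemsFound, dirsRead, itemsRead int64
        wg                                                    sync.WaitGroup
    )

    countUpdater := func(size, dirs, items, dReads, iReads int64) {
        atomic.AddInt64(&byteCount, size)
        atomic.AddInt64(&dirsFound, dirs)
        atomic.AddInt64(&itemsFound, items)
        atomic.AddInt64(&dirsRead, dReads)
        atomic.AddInt64(&itemsRead, iReads)
    }

    for i := 0; i < 4; i++ {
        wg.Add(1)

        go func() {
            defer wg.Done()
            countUpdater(1024, 0, 1, 0, 1) // one 1KiB file, found and read
        }()
    }

    wg.Wait()
    fmt.Println(itemsFound, itemsRead, byteCount) // 4 4 4096
}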
@@ -265,6 +264,22 @@ func (oc *Collection) populateItems(ctx context.Context) {
wg.Add(1)
// fetch the item's metadata (if it has any, will no-op otherwise),
// and stream it into the collection's data channel
go oc.streamItemMeta(
ctx,
&wg,
semaphoreCh,
folderProgress,
errUpdater,
countUpdater,
item,
parentPathString)
semaphoreCh <- struct{}{}
wg.Add(1)
// fetch the item, and stream it into the collection's data channel
go oc.streamItem(
ctx,
@@ -279,7 +294,109 @@ func (oc *Collection) populateItems(ctx context.Context) {
wg.Wait()
oc.reportAsCompleted(ctx, itemsFound, itemsRead, byteCount, errs)
oc.reportAsCompleted(ctx, dirsFound, dirsRead, itemsFound, itemsRead, byteCount, errs)
}
func (oc *Collection) streamItemMeta(
ctx context.Context,
wg *sync.WaitGroup,
semaphore <-chan struct{},
progress chan<- struct{},
errUpdater func(string, error),
countUpdater func(int64, int64, int64, int64, int64),
item models.DriveItemable,
parentPath string,
) {
defer wg.Done()
defer func() { <-semaphore }()
var (
id = *item.GetId()
name = *item.GetName()
size = *item.GetSize()
isFile = item.GetFile() != nil
lazyRC io.ReadCloser
info details.ItemInfo
metadata io.ReadCloser
suffix string
dirsFound int64
dirsRead int64
itemsFound int64
itemsRead int64
err error
)
// we only handle metadata for onedrive right now
if oc.source != OneDriveSource {
return
}
if isFile {
itemsFound, itemsRead = 1, 1
suffix = MetaFileSuffix
} else {
dirsFound, dirsRead = 1, 1
suffix = DirMetaFileSuffix
}
info.OneDrive = oneDriveItemInfo(item, size)
info.OneDrive.ParentPath = parentPath
// Construct a new lazy readCloser to feed to the collection consumer.
// This ensures that downloads won't be attempted unless that consumer
// attempts to read bytes. Assumption is that kopia will check things
// like file modtimes before attempting to read.
lazyRC = lazy.NewLazyReadCloser(func() (io.ReadCloser, error) {
var msize int
metadata, msize, err = getItemMeta(
ctx,
oc.service,
oc.driveID,
item,
maxRetries,
oc.ctrl.ToggleFeatures.EnablePermissionsBackup,
oc.itemMetaReader)
if err != nil {
errUpdater(id, err)
return nil, err
}
name += suffix
size = int64(msize)
progReader, closer := observe.ItemProgress(
ctx,
metadata,
observe.ItemBackupMsg,
observe.PII(name),
size)
go closer()
return progReader, nil
})
// Item read successfully, record its addition.
//
// Note: this can cause inaccurate counts. Right now it counts all
// the items we intend to read. Errors within the lazy readCloser
// will create a conflict: an item is both successful and erroneous.
// But the async control to fix that is more error-prone than helpful.
//
// TODO: transform this into a stats bus so that async control of stats
// aggregation is handled at the backup level, not at the item iteration
// level.
countUpdater(size, dirsFound, itemsFound, dirsRead, itemsRead)
// stream the item to the data consumer.
oc.data <- &Item{
id: name,
data: lazyRC,
info: info,
}
progress <- struct{}{}
}
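
streamItemMeta hands the collection a lazy ReadCloser so the metadata download only happens if the consumer (kopia) actually reads bytes. A simplified stand-in for the go-lazy type used above, showing just the deferred-open behavior; this is a sketch of the concept, not the library's implementation:

package main

import (
    "fmt"
    "io"
    "strings"
)

// lazyRC defers the expensive open until the first Read call.
type lazyRC struct {
    open func() (io.ReadCloser, error)
    rc   io.ReadCloser
}

func (l *lazyRC) Read(p []byte) (int, error) {
    if l.rc == nil {
        rc, err := l.open()
        if err != nil {
            return 0, err
        }
        l.rc = rc
    }

    return l.rc.Read(p)
}

func (l *lazyRC) Close() error {
    if l.rc == nil {
        return nil // never opened, nothing to close
    }

    return l.rc.Close()
}

func main() {
    r := &lazyRC{open: func() (io.ReadCloser, error) {
        fmt.Println("download happens only now")
        return io.NopCloser(strings.NewReader("{}")), nil
    }}

    b, _ := io.ReadAll(r) // first Read triggers the deferred open
    fmt.Println(string(b))
    r.Close()
}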
func (oc *Collection) streamItem(
@@ -296,111 +413,69 @@ func (oc *Collection) streamItem(
defer func() { <-semaphore }()
var (
itemID = *item.GetId()
itemName = *item.GetName()
itemSize = *item.GetSize()
id = *item.GetId()
name = *item.GetName()
size = *item.GetSize()
isFile = item.GetFile() != nil
itemInfo details.ItemInfo
itemMeta io.ReadCloser
dataSuffix string
metaSuffix string
lazyRC io.ReadCloser
info details.ItemInfo
dirsFound int64
itemsFound int64
dirsRead int64
itemsRead int64
itemMetaSize int
err error
)
if isFile {
itemsFound++
itemsRead++
metaSuffix = MetaFileSuffix
} else {
dirsFound++
dirsRead++
metaSuffix = DirMetaFileSuffix
}
switch oc.source {
case SharePointSource:
itemInfo.SharePoint = sharePointItemInfo(item, itemSize)
itemInfo.SharePoint.ParentPath = parentPath
info.SharePoint = sharePointItemInfo(item, size)
info.SharePoint.ParentPath = parentPath
default:
itemInfo.OneDrive = oneDriveItemInfo(item, itemSize)
itemInfo.OneDrive.ParentPath = parentPath
dataSuffix = DataFileSuffix
info.OneDrive = oneDriveItemInfo(item, size)
info.OneDrive.ParentPath = parentPath
}
// directory handling
if !isFile {
// Construct a new lazy readCloser to feed to the collection consumer.
// This ensures that downloads won't be attempted unless that consumer
// attempts to read bytes. Assumption is that kopia will check things
// like file modtimes before attempting to read.
metaReader := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) {
if oc.source == OneDriveSource {
itemMeta, itemMetaSize, err = getItemMeta(
ctx,
oc.service,
oc.driveID,
item,
maxRetries,
oc.ctrl.ToggleFeatures.EnablePermissionsBackup,
oc.itemMetaReader)
if err != nil {
errUpdater(itemID, err)
return nil, err
}
}
progReader, closer := observe.ItemProgress(
ctx, itemMeta, observe.ItemBackupMsg,
observe.PII(itemName+metaSuffix), int64(itemMetaSize))
go closer()
return progReader, nil
})
} else {
// Construct a new lazy readCloser to feed to the collection consumer.
// This ensures that downloads won't be attempted unless that consumer
// attempts to read bytes. Assumption is that kopia will check things
// like file modtimes before attempting to read.
lazyData := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) {
// Read the item
var (
itemData io.ReadCloser
err error
)
itemData, item, err = readDriveItem(
lazyRC = lazy.NewLazyReadCloser(func() (io.ReadCloser, error) {
itemData, _, err := readDriveItem(
ctx,
oc.service,
oc.itemClient,
oc.driveID, itemID,
oc.driveID, id,
item,
oc.itemReader,
oc.itemGetter)
if err != nil {
errUpdater(itemID, err)
errUpdater(id, err)
return nil, err
}
// display/log the item download
// this separates onedrive items from their metadata
if oc.source == OneDriveSource && isFile {
name += DataFileSuffix
}
progReader, closer := observe.ItemProgress(
ctx,
itemData,
observe.ItemBackupMsg,
observe.PII(itemName+dataSuffix),
itemSize)
observe.PII(name),
size)
go closer()
return progReader, nil
})
}
// Item read successfully, record its addition.
//
@@ -412,21 +487,13 @@ func (oc *Collection) streamItem(
// TODO: transform this into a stats bus so that async control of stats
// aggregation is handled at the backup level, not at the item iteration
// level.
countUpdater(itemSize, dirsFound, itemsFound, dirsRead, itemsRead)
if hasMeta {
oc.data <- &Item{
id: itemName + metaSuffix,
data: metaReader,
info: itemInfo,
}
}
countUpdater(size, dirsFound, itemsFound, dirsRead, itemsRead)
// stream the item to the data consumer.
oc.data <- &Item{
id: itemName + dataSuffix,
data: lazyData,
info: itemInfo,
id: name,
data: lazyRC,
info: info,
}
progress <- struct{}{}
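
Both stream helpers release a slot in the buffered-channel semaphore that populateItems acquires before spawning them, and signal the folder progress channel when done. A compact, runnable sketch of that bounded-concurrency shape, with illustrative names and a buffered data channel standing in for oc.data:

package main

import (
    "fmt"
    "sync"
)

func main() {
    const maxInFlight = 4

    var (
        wg          sync.WaitGroup
        semaphoreCh = make(chan struct{}, maxInFlight)
        data        = make(chan int, 10) // buffered stand-in for oc.data
    )

    for i := 0; i < 10; i++ {
        semaphoreCh <- struct{}{} // acquire a slot before spawning
        wg.Add(1)

        go func(n int) {
            defer wg.Done()
            defer func() { <-semaphoreCh }() // release the slot

            data <- n * n // stream the "item"
        }(i)
    }

    wg.Wait()
    close(data) // mirrors reportAsCompleted closing oc.data

    for v := range data {
        fmt.Println(v)
    }
}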
@@ -441,12 +508,6 @@ func getItemMeta(
enablePermissionsBackup bool,
read itemMetaReaderFunc,
) (io.ReadCloser, int, error) {
var (
rc io.ReadCloser
size int
)
for i := 1; i <= maxRetries; i++ {
if !enablePermissionsBackup {
// We are still writing the metadata file but with
// empty permissions as we don't have a way to
@@ -455,18 +516,16 @@
return io.NopCloser(strings.NewReader("{}")), 2, nil
}
var err error
rc, size, err = read(ctx, service, driveID, item)
if err == nil ||
!graph.IsErrTimeout(err) ||
!graph.IsInternalServerError(err) {
return nil, 0, errors.Wrap(err, "getting item metadata")
}
var (
rc io.ReadCloser
size int
err error
)
if i < maxRetries {
time.Sleep(1 * time.Second)
}
}
err = graph.RunWithRetry(func() error {
rc, size, err = read(ctx, service, driveID, item)
return err
})
return rc, size, nil
}
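
getItemMeta now delegates its retry loop to graph.RunWithRetry, capturing rc and size in the closure so the values from the last successful attempt survive. A hedged sketch of that closure-capture shape; runWithRetry and the flaky reader are hypothetical stand-ins for graph.RunWithRetry and the real read callback:

package main

import (
    "errors"
    "fmt"
    "io"
    "strings"
)

// runWithRetry is a bare-bones stand-in for graph.RunWithRetry.
func runWithRetry(run func() error) error {
    var err error

    for i := 0; i < 3; i++ {
        if err = run(); err == nil {
            return nil
        }
    }

    return err
}

func main() {
    var (
        rc   io.ReadCloser
        size int
        err  error
    )

    attempts := 0
    read := func() (io.ReadCloser, int, error) {
        // fail once, then succeed, to exercise the retry
        if attempts++; attempts < 2 {
            return nil, 0, errors.New("transient")
        }

        return io.NopCloser(strings.NewReader("{}")), 2, nil
    }

    err = runWithRetry(func() error {
        rc, size, err = read() // outputs captured outside the closure
        return err
    })

    fmt.Println(size, err) // 2 <nil>

    if rc != nil {
        rc.Close()
    }
}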
@@ -486,48 +545,26 @@ func readDriveItem(
item = original
)
for i := 0; i < maxRetries; i++ {
err = graph.RunWithRetry(func() error {
_, rc, err = read(itemClient, item)
if err == nil {
return nil, nil, errors.Wrap(err, "reading drive item")
}
if graph.IsErrUnauthorized(err) {
// assume unauthorized requests are a sign of an expired
// jwt token, and that we've overrun the available window
// to download the actual file. Re-downloading the item
// will refresh that download url.
di, diErr := get(ctx, service, driveID, itemID)
if diErr != nil {
return nil, nil, errors.Wrap(diErr, "retrieving item to refresh download url")
}
item = di
continue
} else if !graph.IsErrTimeout(err) &&
!graph.IsInternalServerError(err) {
// for all non-timeout, non-internal errors, do not retry
break
}
if i < maxRetries {
time.Sleep(1 * time.Second)
}
}
return err
})
return rc, item, err
}
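
The retained 401 handling treats an unauthorized response as an expired pre-signed download URL: the item is re-fetched to refresh the URL, then the read is retried. A toy reconstruction of that control flow; the types and helpers here are illustrative, not Corso's:

package main

import (
    "errors"
    "fmt"
)

var errUnauthorized = errors.New("401 unauthorized")

type driveItem struct{ downloadURL string }

func readItem(it driveItem) (string, error) {
    if it.downloadURL == "expired" {
        return "", errUnauthorized
    }

    return "file-bytes", nil
}

func refreshItem() (driveItem, error) {
    // stand-in for re-fetching the item to get a fresh download URL
    return driveItem{downloadURL: "fresh"}, nil
}

func main() {
    item := driveItem{downloadURL: "expired"}

    var (
        payload string
        err     error
    )

    for i := 0; i < 3; i++ {
        if payload, err = readItem(item); err == nil {
            break
        }

        if errors.Is(err, errUnauthorized) {
            if item, err = refreshItem(); err != nil {
                break
            }

            continue
        }

        break // non-retryable error
    }

    fmt.Println(payload, err) // file-bytes <nil>
}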
func (oc *Collection) reportAsCompleted(ctx context.Context, itemsRead, itemsFound, byteCount int64, errs error) {
func (oc *Collection) reportAsCompleted(
ctx context.Context,
dirsFound, dirsRead, itemsFound, itemsRead, byteCount int64,
errs error,
) {
close(oc.data)
status := support.CreateStatus(ctx, support.Backup,
1, // num folders (always 1)
support.CollectionMetrics{
Objects: int(itemsFound), // items to read,
Successes: int(itemsRead), // items read successfully,
Objects: int(itemsFound) + int(dirsFound), // items to read,
Successes: int(itemsRead) + int(dirsRead), // items read successfully,
TotalBytes: byteCount, // Number of bytes read in the operation,
},
errs,
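
With the widened signature, directory and item tallies stay separate while streaming and are folded together only when the status is built. A small sketch of that arithmetic, mirroring the CollectionMetrics fields in the diff:

package main

import "fmt"

// CollectionMetrics mirrors the fields used in the diff above.
type CollectionMetrics struct {
    Objects    int
    Successes  int
    TotalBytes int64
}

func fold(dirsFound, dirsRead, itemsFound, itemsRead, byteCount int64) CollectionMetrics {
    return CollectionMetrics{
        Objects:    int(itemsFound) + int(dirsFound), // items to read
        Successes:  int(itemsRead) + int(dirsRead),   // items read successfully
        TotalBytes: byteCount,
    }
}

func main() {
    fmt.Printf("%+v\n", fold(1, 1, 1, 1, 1024))
    // {Objects:2 Successes:2 TotalBytes:1024}
}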

View File

@@ -63,6 +63,11 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
}}
)
testItemMetaBytes, err := json.Marshal(testItemMeta)
if err != nil {
suite.T().Fatal("unable to marshall test permissions", err)
}
type nst struct {
name string
size int64
@@ -181,12 +186,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
_ string,
_ models.DriveItemable,
) (io.ReadCloser, int, error) {
metaJSON, err := json.Marshal(testItemMeta)
if err != nil {
return nil, 0, err
}
return io.NopCloser(bytes.NewReader(metaJSON)), len(metaJSON), nil
return io.NopCloser(bytes.NewReader(testItemMetaBytes)), len(testItemMetaBytes), nil
}
// Read items from the collection
@@ -199,26 +199,37 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
wg.Wait()
if test.source == OneDriveSource {
require.Len(t, readItems, 2) // .data and .meta
assert.Len(t, readItems, 2) // .data and .meta
assert.Equal(t, 2, collStatus.ObjectCount)
assert.Equal(t, 2, collStatus.Successful)
} else {
require.Len(t, readItems, 1)
assert.Len(t, readItems, 1)
assert.Equal(t, 1, collStatus.ObjectCount)
assert.Equal(t, 1, collStatus.Successful)
}
// Expect only 1 item
require.Equal(t, 1, collStatus.ObjectCount)
require.Equal(t, 1, collStatus.Successful)
var (
foundData bool
foundMeta bool
)
// Validate item info and data
readItem := readItems[0]
for _, readItem := range readItems {
readItemInfo := readItem.(data.StreamInfo)
id := readItem.UUID()
if test.source == OneDriveSource {
assert.Equal(t, testItemName+DataFileSuffix, readItem.UUID())
} else {
assert.Equal(t, testItemName, readItem.UUID())
if strings.HasSuffix(id, DataFileSuffix) {
foundData = true
}
var hasMeta bool
if strings.HasSuffix(id, MetaFileSuffix) {
foundMeta = true
hasMeta = true
}
assert.Contains(t, testItemName, id)
require.Implements(t, (*data.StreamModTime)(nil), readItem)
mt := readItem.(data.StreamModTime)
assert.Equal(t, now, mt.ModTime())
@@ -226,25 +237,20 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
require.NoError(t, err)
name, parentPath := test.infoFrom(t, readItemInfo.Info())
assert.Equal(t, testItemData, readData)
assert.Equal(t, testItemName, name)
assert.Equal(t, driveFolderPath, parentPath)
if test.source == OneDriveSource {
readItemMeta := readItems[1]
assert.Equal(t, testItemName+MetaFileSuffix, readItemMeta.UUID())
readMetaData, err := io.ReadAll(readItemMeta.ToReader())
if hasMeta {
ra, err := io.ReadAll(readItem.ToReader())
require.NoError(t, err)
tm, err := json.Marshal(testItemMeta)
if err != nil {
t.Fatal("unable to marshall test permissions", err)
assert.Equal(t, testItemMetaBytes, ra)
}
}
assert.Equal(t, tm, readMetaData)
if test.source == OneDriveSource {
assert.True(t, foundData, "found data file")
assert.True(t, foundMeta, "found metadata file")
}
})
}
@@ -359,7 +365,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionDisablePermissionsBackup() {
graph.HTTPClient(graph.NoTimeout()),
folderPath,
"fakeDriveID",
suite,
&MockGraphService{},
suite.testStatusUpdater(&wg, &collStatus),
test.source,
control.Options{ToggleFeatures: control.Toggles{}})
@@ -398,15 +404,14 @@ func (suite *CollectionUnitTestSuite) TestCollectionDisablePermissionsBackup() {
wg.Wait()
// Expect no items
require.Equal(t, 1, collStatus.ObjectCount)
require.Equal(t, 1, collStatus.Successful)
assert.Equal(t, 1, collStatus.ObjectCount, "total objects")
assert.Equal(t, 1, collStatus.Successful, "successes")
for _, i := range readItems {
if strings.HasSuffix(i.UUID(), MetaFileSuffix) {
content, err := io.ReadAll(i.ToReader())
require.NoError(t, err)
require.Equal(t, content, []byte("{}"))
assert.Equal(t, content, []byte("{}"))
}
}
})
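
The assertion above leans on getItemMeta's short-circuit: with permissions backup disabled, a metadata file is still written, just with a two-byte "{}" payload. A tiny sketch of that behavior; the reader function is illustrative:

package main

import (
    "fmt"
    "io"
    "strings"
)

func metaReader(enablePermissionsBackup bool) io.ReadCloser {
    if !enablePermissionsBackup {
        // still emit a metadata file, but with empty permissions
        return io.NopCloser(strings.NewReader("{}"))
    }

    return io.NopCloser(strings.NewReader(`{"permissions":["..."]}`))
}

func main() {
    content, _ := io.ReadAll(metaReader(false))
    fmt.Println(string(content)) // {}
}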
@@ -494,7 +499,7 @@ func (suite *CollectionUnitTestSuite) TestStreamItem() {
errsIs: func(t *testing.T, e error, count int) {
assert.True(t, graph.IsErrUnauthorized(e), "is unauthorized error")
assert.ErrorIs(t, e, graph.Err401Unauthorized)
assert.Equal(t, 2, count, "count of errors aggregated")
assert.Equal(t, 1, count, "count of errors aggregated")
},
readErrIs: func(t *testing.T, e error) {
assert.True(t, graph.IsErrUnauthorized(e), "is unauthorized error")
@@ -519,24 +524,6 @@ func (suite *CollectionUnitTestSuite) TestStreamItem() {
assert.ErrorIs(t, e, context.DeadlineExceeded)
},
},
{
name: "throttled errors",
expectData: "",
coll: &Collection{
data: mockDataChan(),
itemReader: mockReader("foo", graph.Err429TooManyRequests),
itemGetter: mockGetter(nil),
},
errsIs: func(t *testing.T, e error, count int) {
assert.True(t, graph.IsErrThrottled(e), "is throttled error")
assert.ErrorIs(t, e, graph.Err429TooManyRequests)
assert.Equal(t, 1, count, "one errors")
},
readErrIs: func(t *testing.T, e error) {
assert.True(t, graph.IsErrThrottled(e), "is throttled error")
assert.ErrorIs(t, e, graph.Err429TooManyRequests)
},
},
{
name: "internal server errors",
expectData: "",
@@ -567,7 +554,7 @@ func (suite *CollectionUnitTestSuite) TestStreamItem() {
errCount int
size int64
countUpdater = func(sz int64) { size = sz }
countUpdater = func(sz, ds, itms, drd, ird int64) { size = sz }
errUpdater = func(s string, e error) {
errs = multierror.Append(errs, e)
errCount++