diff --git a/src/internal/m365/collection/drive/collection.go b/src/internal/m365/collection/drive/collection.go index 4b3618708..518aaaf24 100644 --- a/src/internal/m365/collection/drive/collection.go +++ b/src/internal/m365/collection/drive/collection.go @@ -75,13 +75,14 @@ type Collection struct { // Specifies if it new, moved/rename or deleted state data.CollectionState - // scope specifies what scope the items in a collection belongs - // to. This is primarily useful when dealing with a "package", - // like in the case of a OneNote file. A OneNote file is a - // collection with a package scope and multiple files in it. Most - // other collections have a scope of folder to indicate that the - // files within them belong to a folder. - scope collectionScope + // true if this collection, or a parent directory of this collection, + // is marked as a package. + // packages are only marked on the top-level directory, but special-case + // handling need apply to all subfolders. Therefore it is necessary to cascade + // that identification to all affected collections, not just those that identify + // as packages themselves. + // see: https://learn.microsoft.com/en-us/graph/api/resources/package?view=graph-rest-1.0 + isPackageOrChildOfPackage bool // should only be true if the old delta token expired doNotMergeItems bool @@ -111,7 +112,7 @@ func NewCollection( driveID string, statusUpdater support.StatusUpdater, ctrlOpts control.Options, - colScope collectionScope, + isPackageOrChildOfPackage bool, doNotMergeItems bool, urlCache getItemPropertyer, ) (*Collection, error) { @@ -137,7 +138,7 @@ func NewCollection( driveID, statusUpdater, ctrlOpts, - colScope, + isPackageOrChildOfPackage, doNotMergeItems, urlCache) @@ -155,24 +156,26 @@ func newColl( driveID string, statusUpdater support.StatusUpdater, ctrlOpts control.Options, - colScope collectionScope, + isPackageOrChildOfPackage bool, doNotMergeItems bool, urlCache getItemPropertyer, ) *Collection { + dataCh := make(chan data.Item, graph.Parallelism(path.OneDriveMetadataService).CollectionBufferSize()) + c := &Collection{ - handler: handler, - protectedResource: resource, - folderPath: currPath, - prevPath: prevPath, - driveItems: map[string]models.DriveItemable{}, - driveID: driveID, - data: make(chan data.Item, graph.Parallelism(path.OneDriveMetadataService).CollectionBufferSize()), - statusUpdater: statusUpdater, - ctrl: ctrlOpts, - state: data.StateOf(prevPath, currPath), - scope: colScope, - doNotMergeItems: doNotMergeItems, - urlCache: urlCache, + handler: handler, + protectedResource: resource, + folderPath: currPath, + prevPath: prevPath, + driveItems: map[string]models.DriveItemable{}, + driveID: driveID, + data: dataCh, + statusUpdater: statusUpdater, + ctrl: ctrlOpts, + state: data.StateOf(prevPath, currPath), + isPackageOrChildOfPackage: isPackageOrChildOfPackage, + doNotMergeItems: doNotMergeItems, + urlCache: urlCache, } return c @@ -280,9 +283,10 @@ func (oc *Collection) getDriveItemContent( } // Skip big OneNote files as they can't be downloaded if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && + // oc.isPackageOrChildOfPackage && *item.GetSize() >= MaxOneNoteFileSize { // TODO: We've removed the file size check because it looks like we've seen persistent // 503's with smaller OneNote files also. - (oc.scope == CollectionScopePackage || strings.EqualFold(itemMimeType, oneNoteMimeType)) { + oc.isPackageOrChildOfPackage || strings.EqualFold(itemMimeType, oneNoteMimeType) { // FIXME: It is possible that in case of a OneNote file we // will end up just backing up the `onetoc2` file without // the one file which is the important part of the OneNote @@ -429,18 +433,22 @@ func (oc *Collection) streamItems(ctx context.Context, errs *fault.Bus) { return } - queuedPath := oc.handler.FormatDisplayPath(oc.driveName, parentPath) + displayPath := oc.handler.FormatDisplayPath(oc.driveName, parentPath) folderProgress := observe.ProgressWithCount( ctx, observe.ItemQueueMsg, - path.NewElements(queuedPath), + path.NewElements(displayPath), int64(len(oc.driveItems))) defer close(folderProgress) semaphoreCh := make(chan struct{}, graph.Parallelism(path.OneDriveService).Item()) defer close(semaphoreCh) + ctx = clues.Add(ctx, + "parent_path", parentPath, + "is_package", oc.isPackageOrChildOfPackage) + for _, item := range oc.driveItems { if errs.Failure() != nil { break diff --git a/src/internal/m365/collection/drive/collection_test.go b/src/internal/m365/collection/drive/collection_test.go index dcdefdaf3..923a062ee 100644 --- a/src/internal/m365/collection/drive/collection_test.go +++ b/src/internal/m365/collection/drive/collection_test.go @@ -213,7 +213,7 @@ func (suite *CollectionUnitSuite) TestCollection() { "drive-id", suite.testStatusUpdater(&wg, &collStatus), control.Options{ToggleFeatures: control.Toggles{}}, - CollectionScopeFolder, + false, true, nil) require.NoError(t, err, clues.ToCore(err)) @@ -335,7 +335,7 @@ func (suite *CollectionUnitSuite) TestCollectionReadError() { "fakeDriveID", suite.testStatusUpdater(&wg, &collStatus), control.Options{ToggleFeatures: control.Toggles{}}, - CollectionScopeFolder, + false, true, nil) require.NoError(t, err, clues.ToCore(err)) @@ -413,7 +413,7 @@ func (suite *CollectionUnitSuite) TestCollectionReadUnauthorizedErrorRetry() { "fakeDriveID", suite.testStatusUpdater(&wg, &collStatus), control.Options{ToggleFeatures: control.Toggles{}}, - CollectionScopeFolder, + false, true, nil) require.NoError(t, err, clues.ToCore(err)) @@ -469,7 +469,7 @@ func (suite *CollectionUnitSuite) TestCollectionPermissionBackupLatestModTime() "drive-id", suite.testStatusUpdater(&wg, &collStatus), control.Options{ToggleFeatures: control.Toggles{}}, - CollectionScopeFolder, + false, true, nil) require.NoError(t, err, clues.ToCore(err)) @@ -532,68 +532,76 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { table := []struct { name string - colScope collectionScope - itemSize int64 + isPackage bool itemMimeType string + itemSize int64 labels []string err error }{ { - name: "Simple item fetch no error", - colScope: CollectionScopeFolder, - itemSize: 10, - err: nil, + name: "Simple item fetch no error", + isPackage: false, + itemSize: 10, + err: nil, }, { - name: "Simple item fetch error", - colScope: CollectionScopeFolder, - itemSize: 10, - err: assert.AnError, + name: "Simple item fetch error", + isPackage: false, + itemSize: 10, + err: assert.AnError, }, { - name: "malware error", - colScope: CollectionScopeFolder, - itemSize: 10, - err: clues.New("malware error").Label(graph.LabelsMalware), - labels: []string{graph.LabelsMalware, graph.LabelsSkippable}, + name: "malware error", + isPackage: false, + itemSize: 10, + err: clues.New("malware error").Label(graph.LabelsMalware), + labels: []string{graph.LabelsMalware, graph.LabelsSkippable}, }, { - name: "file not found error", - colScope: CollectionScopeFolder, - itemSize: 10, - err: clues.New("not found error").Label(graph.LabelStatus(http.StatusNotFound)), - labels: []string{graph.LabelStatus(http.StatusNotFound), graph.LabelsSkippable}, + name: "file not found error", + isPackage: false, + itemSize: 10, + err: clues.New("not found error").Label(graph.LabelStatus(http.StatusNotFound)), + labels: []string{graph.LabelStatus(http.StatusNotFound), graph.LabelsSkippable}, }, { // This should create an error that stops the backup - name: "small OneNote file", - colScope: CollectionScopePackage, - itemSize: 10, - err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), - labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, + name: "small OneNote file", + isPackage: true, + itemSize: 10, + err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, }, { - name: "small OneNote file", - colScope: CollectionScopeFolder, + name: "small OneNote file with mimetype", + isPackage: true, itemMimeType: oneNoteMimeType, itemSize: 10, err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, }, { - name: "big OneNote file", - colScope: CollectionScopePackage, - itemSize: MaxOneNoteFileSize, - err: clues.New("big onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), - labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, + name: "big OneNote file with mimetype", + isPackage: true, + itemMimeType: oneNoteMimeType, + itemSize: MaxOneNoteFileSize, + err: clues.New("big onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, + }, + { + name: "big OneNote file", + isPackage: true, + itemSize: MaxOneNoteFileSize, + err: clues.New("big onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, }, { // This should block backup, only big OneNote files should be a problem - name: "big file", - colScope: CollectionScopeFolder, - itemSize: MaxOneNoteFileSize, - err: clues.New("big file error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), - labels: []string{graph.LabelStatus(http.StatusServiceUnavailable)}, + name: "big file", + isPackage: false, + itemSize: MaxOneNoteFileSize, + err: clues.New("big file error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable)}, }, } @@ -606,7 +614,7 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { var ( errs = fault.New(false) - col = &Collection{scope: test.colScope} + col = &Collection{isPackageOrChildOfPackage: test.isPackage} now = time.Now() ) @@ -992,7 +1000,7 @@ func (suite *CollectionUnitSuite) TestItemExtensions() { driveID, suite.testStatusUpdater(&wg, &collStatus), opts, - CollectionScopeFolder, + false, true, nil) require.NoError(t, err, clues.ToCore(err)) diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go index c213035e9..8e48587c8 100644 --- a/src/internal/m365/collection/drive/collections.go +++ b/src/internal/m365/collection/drive/collections.go @@ -28,20 +28,6 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -type collectionScope int - -const ( - // CollectionScopeUnknown is used when we don't know and don't need - // to know the kind, like in the case of deletes - CollectionScopeUnknown collectionScope = 0 - - // CollectionScopeFolder is used for regular folder collections - CollectionScopeFolder collectionScope = 1 - - // CollectionScopePackage is used to represent OneNote items - CollectionScopePackage collectionScope = 2 -) - const restrictedDirectory = "Site Pages" // Collections is used to retrieve drive data for a @@ -384,7 +370,7 @@ func (c *Collections) Get( driveID, c.statusUpdater, c.ctrl, - CollectionScopeUnknown, + false, true, nil) if err != nil { @@ -421,7 +407,7 @@ func (c *Collections) Get( driveID, c.statusUpdater, c.ctrl, - CollectionScopeUnknown, + false, true, nil) if err != nil { @@ -606,7 +592,7 @@ func (c *Collections) handleDelete( driveID, c.statusUpdater, c.ctrl, - CollectionScopeUnknown, + false, // DoNotMerge is not checked for deleted items. false, nil) @@ -847,13 +833,6 @@ func (c *Collections) processItem( return nil } - colScope := CollectionScopeFolder - if item.GetPackageEscaped() != nil { - colScope = CollectionScopePackage - } - - ictx = clues.Add(ictx, "collection_scope", colScope) - col, err := NewCollection( c.handler, c.protectedResource, @@ -862,7 +841,7 @@ func (c *Collections) processItem( driveID, c.statusUpdater, c.ctrl, - colScope, + item.GetPackageEscaped() != nil, invalidPrevDelta, nil) if err != nil { diff --git a/src/internal/m365/collection/drive/collections_test.go b/src/internal/m365/collection/drive/collections_test.go index 1236ed201..073adbe6d 100644 --- a/src/internal/m365/collection/drive/collections_test.go +++ b/src/internal/m365/collection/drive/collections_test.go @@ -2706,7 +2706,7 @@ func (suite *OneDriveCollectionsUnitSuite) TestAddURLCacheToDriveCollections() { driveID, nil, control.Options{ToggleFeatures: control.Toggles{}}, - CollectionScopeFolder, + false, true, nil) require.NoError(t, err, clues.ToCore(err))