From 706849a006b9fca1edb4009e5b24c4e9c842a355 Mon Sep 17 00:00:00 2001 From: Vaibhav Kamra Date: Tue, 17 Oct 2023 04:13:55 +0100 Subject: [PATCH] Enhance OneNote skip check (#4517) This enhances the check that allows skipping OneNote files that cannot be downloaded with a mime type check. This handles cases where the folder the item is in does not have the `package` facet. --- #### Does this PR need a docs update or release note? - [x] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [ ] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [ ] :muscle: Manual - [x] :zap: Unit test - [ ] :green_heart: E2E --- CHANGELOG.md | 1 + .../m365/collection/drive/collection.go | 10 +++++++-- .../m365/collection/drive/collection_test.go | 21 ++++++++++++++----- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12c60ecff..67b3f7690 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix a case where missing item LastModifiedTimes could cause incremental backups to fail. - Email size metadata was incorrectly set to the size of the last attachment. Emails will now correctly report the size of the mail content plus the size of all attachments. - Improves the filtering capabilities for Groups restore and backup +- Improve check to skip OneNote files that cannot be downloaded. ### Changed - Groups restore now expects the site whose backup we should restore diff --git a/src/internal/m365/collection/drive/collection.go b/src/internal/m365/collection/drive/collection.go index 7871dc9cc..4b3618708 100644 --- a/src/internal/m365/collection/drive/collection.go +++ b/src/internal/m365/collection/drive/collection.go @@ -5,6 +5,7 @@ import ( "context" "io" "net/http" + "strings" "sync" "sync/atomic" "time" @@ -32,6 +33,8 @@ import ( const ( // Used to compare in case of OneNote files MaxOneNoteFileSize = 2 * 1024 * 1024 * 1024 + + oneNoteMimeType = "application/msonenote" ) var _ data.BackupCollection = &Collection{} @@ -271,12 +274,15 @@ func (oc *Collection) getDriveItemContent( return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable) } + var itemMimeType string + if item.GetFile() != nil { + itemMimeType = ptr.Val(item.GetFile().GetMimeType()) + } // Skip big OneNote files as they can't be downloaded if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && - // oc.scope == CollectionScopePackage && *item.GetSize() >= MaxOneNoteFileSize { // TODO: We've removed the file size check because it looks like we've seen persistent // 503's with smaller OneNote files also. - oc.scope == CollectionScopePackage { + (oc.scope == CollectionScopePackage || strings.EqualFold(itemMimeType, oneNoteMimeType)) { // FIXME: It is possible that in case of a OneNote file we // will end up just backing up the `onetoc2` file without // the one file which is the important part of the OneNote diff --git a/src/internal/m365/collection/drive/collection_test.go b/src/internal/m365/collection/drive/collection_test.go index daeb8cfb3..dcdefdaf3 100644 --- a/src/internal/m365/collection/drive/collection_test.go +++ b/src/internal/m365/collection/drive/collection_test.go @@ -531,11 +531,12 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { strval := "not-important" table := []struct { - name string - colScope collectionScope - itemSize int64 - labels []string - err error + name string + colScope collectionScope + itemSize int64 + itemMimeType string + labels []string + err error }{ { name: "Simple item fetch no error", @@ -571,6 +572,14 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, }, + { + name: "small OneNote file", + colScope: CollectionScopeFolder, + itemMimeType: oneNoteMimeType, + itemSize: 10, + err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, + }, { name: "big OneNote file", colScope: CollectionScopePackage, @@ -610,6 +619,8 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { true, false) + stubItem.GetFile().SetMimeType(&test.itemMimeType) + mbh := mock.DefaultOneDriveBH("a-user") mbh.GI = mock.GetsItem{Item: stubItem} mbh.GetResps = []*http.Response{{StatusCode: http.StatusOK}}