diff --git a/CHANGELOG.md b/CHANGELOG.md index 12c60ecff..67b3f7690 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix a case where missing item LastModifiedTimes could cause incremental backups to fail. - Email size metadata was incorrectly set to the size of the last attachment. Emails will now correctly report the size of the mail content plus the size of all attachments. - Improves the filtering capabilities for Groups restore and backup +- Improve check to skip OneNote files that cannot be downloaded. ### Changed - Groups restore now expects the site whose backup we should restore diff --git a/src/internal/m365/collection/drive/collection.go b/src/internal/m365/collection/drive/collection.go index 7871dc9cc..4b3618708 100644 --- a/src/internal/m365/collection/drive/collection.go +++ b/src/internal/m365/collection/drive/collection.go @@ -5,6 +5,7 @@ import ( "context" "io" "net/http" + "strings" "sync" "sync/atomic" "time" @@ -32,6 +33,8 @@ import ( const ( // Used to compare in case of OneNote files MaxOneNoteFileSize = 2 * 1024 * 1024 * 1024 + + oneNoteMimeType = "application/msonenote" ) var _ data.BackupCollection = &Collection{} @@ -271,12 +274,15 @@ func (oc *Collection) getDriveItemContent( return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable) } + var itemMimeType string + if item.GetFile() != nil { + itemMimeType = ptr.Val(item.GetFile().GetMimeType()) + } // Skip big OneNote files as they can't be downloaded if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && - // oc.scope == CollectionScopePackage && *item.GetSize() >= MaxOneNoteFileSize { // TODO: We've removed the file size check because it looks like we've seen persistent // 503's with smaller OneNote files also. - oc.scope == CollectionScopePackage { + (oc.scope == CollectionScopePackage || strings.EqualFold(itemMimeType, oneNoteMimeType)) { // FIXME: It is possible that in case of a OneNote file we // will end up just backing up the `onetoc2` file without // the one file which is the important part of the OneNote diff --git a/src/internal/m365/collection/drive/collection_test.go b/src/internal/m365/collection/drive/collection_test.go index daeb8cfb3..dcdefdaf3 100644 --- a/src/internal/m365/collection/drive/collection_test.go +++ b/src/internal/m365/collection/drive/collection_test.go @@ -531,11 +531,12 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { strval := "not-important" table := []struct { - name string - colScope collectionScope - itemSize int64 - labels []string - err error + name string + colScope collectionScope + itemSize int64 + itemMimeType string + labels []string + err error }{ { name: "Simple item fetch no error", @@ -571,6 +572,14 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, }, + { + name: "small OneNote file", + colScope: CollectionScopeFolder, + itemMimeType: oneNoteMimeType, + itemSize: 10, + err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), + labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, + }, { name: "big OneNote file", colScope: CollectionScopePackage, @@ -610,6 +619,8 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() { true, false) + stubItem.GetFile().SetMimeType(&test.itemMimeType) + mbh := mock.DefaultOneDriveBH("a-user") mbh.GI = mock.GetsItem{Item: stubItem} mbh.GetResps = []*http.Response{{StatusCode: http.StatusOK}}