Enhance OneNote skip check (#4517)

This enhances the check that allows skipping OneNote files that cannot be downloaded
with a mime type check.

This handles cases where the folder the item is in does not have the `package` facet.

---

#### Does this PR need a docs update or release note?

- [x]  Yes, it's included
- [ ] 🕐 Yes, but in a later PR
- [ ]  No

#### Type of change

<!--- Please check the type of change your PR introduces: --->
- [ ] 🌻 Feature
- [x] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Supportability/Tests
- [ ] 💻 CI/Deployment
- [ ] 🧹 Tech Debt/Cleanup

#### Issue(s)

<!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. -->
* #<issue>

#### Test Plan

<!-- How will this be tested prior to merging.-->
- [ ] 💪 Manual
- [x]  Unit test
- [ ] 💚 E2E
This commit is contained in:
Vaibhav Kamra 2023-10-17 04:13:55 +01:00 committed by GitHub
parent a4214ee524
commit 706849a006
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 25 additions and 7 deletions

View File

@ -16,6 +16,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Fix a case where missing item LastModifiedTimes could cause incremental backups to fail. - Fix a case where missing item LastModifiedTimes could cause incremental backups to fail.
- Email size metadata was incorrectly set to the size of the last attachment. Emails will now correctly report the size of the mail content plus the size of all attachments. - Email size metadata was incorrectly set to the size of the last attachment. Emails will now correctly report the size of the mail content plus the size of all attachments.
- Improves the filtering capabilities for Groups restore and backup - Improves the filtering capabilities for Groups restore and backup
- Improve check to skip OneNote files that cannot be downloaded.
### Changed ### Changed
- Groups restore now expects the site whose backup we should restore - Groups restore now expects the site whose backup we should restore

View File

@ -5,6 +5,7 @@ import (
"context" "context"
"io" "io"
"net/http" "net/http"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
@ -32,6 +33,8 @@ import (
const ( const (
// Used to compare in case of OneNote files // Used to compare in case of OneNote files
MaxOneNoteFileSize = 2 * 1024 * 1024 * 1024 MaxOneNoteFileSize = 2 * 1024 * 1024 * 1024
oneNoteMimeType = "application/msonenote"
) )
var _ data.BackupCollection = &Collection{} var _ data.BackupCollection = &Collection{}
@ -271,12 +274,15 @@ func (oc *Collection) getDriveItemContent(
return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable) return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable)
} }
var itemMimeType string
if item.GetFile() != nil {
itemMimeType = ptr.Val(item.GetFile().GetMimeType())
}
// Skip big OneNote files as they can't be downloaded // Skip big OneNote files as they can't be downloaded
if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) &&
// oc.scope == CollectionScopePackage && *item.GetSize() >= MaxOneNoteFileSize {
// TODO: We've removed the file size check because it looks like we've seen persistent // TODO: We've removed the file size check because it looks like we've seen persistent
// 503's with smaller OneNote files also. // 503's with smaller OneNote files also.
oc.scope == CollectionScopePackage { (oc.scope == CollectionScopePackage || strings.EqualFold(itemMimeType, oneNoteMimeType)) {
// FIXME: It is possible that in case of a OneNote file we // FIXME: It is possible that in case of a OneNote file we
// will end up just backing up the `onetoc2` file without // will end up just backing up the `onetoc2` file without
// the one file which is the important part of the OneNote // the one file which is the important part of the OneNote

View File

@ -531,11 +531,12 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() {
strval := "not-important" strval := "not-important"
table := []struct { table := []struct {
name string name string
colScope collectionScope colScope collectionScope
itemSize int64 itemSize int64
labels []string itemMimeType string
err error labels []string
err error
}{ }{
{ {
name: "Simple item fetch no error", name: "Simple item fetch no error",
@ -571,6 +572,14 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() {
err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)), err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)),
labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable}, labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable},
}, },
{
name: "small OneNote file",
colScope: CollectionScopeFolder,
itemMimeType: oneNoteMimeType,
itemSize: 10,
err: clues.New("small onenote error").Label(graph.LabelStatus(http.StatusServiceUnavailable)),
labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable},
},
{ {
name: "big OneNote file", name: "big OneNote file",
colScope: CollectionScopePackage, colScope: CollectionScopePackage,
@ -610,6 +619,8 @@ func (suite *GetDriveItemUnitTestSuite) TestGetDriveItem_error() {
true, true,
false) false)
stubItem.GetFile().SetMimeType(&test.itemMimeType)
mbh := mock.DefaultOneDriveBH("a-user") mbh := mock.DefaultOneDriveBH("a-user")
mbh.GI = mock.GetsItem{Item: stubItem} mbh.GI = mock.GetsItem{Item: stubItem}
mbh.GetResps = []*http.Response{{StatusCode: http.StatusOK}} mbh.GetResps = []*http.Response{{StatusCode: http.StatusOK}}