Skip large OneNote items that cannot be downloaded (#2925)
OneNote files >2GB cannot be downloaded via the graph API. This change will gracefully skip them and prevent the backup from erroring out. A OneNote file is represented by a Package(folder) which contains two file within it, `<file>.one` and `<file>.onetoc2`. From what I can tell the `onetoc2` file is the metadata file and the `one` file is the one that contains data and can get big. **The current implementation has a limitation(or feature depending on how you see it) in that we will end up doing a partial backup with just the `onetoc2` file but avoiding the big `one` file if that is big.** I did not try to change the behaviour as even currently if for somehow we ended up with an error like 404 when trying to backup one of these files, we do skip it. I've added a comment in for now, let me know if we should go back and revisit the behaviour. *The main issue is the fact that we only hit this problem once it reaches kopia and removing it at that point is harder.* --- #### Does this PR need a docs update or release note? - [x] ✅ Yes, it's included - [ ] 🕐 Yes, but in a later PR - [ ] ⛔ No #### Type of change <!--- Please check the type of change your PR introduces: ---> - [ ] 🌻 Feature - [x] 🐛 Bugfix - [ ] 🗺️ Documentation - [ ] 🤖 Supportability/Tests - [ ] 💻 CI/Deployment - [ ] 🧹 Tech Debt/Cleanup #### Issue(s) <!-- Can reference multiple issues. Use one of the following "magic words" - "closes, fixes" to auto-close the Github issue. --> * fixes https://github.com/alcionai/corso/issues/2910 #### Test Plan <!-- How will this be tested prior to merging.--> - [ ] 💪 Manual - [x] ⚡ Unit test - [ ] 💚 E2E
This commit is contained in:
parent
b23faa0fac
commit
446ccfe491
@ -16,6 +16,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||
- Enable compression for all data uploaded by kopia.
|
||||
- SharePoint --folder selectors correctly return items.
|
||||
- Fix Exchange cli args for filtering items
|
||||
- Skip huge OneNote items that Graph API prevents us from downloading
|
||||
- Skip OneNote items bigger than 2GB (Graph API prevents us from downloading them)
|
||||
|
||||
## [v0.6.1] (beta) - 2023-03-21
|
||||
|
||||
|
||||
@ -41,6 +41,9 @@ const (
|
||||
MetaFileSuffix = ".meta"
|
||||
DirMetaFileSuffix = ".dirmeta"
|
||||
DataFileSuffix = ".data"
|
||||
|
||||
// Used to compare in case of OneNote files
|
||||
MaxOneNoteFileSize = 2 * 1024 * 1024 * 1024
|
||||
)
|
||||
|
||||
func IsMetaFile(name string) bool {
|
||||
@ -96,6 +99,14 @@ type Collection struct {
|
||||
// Specifies if it new, moved/rename or deleted
|
||||
state data.CollectionState
|
||||
|
||||
// scope specifies what scope the items in a collection belongs
|
||||
// to. This is primarily useful when dealing with a "package",
|
||||
// like in the case of a OneNote file. A OneNote file is a
|
||||
// collection with a package scope and multiple files in it. Most
|
||||
// other collections have a scope of folder to indicate that the
|
||||
// files within them belong to a folder.
|
||||
scope collectionScope
|
||||
|
||||
// should only be true if the old delta token expired
|
||||
doNotMergeItems bool
|
||||
}
|
||||
@ -134,6 +145,7 @@ func NewCollection(
|
||||
statusUpdater support.StatusUpdater,
|
||||
source driveSource,
|
||||
ctrlOpts control.Options,
|
||||
colScope collectionScope,
|
||||
doNotMergeItems bool,
|
||||
) *Collection {
|
||||
c := &Collection{
|
||||
@ -148,6 +160,7 @@ func NewCollection(
|
||||
statusUpdater: statusUpdater,
|
||||
ctrl: ctrlOpts,
|
||||
state: data.StateOf(prevPath, folderPath),
|
||||
scope: colScope,
|
||||
doNotMergeItems: doNotMergeItems,
|
||||
}
|
||||
|
||||
@ -345,12 +358,27 @@ func (oc *Collection) getDriveItemContent(
|
||||
}
|
||||
|
||||
if clues.HasLabel(err, graph.LabelStatus(http.StatusNotFound)) || graph.IsErrDeletedInFlight(err) {
|
||||
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Error("item not found")
|
||||
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Info("item not found")
|
||||
el.AddSkip(fault.FileSkip(fault.SkipNotFound, itemID, itemName, graph.ItemInfo(item)))
|
||||
|
||||
return nil, clues.Wrap(err, "downloading item").Label(graph.LabelsSkippable)
|
||||
}
|
||||
|
||||
// Skip big OneNote files as they can't be downloaded
|
||||
if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) &&
|
||||
oc.scope == CollectionScopePackage && *item.GetSize() >= MaxOneNoteFileSize {
|
||||
// FIXME: It is possible that in case of a OneNote file we
|
||||
// will end up just backing up the `onetoc2` file without
|
||||
// the one file which is the important part of the OneNote
|
||||
// "item". This will have to be handled during the
|
||||
// restore, or we have to handle it separately by somehow
|
||||
// deleting the entire collection.
|
||||
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipBigOneNote).Info("max OneNote file size exceeded")
|
||||
el.AddSkip(fault.FileSkip(fault.SkipBigOneNote, itemID, itemName, graph.ItemInfo(item)))
|
||||
|
||||
return nil, clues.Wrap(err, "downloading item").Label(graph.LabelsSkippable)
|
||||
}
|
||||
|
||||
logger.CtxErr(ctx, err).Error("downloading item")
|
||||
el.AddRecoverable(clues.Stack(err).WithClues(ctx).Label(fault.LabelForceNoBackupCreation))
|
||||
|
||||
|
||||
@ -214,6 +214,7 @@ func (suite *CollectionUnitTestSuite) TestCollection() {
|
||||
suite.testStatusUpdater(&wg, &collStatus),
|
||||
test.source,
|
||||
control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}},
|
||||
CollectionScopeFolder,
|
||||
true)
|
||||
require.NotNil(t, coll)
|
||||
assert.Equal(t, folderPath, coll.FullPath())
|
||||
@ -353,6 +354,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadError() {
|
||||
suite.testStatusUpdater(&wg, &collStatus),
|
||||
test.source,
|
||||
control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}},
|
||||
CollectionScopeFolder,
|
||||
true)
|
||||
|
||||
mockItem := models.NewDriveItem()
|
||||
@ -442,6 +444,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionReadUnauthorizedErrorRetry()
|
||||
suite.testStatusUpdater(&wg, &collStatus),
|
||||
test.source,
|
||||
control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}},
|
||||
CollectionScopeFolder,
|
||||
true)
|
||||
|
||||
mockItem := models.NewDriveItem()
|
||||
@ -541,6 +544,7 @@ func (suite *CollectionUnitTestSuite) TestCollectionPermissionBackupLatestModTim
|
||||
suite.testStatusUpdater(&wg, &collStatus),
|
||||
test.source,
|
||||
control.Options{ToggleFeatures: control.Toggles{EnablePermissionsBackup: true}},
|
||||
CollectionScopeFolder,
|
||||
true)
|
||||
|
||||
mtime := time.Now().AddDate(0, -1, 0)
|
||||
@ -597,3 +601,123 @@ func (suite *CollectionUnitTestSuite) TestCollectionPermissionBackupLatestModTim
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
type GetDriveItemUnitTestSuite struct {
|
||||
tester.Suite
|
||||
}
|
||||
|
||||
func TestGetDriveItemUnitTestSuite(t *testing.T) {
|
||||
suite.Run(t, &GetDriveItemUnitTestSuite{Suite: tester.NewUnitSuite(t)})
|
||||
}
|
||||
|
||||
func (suite *GetDriveItemUnitTestSuite) TestGetDriveItemError() {
|
||||
strval := "not-important"
|
||||
|
||||
table := []struct {
|
||||
name string
|
||||
colScope collectionScope
|
||||
itemSize int64
|
||||
labels []string
|
||||
err error
|
||||
}{
|
||||
{
|
||||
name: "Simple item fetch no error",
|
||||
colScope: CollectionScopeFolder,
|
||||
itemSize: 10,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
name: "Simple item fetch error",
|
||||
colScope: CollectionScopeFolder,
|
||||
itemSize: 10,
|
||||
err: assert.AnError,
|
||||
},
|
||||
{
|
||||
name: "malware error",
|
||||
colScope: CollectionScopeFolder,
|
||||
itemSize: 10,
|
||||
err: clues.New("test error").Label(graph.LabelsMalware),
|
||||
labels: []string{graph.LabelsMalware, graph.LabelsSkippable},
|
||||
},
|
||||
{
|
||||
name: "file not found error",
|
||||
colScope: CollectionScopeFolder,
|
||||
itemSize: 10,
|
||||
err: clues.New("test error").Label(graph.LabelStatus(http.StatusNotFound)),
|
||||
labels: []string{graph.LabelStatus(http.StatusNotFound), graph.LabelsSkippable},
|
||||
},
|
||||
{
|
||||
// This should create an error that stops the backup
|
||||
name: "small OneNote file",
|
||||
colScope: CollectionScopePackage,
|
||||
itemSize: 10,
|
||||
err: clues.New("test error").Label(graph.LabelStatus(http.StatusServiceUnavailable)),
|
||||
labels: []string{graph.LabelStatus(http.StatusServiceUnavailable)},
|
||||
},
|
||||
{
|
||||
name: "big OneNote file",
|
||||
colScope: CollectionScopePackage,
|
||||
itemSize: MaxOneNoteFileSize,
|
||||
err: clues.New("test error").Label(graph.LabelStatus(http.StatusServiceUnavailable)),
|
||||
labels: []string{graph.LabelStatus(http.StatusServiceUnavailable), graph.LabelsSkippable},
|
||||
},
|
||||
{
|
||||
// This should block backup, only big OneNote files should be a problem
|
||||
name: "big file",
|
||||
colScope: CollectionScopeFolder,
|
||||
itemSize: MaxOneNoteFileSize,
|
||||
err: clues.New("test error").Label(graph.LabelStatus(http.StatusServiceUnavailable)),
|
||||
labels: []string{graph.LabelStatus(http.StatusServiceUnavailable)},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range table {
|
||||
suite.Run(test.name, func() {
|
||||
ctx, flush := tester.NewContext()
|
||||
defer flush()
|
||||
|
||||
var (
|
||||
t = suite.T()
|
||||
errs = fault.New(false)
|
||||
item = models.NewDriveItem()
|
||||
col = &Collection{scope: test.colScope}
|
||||
)
|
||||
|
||||
item.SetId(&strval)
|
||||
item.SetName(&strval)
|
||||
item.SetSize(&test.itemSize)
|
||||
|
||||
col.itemReader = func(
|
||||
ctx context.Context,
|
||||
hc *http.Client,
|
||||
item models.DriveItemable,
|
||||
) (details.ItemInfo, io.ReadCloser, error) {
|
||||
return details.ItemInfo{}, nil, test.err
|
||||
}
|
||||
|
||||
col.itemGetter = func(
|
||||
ctx context.Context,
|
||||
srv graph.Servicer,
|
||||
driveID, itemID string,
|
||||
) (models.DriveItemable, error) {
|
||||
// We are not testing this err here
|
||||
return item, nil
|
||||
}
|
||||
|
||||
_, err := col.getDriveItemContent(ctx, item, errs)
|
||||
if test.err == nil {
|
||||
assert.NoError(t, err, "no error")
|
||||
return
|
||||
}
|
||||
|
||||
assert.EqualError(t, err, clues.Wrap(test.err, "downloading item").Error(), "error")
|
||||
|
||||
labelsMap := map[string]struct{}{}
|
||||
for _, l := range test.labels {
|
||||
labelsMap[l] = struct{}{}
|
||||
}
|
||||
|
||||
assert.Equal(t, labelsMap, clues.Labels(err))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@ -33,6 +33,20 @@ const (
|
||||
SharePointSource
|
||||
)
|
||||
|
||||
type collectionScope int
|
||||
|
||||
const (
|
||||
// CollectionScopeUnknown is used when we don't know and don't need
|
||||
// to know the kind, like in the case of deletes
|
||||
CollectionScopeUnknown collectionScope = iota
|
||||
|
||||
// CollectionScopeFolder is used for regular folder collections
|
||||
CollectionScopeFolder
|
||||
|
||||
// CollectionScopePackage is used to represent OneNote items
|
||||
CollectionScopePackage
|
||||
)
|
||||
|
||||
const (
|
||||
restrictedDirectory = "Site Pages"
|
||||
rootDrivePattern = "/drives/%s/root:"
|
||||
@ -411,6 +425,7 @@ func (c *Collections) Get(
|
||||
c.statusUpdater,
|
||||
c.source,
|
||||
c.ctrl,
|
||||
CollectionScopeUnknown,
|
||||
true)
|
||||
|
||||
c.CollectionMap[driveID][fldID] = col
|
||||
@ -572,6 +587,7 @@ func (c *Collections) handleDelete(
|
||||
c.statusUpdater,
|
||||
c.source,
|
||||
c.ctrl,
|
||||
CollectionScopeUnknown,
|
||||
// DoNotMerge is not checked for deleted items.
|
||||
false)
|
||||
|
||||
@ -744,6 +760,11 @@ func (c *Collections) UpdateCollections(
|
||||
continue
|
||||
}
|
||||
|
||||
colScope := CollectionScopeFolder
|
||||
if item.GetPackage() != nil {
|
||||
colScope = CollectionScopePackage
|
||||
}
|
||||
|
||||
col := NewCollection(
|
||||
c.itemClient,
|
||||
collectionPath,
|
||||
@ -753,6 +774,7 @@ func (c *Collections) UpdateCollections(
|
||||
c.statusUpdater,
|
||||
c.source,
|
||||
c.ctrl,
|
||||
colScope,
|
||||
invalidPrevDelta,
|
||||
)
|
||||
col.driveName = driveName
|
||||
|
||||
@ -33,4 +33,5 @@ type SkippedCounts struct {
|
||||
TotalSkippedItems int `json:"totalSkippedItems"`
|
||||
SkippedMalware int `json:"skippedMalware"`
|
||||
SkippedNotFound int `json:"skippedNotFound"`
|
||||
SkippedInvalidOneNoteFile int `json:"skippedInvalidOneNoteFile"`
|
||||
}
|
||||
|
||||
@ -3,6 +3,7 @@ package backup
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/alcionai/corso/src/cli/print"
|
||||
@ -78,7 +79,9 @@ func New(
|
||||
errCount = len(fe.Items)
|
||||
skipCount = len(fe.Skipped)
|
||||
failMsg string
|
||||
malware, notFound, otherSkips int
|
||||
|
||||
malware, notFound,
|
||||
invalidONFile, otherSkips int
|
||||
)
|
||||
|
||||
if fe.Failure != nil {
|
||||
@ -92,6 +95,8 @@ func New(
|
||||
malware++
|
||||
case s.HasCause(fault.SkipNotFound):
|
||||
notFound++
|
||||
case s.HasCause(fault.SkipBigOneNote):
|
||||
invalidONFile++
|
||||
default:
|
||||
otherSkips++
|
||||
}
|
||||
@ -124,6 +129,7 @@ func New(
|
||||
TotalSkippedItems: skipCount,
|
||||
SkippedMalware: malware,
|
||||
SkippedNotFound: notFound,
|
||||
SkippedInvalidOneNoteFile: invalidONFile,
|
||||
},
|
||||
}
|
||||
}
|
||||
@ -211,23 +217,27 @@ func (b Backup) Values() []string {
|
||||
if b.TotalSkippedItems > 0 {
|
||||
status += fmt.Sprintf("%d skipped", b.TotalSkippedItems)
|
||||
|
||||
if b.SkippedMalware+b.SkippedNotFound > 0 {
|
||||
if b.SkippedMalware+b.SkippedNotFound+b.SkippedInvalidOneNoteFile > 0 {
|
||||
status += ": "
|
||||
}
|
||||
}
|
||||
|
||||
skipped := []string{}
|
||||
|
||||
if b.SkippedMalware > 0 {
|
||||
status += fmt.Sprintf("%d malware", b.SkippedMalware)
|
||||
|
||||
if b.SkippedNotFound > 0 {
|
||||
status += ", "
|
||||
}
|
||||
skipped = append(skipped, fmt.Sprintf("%d malware", b.SkippedMalware))
|
||||
}
|
||||
|
||||
if b.SkippedNotFound > 0 {
|
||||
status += fmt.Sprintf("%d not found", b.SkippedNotFound)
|
||||
skipped = append(skipped, fmt.Sprintf("%d not found", b.SkippedNotFound))
|
||||
}
|
||||
|
||||
if b.SkippedInvalidOneNoteFile > 0 {
|
||||
skipped = append(skipped, fmt.Sprintf("%d invalid OneNote file", b.SkippedInvalidOneNoteFile))
|
||||
}
|
||||
|
||||
status += strings.Join(skipped, ", ")
|
||||
|
||||
if errCount+b.TotalSkippedItems > 0 {
|
||||
status += (")")
|
||||
}
|
||||
|
||||
@ -153,7 +153,19 @@ func (suite *BackupUnitSuite) TestBackup_Values_statusVariations() {
|
||||
expect: "test (42 errors, 1 skipped: 1 not found)",
|
||||
},
|
||||
{
|
||||
name: "errors, malware, notFound",
|
||||
name: "errors and invalid OneNote",
|
||||
bup: backup.Backup{
|
||||
Status: "test",
|
||||
ErrorCount: 42,
|
||||
SkippedCounts: stats.SkippedCounts{
|
||||
TotalSkippedItems: 1,
|
||||
SkippedInvalidOneNoteFile: 1,
|
||||
},
|
||||
},
|
||||
expect: "test (42 errors, 1 skipped: 1 invalid OneNote file)",
|
||||
},
|
||||
{
|
||||
name: "errors, malware, notFound, invalid OneNote",
|
||||
bup: backup.Backup{
|
||||
Status: "test",
|
||||
ErrorCount: 42,
|
||||
@ -161,9 +173,10 @@ func (suite *BackupUnitSuite) TestBackup_Values_statusVariations() {
|
||||
TotalSkippedItems: 1,
|
||||
SkippedMalware: 1,
|
||||
SkippedNotFound: 1,
|
||||
SkippedInvalidOneNoteFile: 1,
|
||||
},
|
||||
},
|
||||
expect: "test (42 errors, 1 skipped: 1 malware, 1 not found)",
|
||||
expect: "test (42 errors, 1 skipped: 1 malware, 1 not found, 1 invalid OneNote file)",
|
||||
},
|
||||
}
|
||||
for _, test := range table {
|
||||
|
||||
@ -156,6 +156,13 @@ const (
|
||||
// SkipNotFound identifies that a file was skipped because we could
|
||||
// not find it when trying to download contents
|
||||
SkipNotFound skipCause = "file_not_found"
|
||||
|
||||
// SkipBigOneNote identifies that a file was skipped because it
|
||||
// was big OneNote file and we can only download OneNote files which
|
||||
// are less that 2GB in size.
|
||||
//nolint:lll
|
||||
// https://support.microsoft.com/en-us/office/restrictions-and-limitations-in-onedrive-and-sharepoint-64883a5d-228e-48f5-b3d2-eb39e07630fa#onenotenotebooks
|
||||
SkipBigOneNote skipCause = "big_one_note_file"
|
||||
)
|
||||
|
||||
var _ print.Printable = &Skipped{}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user