diff --git a/src/internal/m365/collection/drive/collection.go b/src/internal/m365/collection/drive/collection.go index a0895e34f..3f378088d 100644 --- a/src/internal/m365/collection/drive/collection.go +++ b/src/internal/m365/collection/drive/collection.go @@ -4,16 +4,14 @@ import ( "context" "io" "net/http" + "strings" "sync" "sync/atomic" "time" "github.com/alcionai/clues" - "github.com/microsoftgraph/msgraph-sdk-go/models" "github.com/spatialcurrent/go-lazy/pkg/lazy" - i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e "time" - "github.com/alcionai/corso/src/internal/common/idname" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/data" @@ -93,137 +91,6 @@ type Collection struct { counter *count.Bus } -// Replica of models.DriveItemable -type CorsoDriveItemable interface { - GetId() *string - GetName() *string - GetSize() *int64 - GetFile() interface{} - GetFolder() interface{} - GetAdditionalData() map[string]interface{} - GetParentReference() models.ItemReferenceable - SetParentReference(models.ItemReferenceable) - GetShared() models.Sharedable - GetCreatedBy() models.IdentitySetable - GetCreatedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time - GetLastModifiedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time - GetMalware() models.Malwareable - GetSharepointIds() models.SharepointIdsable - GetDeleted() models.Deletedable - GetRoot() models.Rootable -} - -type CorsoDriveItem struct { - ID *string - Name *string - Size *int64 - File interface{} - Folder interface{} - AdditionalData map[string]interface{} - ParentReference models.ItemReferenceable - Shared models.Sharedable - CreatedBy models.IdentitySetable - CreatedDateTime *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time - LastModifiedDateTime *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time - Malware models.Malwareable - Deleted 
models.Deletedable - Root models.Rootable -} - -func (c *CorsoDriveItem) GetId() *string { - return c.ID -} - -func (c *CorsoDriveItem) GetName() *string { - return c.Name -} - -func (c *CorsoDriveItem) GetSize() *int64 { - return c.Size -} - -func (c *CorsoDriveItem) GetFile() interface{} { - return c.File -} - -func (c *CorsoDriveItem) GetFolder() interface{} { - return c.Folder -} - -func (c *CorsoDriveItem) GetAdditionalData() map[string]interface{} { - return c.AdditionalData -} - -func (c *CorsoDriveItem) GetParentReference() models.ItemReferenceable { - return c.ParentReference -} - -func (c *CorsoDriveItem) SetParentReference(parent models.ItemReferenceable) { - c.ParentReference = parent -} - -func (c *CorsoDriveItem) GetShared() models.Sharedable { - return c.Shared -} - -func (c *CorsoDriveItem) GetCreatedBy() models.IdentitySetable { - return c.CreatedBy -} - -func (c *CorsoDriveItem) GetCreatedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time { - return c.CreatedDateTime -} - -func (c *CorsoDriveItem) GetLastModifiedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time { - return c.LastModifiedDateTime -} - -func (c *CorsoDriveItem) GetMalware() models.Malwareable { - return c.Malware -} - -func (c *CorsoDriveItem) GetSharepointIds() models.SharepointIdsable { - return nil -} - -func (c *CorsoDriveItem) GetDeleted() models.Deletedable { - return c.Deleted -} - -func (c *CorsoDriveItem) GetRoot() models.Rootable { - return c.Root -} - -// models.DriveItemable to CorsoDriveItemable -func ToCorsoDriveItemable(item models.DriveItemable) CorsoDriveItemable { - cdi := &CorsoDriveItem{ - ID: item.GetId(), - Name: item.GetName(), - Size: item.GetSize(), - File: true, - Folder: true, - ParentReference: item.GetParentReference(), - Shared: item.GetShared(), - CreatedBy: item.GetCreatedBy(), - CreatedDateTime: item.GetCreatedDateTime(), - LastModifiedDateTime: item.GetLastModifiedDateTime(), - Malware: 
item.GetMalware(), - AdditionalData: item.GetAdditionalData(), - Deleted: item.GetDeleted(), - Root: item.GetRoot(), - } - - if item.GetFolder() == nil { - cdi.Folder = nil - } - - if item.GetFile() == nil { - cdi.File = nil - } - - return cdi -} - func (c *Collection) GetDriveItemsMap() map[string]CorsoDriveItemable { return c.driveItems } @@ -429,35 +296,35 @@ func (oc *Collection) getDriveItemContent( return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable) } - // var itemMimeType string - // if item.GetFile() != nil { - // itemMimeType = ptr.Val(item.GetFile().GetMimeType()) - // } - // // Skip big OneNote files as they can't be downloaded - // if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && - // // oc.isPackageOrChildOfPackage && *item.GetSize() >= MaxOneNoteFileSize { - // // TODO: We've removed the file size check because it looks like we've seen persistent - // // 503's with smaller OneNote files also. - // oc.isPackageOrChildOfPackage || strings.EqualFold(itemMimeType, oneNoteMimeType) { - // // FIXME: It is possible that in case of a OneNote file we - // // will end up just backing up the `onetoc2` file without - // // the one file which is the important part of the OneNote - // // "item". This will have to be handled during the - // // restore, or we have to handle it separately by somehow - // // deleting the entire collection. - // logger. - // CtxErr(ctx, err). - // With("skipped_reason", fault.SkipOneNote). 
- // Info("inaccessible one note file") - // // errs.AddSkip(ctx, fault.FileSkip( - // // fault.SkipOneNote, - // // driveID, - // // itemID, - // // itemName, - // // graph.ItemInfo(item))) + var itemMimeType string + if item.GetFile() != nil { + itemMimeType = ptr.Val(item.GetFile().GetMimeType()) + } + // Skip big OneNote files as they can't be downloaded + if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && + // oc.isPackageOrChildOfPackage && *item.GetSize() >= MaxOneNoteFileSize { + // TODO: We've removed the file size check because it looks like we've seen persistent + // 503's with smaller OneNote files also. + oc.isPackageOrChildOfPackage || strings.EqualFold(itemMimeType, oneNoteMimeType) { + // FIXME: It is possible that in case of a OneNote file we + // will end up just backing up the `onetoc2` file without + // the one file which is the important part of the OneNote + // "item". This will have to be handled during the + // restore, or we have to handle it separately by somehow + // deleting the entire collection. + logger. + CtxErr(ctx, err). + With("skipped_reason", fault.SkipOneNote). 
+ Info("inaccessible one note file") + // errs.AddSkip(ctx, fault.FileSkip( + // fault.SkipOneNote, + // driveID, + // itemID, + // itemName, + // graph.ItemInfo(item))) - // return nil, clues.Wrap(err, "inaccesible oneNote item").Label(graph.LabelsSkippable) - // } + return nil, clues.Wrap(err, "inaccesible oneNote item").Label(graph.LabelsSkippable) + } errs.AddRecoverable( ctx, @@ -710,7 +577,7 @@ func (oc *Collection) streamDriveItem( "item_name", clues.Hide(itemName), "item_size", itemSize) - item.SetParentReference(setName(item.GetParentReference(), oc.driveName)) + // item.SetParentReference(setName(item.GetParentReference(), oc.driveName)) isFile := item.GetFile() != nil diff --git a/src/internal/m365/collection/drive/collections.go b/src/internal/m365/collection/drive/collections.go index d580b98f1..76f24f7a8 100644 --- a/src/internal/m365/collection/drive/collections.go +++ b/src/internal/m365/collection/drive/collections.go @@ -21,6 +21,7 @@ import ( "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/filters" "github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" @@ -859,7 +860,7 @@ func (c *Collections) processItem( var ( itemID = ptr.Val(item.GetId()) itemName = ptr.Val(item.GetName()) - isFolder = item.GetFolder() != nil + isFolder = item.GetFolder() != nil || item.GetPackageEscaped() != nil ) ctx = clues.Add( @@ -869,16 +870,16 @@ func (c *Collections) processItem( "item_is_folder", isFolder) if item.GetMalware() != nil { - // addtl := graph.ItemInfo(item) - // skip := fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, addtl) + addtl := graph.ItemInfo(di) + skip := fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, addtl) - // if isFolder { - // skip = fault.ContainerSkip(fault.SkipMalware, driveID, itemID, itemName, addtl) - // } + if 
isFolder { + skip = fault.ContainerSkip(fault.SkipMalware, driveID, itemID, itemName, addtl) + } - // skipper.AddSkip(ctx, skip) - // logger.Ctx(ctx).Infow("malware detected", "item_details", addtl) - // counter.Inc(count.Malware) + skipper.AddSkip(ctx, skip) + logger.Ctx(ctx).Infow("malware detected", "item_details", addtl) + counter.Inc(count.Malware) return nil } @@ -949,9 +950,19 @@ func (c *Collections) processItem( return nil } - // childOfPackage := filters. - // PathPrefix(maps.Keys(topLevelPackages)). - // Compare(collectionPath.String()) + isPackage := item.GetPackageEscaped() != nil + if isPackage { + counter.Inc(count.Packages) + // mark this path as a package type for all other collections. + // any subfolder should get marked as a childOfPackage below. + topLevelPackages[collectionPath.String()] = struct{}{} + } else { + counter.Inc(count.Folders) + } + + childOfPackage := filters. + PathPrefix(maps.Keys(topLevelPackages)). + Compare(collectionPath.String()) // This check is to ensure that if a folder was deleted and // recreated multiple times between a backup, we only use the @@ -987,7 +998,7 @@ func (c *Collections) processItem( driveID, c.statusUpdater, c.ctrl, - false, + isPackage || childOfPackage, invalidPrevDelta || collPathAlreadyExists, nil, counter.Local()) diff --git a/src/internal/m365/collection/drive/custom_drive_item.go b/src/internal/m365/collection/drive/custom_drive_item.go new file mode 100644 index 000000000..9cc5a2783 --- /dev/null +++ b/src/internal/m365/collection/drive/custom_drive_item.go @@ -0,0 +1,285 @@ +package drive + +import ( + "time" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/microsoftgraph/msgraph-sdk-go/models" +) + +// Replica of models.DriveItemable +type CorsoDriveItemable interface { + GetId() *string + GetName() *string + GetSize() *int64 + GetFile() fileDriveItemable + GetFolder() folderDriveItemable + GetPackageEscaped() packageDriveItemable + GetParentReference() 
parentReferenceable + GetAdditionalData() map[string]interface{} + SetParentReference(parentReferenceable) + GetShared() itemSharedable + GetCreatedBy() itemIdentitySetable + GetCreatedDateTime() *time.Time + GetLastModifiedDateTime() *time.Time + GetMalware() malwareable + GetDeleted() deletedable + GetRoot() itemRootable + // Not used anywhere + //GetSharepointIds() sharepointIdsable +} + +type fileDriveItemable interface { + GetMimeType() *string +} +type folderDriveItemable interface{} +type packageDriveItemable interface{} +type parentReferenceable interface { + GetPath() *string + GetId() *string + GetName() *string + GetDriveId() *string +} +type itemSharedable interface{} +type malwareable interface{} +type deletedable interface{} +type itemRootable interface{} +type itemIdentitySetable interface { + GetUser() itemUserable +} +type itemUserable interface { + GetAdditionalData() map[string]interface{} +} + +// Concrete implementations +type folderDriveItem struct { + isFolder bool +} + +type fileDriveItem struct { + isFile bool + mimeType *string +} + +func (fdi *fileDriveItem) GetMimeType() *string { + return fdi.mimeType +} + +type packageDriveItem struct { + isPackage bool +} + +type parentReference struct { + path *string + id *string + name *string + driveId *string +} + +func (pr *parentReference) GetPath() *string { + return pr.path +} + +func (pr *parentReference) GetId() *string { + return pr.id +} + +func (pr *parentReference) GetName() *string { + return pr.name +} + +func (pr *parentReference) GetDriveId() *string { + return pr.driveId +} + +type itemShared struct { + isShared bool +} + +type itemMalware struct { + isMalware bool +} + +type itemDeleted struct { + isDeleted bool +} + +type itemRoot struct { + isRoot bool +} + +type itemIdentitySet struct { + user itemUserable +} + +func (iis *itemIdentitySet) GetUser() itemUserable { + return iis.user +} + +type itemUser struct { + additionalData map[string]interface{} +} + +func (iu *itemUser) 
GetAdditionalData() map[string]interface{} { + return iu.additionalData +} + +type CorsoDriveItem struct { + ID string + Name string + Size int64 + File fileDriveItemable + Folder folderDriveItemable + Package packageDriveItemable + AdditionalData map[string]interface{} + ParentReference parentReferenceable + Shared itemSharedable + CreatedBy itemIdentitySetable + CreatedDateTime *time.Time + LastModifiedDateTime *time.Time + Malware malwareable + Deleted deletedable + Root itemRootable +} + +func (c *CorsoDriveItem) GetId() *string { + return &c.ID +} + +func (c *CorsoDriveItem) GetName() *string { + return &c.Name +} + +func (c *CorsoDriveItem) GetSize() *int64 { + return &c.Size +} + +func (c *CorsoDriveItem) GetFile() fileDriveItemable { + return c.File +} + +func (c *CorsoDriveItem) GetFolder() folderDriveItemable { + return c.Folder +} + +func (c *CorsoDriveItem) GetPackageEscaped() packageDriveItemable { + return c.Package +} + +func (c *CorsoDriveItem) GetParentReference() parentReferenceable { + return c.ParentReference +} + +func (c *CorsoDriveItem) SetParentReference(parent parentReferenceable) { + c.ParentReference = parent +} + +func (c *CorsoDriveItem) GetAdditionalData() map[string]interface{} { + return c.AdditionalData +} + +func (c *CorsoDriveItem) GetShared() itemSharedable { + return c.Shared +} + +func (c *CorsoDriveItem) GetCreatedBy() itemIdentitySetable { + return c.CreatedBy +} + +func (c *CorsoDriveItem) GetCreatedDateTime() *time.Time { + return c.CreatedDateTime +} + +func (c *CorsoDriveItem) GetLastModifiedDateTime() *time.Time { + return c.LastModifiedDateTime +} + +func (c *CorsoDriveItem) GetMalware() malwareable { + return c.Malware +} + +func (c *CorsoDriveItem) GetDeleted() deletedable { + return c.Deleted +} + +func (c *CorsoDriveItem) GetRoot() itemRootable { + return c.Root +} + +// func (c *CorsoDriveItem) GetSharepointIds() sharepointIdsable { +// return nil +// } + +// models.DriveItemable to CorsoDriveItemable +func 
ToCorsoDriveItemable(item models.DriveItemable) CorsoDriveItemable { + cdi := &CorsoDriveItem{ + ID: ptr.Val(item.GetId()), + Name: ptr.Val(item.GetName()), + Size: ptr.Val(item.GetSize()), + CreatedDateTime: item.GetCreatedDateTime(), + LastModifiedDateTime: item.GetLastModifiedDateTime(), + AdditionalData: item.GetAdditionalData(), + } + + if item.GetFolder() != nil { + cdi.Folder = &folderDriveItem{ + isFolder: true, + } + } + + if item.GetFile() != nil { + cdi.File = &fileDriveItem{ + isFile: true, + mimeType: item.GetFile().GetMimeType(), + } + } + + if item.GetPackageEscaped() != nil { + cdi.Package = &packageDriveItem{ + isPackage: true, + } + } + + if item.GetParentReference() != nil { + cdi.ParentReference = &parentReference{ + id: item.GetParentReference().GetId(), + path: item.GetParentReference().GetPath(), + name: item.GetParentReference().GetName(), + driveId: item.GetParentReference().GetDriveId(), + } + } + + if item.GetShared() != nil { + cdi.Shared = &itemShared{ + isShared: true, + } + } + + if item.GetMalware() != nil { + cdi.Malware = &itemMalware{ + isMalware: true, + } + } + + if item.GetDeleted() != nil { + cdi.Deleted = &itemDeleted{ + isDeleted: true, + } + } + + if item.GetRoot() != nil { + cdi.Root = &itemRoot{ + isRoot: true, + } + } + + if createdBy := item.GetCreatedBy(); createdBy != nil && createdBy.GetUser() != nil { // guard GetUser(): calling GetAdditionalData on a nil user would panic + cdi.CreatedBy = &itemIdentitySet{ + user: &itemUser{ + additionalData: createdBy.GetUser().GetAdditionalData(), + }, + } + } + + return cdi +}