Flatten everything

This commit is contained in:
Abhishek Pandey 2023-11-20 20:23:09 -08:00
parent b444ed328e
commit 27383e950e
3 changed files with 339 additions and 176 deletions

View File

@ -4,16 +4,14 @@ import (
"context" "context"
"io" "io"
"net/http" "net/http"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
"github.com/alcionai/clues" "github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/spatialcurrent/go-lazy/pkg/lazy" "github.com/spatialcurrent/go-lazy/pkg/lazy"
i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e "time"
"github.com/alcionai/corso/src/internal/common/idname" "github.com/alcionai/corso/src/internal/common/idname"
"github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/data"
@ -93,137 +91,6 @@ type Collection struct {
counter *count.Bus counter *count.Bus
} }
// CorsoDriveItemable is a local replica of models.DriveItemable that exposes
// only the accessors listed below.
type CorsoDriveItemable interface {
	// Identity and size.
	GetId() *string
	GetName() *string
	GetSize() *int64

	// Item facets. File and Folder are untyped in this replica.
	GetFile() interface{}
	GetFolder() interface{}
	GetRoot() models.Rootable
	GetDeleted() models.Deletedable
	GetMalware() models.Malwareable
	GetShared() models.Sharedable
	GetSharepointIds() models.SharepointIdsable

	// Parent linkage.
	GetParentReference() models.ItemReferenceable
	SetParentReference(models.ItemReferenceable)

	// Authorship and timestamps.
	GetCreatedBy() models.IdentitySetable
	GetCreatedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time
	GetLastModifiedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time

	// Untyped extra properties.
	GetAdditionalData() map[string]interface{}
}
// CorsoDriveItem is a plain-struct implementation of CorsoDriveItemable.
// Apart from GetSharepointIds, every getter returns its matching field
// unchanged.
type CorsoDriveItem struct {
	ID                   *string
	Name                 *string
	Size                 *int64
	File                 interface{}
	Folder               interface{}
	AdditionalData       map[string]interface{}
	ParentReference      models.ItemReferenceable
	Shared               models.Sharedable
	CreatedBy            models.IdentitySetable
	CreatedDateTime      *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time
	LastModifiedDateTime *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time
	Malware              models.Malwareable
	Deleted              models.Deletedable
	Root                 models.Rootable
}

// GetId returns the ID field.
func (d *CorsoDriveItem) GetId() *string { return d.ID }

// GetName returns the Name field.
func (d *CorsoDriveItem) GetName() *string { return d.Name }

// GetSize returns the Size field.
func (d *CorsoDriveItem) GetSize() *int64 { return d.Size }

// GetFile returns the File field.
func (d *CorsoDriveItem) GetFile() interface{} { return d.File }

// GetFolder returns the Folder field.
func (d *CorsoDriveItem) GetFolder() interface{} { return d.Folder }

// GetAdditionalData returns the AdditionalData map itself, not a copy.
func (d *CorsoDriveItem) GetAdditionalData() map[string]interface{} { return d.AdditionalData }

// GetParentReference returns the ParentReference field.
func (d *CorsoDriveItem) GetParentReference() models.ItemReferenceable { return d.ParentReference }

// SetParentReference replaces the ParentReference field with parent.
func (d *CorsoDriveItem) SetParentReference(parent models.ItemReferenceable) {
	d.ParentReference = parent
}

// GetShared returns the Shared field.
func (d *CorsoDriveItem) GetShared() models.Sharedable { return d.Shared }

// GetCreatedBy returns the CreatedBy field.
func (d *CorsoDriveItem) GetCreatedBy() models.IdentitySetable { return d.CreatedBy }

// GetCreatedDateTime returns the CreatedDateTime field.
func (d *CorsoDriveItem) GetCreatedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time {
	return d.CreatedDateTime
}

// GetLastModifiedDateTime returns the LastModifiedDateTime field.
func (d *CorsoDriveItem) GetLastModifiedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time {
	return d.LastModifiedDateTime
}

// GetMalware returns the Malware field.
func (d *CorsoDriveItem) GetMalware() models.Malwareable { return d.Malware }

// GetSharepointIds always returns nil; the struct has no SharePoint IDs field.
func (d *CorsoDriveItem) GetSharepointIds() models.SharepointIdsable { return nil }

// GetDeleted returns the Deleted field.
func (d *CorsoDriveItem) GetDeleted() models.Deletedable { return d.Deleted }

// GetRoot returns the Root field.
func (d *CorsoDriveItem) GetRoot() models.Rootable { return d.Root }
// ToCorsoDriveItemable converts a models.DriveItemable into a
// CorsoDriveItemable backed by *CorsoDriveItem.
//
// The File and Folder facets are reduced to presence markers: the field holds
// true when the source item has that facet and stays nil otherwise. All other
// populated fields are passed through exactly as returned by item.
func ToCorsoDriveItemable(item models.DriveItemable) CorsoDriveItemable {
	var fileMarker, folderMarker interface{}

	if item.GetFile() != nil {
		fileMarker = true
	}

	if item.GetFolder() != nil {
		folderMarker = true
	}

	return &CorsoDriveItem{
		ID:                   item.GetId(),
		Name:                 item.GetName(),
		Size:                 item.GetSize(),
		File:                 fileMarker,
		Folder:               folderMarker,
		ParentReference:      item.GetParentReference(),
		Shared:               item.GetShared(),
		CreatedBy:            item.GetCreatedBy(),
		CreatedDateTime:      item.GetCreatedDateTime(),
		LastModifiedDateTime: item.GetLastModifiedDateTime(),
		Malware:              item.GetMalware(),
		AdditionalData:       item.GetAdditionalData(),
		Deleted:              item.GetDeleted(),
		Root:                 item.GetRoot(),
	}
}
func (c *Collection) GetDriveItemsMap() map[string]CorsoDriveItemable { func (c *Collection) GetDriveItemsMap() map[string]CorsoDriveItemable {
return c.driveItems return c.driveItems
} }
@ -429,35 +296,35 @@ func (oc *Collection) getDriveItemContent(
return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable) return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable)
} }
// var itemMimeType string var itemMimeType string
// if item.GetFile() != nil { if item.GetFile() != nil {
// itemMimeType = ptr.Val(item.GetFile().GetMimeType()) itemMimeType = ptr.Val(item.GetFile().GetMimeType())
// } }
// // Skip big OneNote files as they can't be downloaded // Skip big OneNote files as they can't be downloaded
// if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) && if clues.HasLabel(err, graph.LabelStatus(http.StatusServiceUnavailable)) &&
// // oc.isPackageOrChildOfPackage && *item.GetSize() >= MaxOneNoteFileSize { // oc.isPackageOrChildOfPackage && *item.GetSize() >= MaxOneNoteFileSize {
// // TODO: We've removed the file size check because it looks like we've seen persistent // TODO: We've removed the file size check because it looks like we've seen persistent
// // 503's with smaller OneNote files also. // 503's with smaller OneNote files also.
// oc.isPackageOrChildOfPackage || strings.EqualFold(itemMimeType, oneNoteMimeType) { oc.isPackageOrChildOfPackage || strings.EqualFold(itemMimeType, oneNoteMimeType) {
// // FIXME: It is possible that in case of a OneNote file we // FIXME: It is possible that in case of a OneNote file we
// // will end up just backing up the `onetoc2` file without // will end up just backing up the `onetoc2` file without
// // the one file which is the important part of the OneNote // the one file which is the important part of the OneNote
// // "item". This will have to be handled during the // "item". This will have to be handled during the
// // restore, or we have to handle it separately by somehow // restore, or we have to handle it separately by somehow
// // deleting the entire collection. // deleting the entire collection.
// logger. logger.
// CtxErr(ctx, err). CtxErr(ctx, err).
// With("skipped_reason", fault.SkipOneNote). With("skipped_reason", fault.SkipOneNote).
// Info("inaccessible one note file") Info("inaccessible one note file")
// // errs.AddSkip(ctx, fault.FileSkip( // errs.AddSkip(ctx, fault.FileSkip(
// // fault.SkipOneNote, // fault.SkipOneNote,
// // driveID, // driveID,
// // itemID, // itemID,
// // itemName, // itemName,
// // graph.ItemInfo(item))) // graph.ItemInfo(item)))
// return nil, clues.Wrap(err, "inaccesible oneNote item").Label(graph.LabelsSkippable) return nil, clues.Wrap(err, "inaccesible oneNote item").Label(graph.LabelsSkippable)
// } }
errs.AddRecoverable( errs.AddRecoverable(
ctx, ctx,
@ -710,7 +577,7 @@ func (oc *Collection) streamDriveItem(
"item_name", clues.Hide(itemName), "item_name", clues.Hide(itemName),
"item_size", itemSize) "item_size", itemSize)
item.SetParentReference(setName(item.GetParentReference(), oc.driveName)) // item.SetParentReference(setName(item.GetParentReference(), oc.driveName))
isFile := item.GetFile() != nil isFile := item.GetFile() != nil

View File

@ -21,6 +21,7 @@ import (
"github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/count" "github.com/alcionai/corso/src/pkg/count"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
"github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api" "github.com/alcionai/corso/src/pkg/services/m365/api"
@ -859,7 +860,7 @@ func (c *Collections) processItem(
var ( var (
itemID = ptr.Val(item.GetId()) itemID = ptr.Val(item.GetId())
itemName = ptr.Val(item.GetName()) itemName = ptr.Val(item.GetName())
isFolder = item.GetFolder() != nil isFolder = item.GetFolder() != nil || item.GetPackageEscaped() != nil
) )
ctx = clues.Add( ctx = clues.Add(
@ -869,16 +870,16 @@ func (c *Collections) processItem(
"item_is_folder", isFolder) "item_is_folder", isFolder)
if item.GetMalware() != nil { if item.GetMalware() != nil {
// addtl := graph.ItemInfo(item) addtl := graph.ItemInfo(di)
// skip := fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, addtl) skip := fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, addtl)
// if isFolder { if isFolder {
// skip = fault.ContainerSkip(fault.SkipMalware, driveID, itemID, itemName, addtl) skip = fault.ContainerSkip(fault.SkipMalware, driveID, itemID, itemName, addtl)
// } }
// skipper.AddSkip(ctx, skip) skipper.AddSkip(ctx, skip)
// logger.Ctx(ctx).Infow("malware detected", "item_details", addtl) logger.Ctx(ctx).Infow("malware detected", "item_details", addtl)
// counter.Inc(count.Malware) counter.Inc(count.Malware)
return nil return nil
} }
@ -949,9 +950,19 @@ func (c *Collections) processItem(
return nil return nil
} }
// childOfPackage := filters. isPackage := item.GetPackageEscaped() != nil
// PathPrefix(maps.Keys(topLevelPackages)). if isPackage {
// Compare(collectionPath.String()) counter.Inc(count.Packages)
// mark this path as a package type for all other collections.
// any subfolder should get marked as a childOfPackage below.
topLevelPackages[collectionPath.String()] = struct{}{}
} else {
counter.Inc(count.Folders)
}
childOfPackage := filters.
PathPrefix(maps.Keys(topLevelPackages)).
Compare(collectionPath.String())
// This check is to ensure that if a folder was deleted and // This check is to ensure that if a folder was deleted and
// recreated multiple times between a backup, we only use the // recreated multiple times between a backup, we only use the
@ -987,7 +998,7 @@ func (c *Collections) processItem(
driveID, driveID,
c.statusUpdater, c.statusUpdater,
c.ctrl, c.ctrl,
false, isPackage || childOfPackage,
invalidPrevDelta || collPathAlreadyExists, invalidPrevDelta || collPathAlreadyExists,
nil, nil,
counter.Local()) counter.Local())

View File

@ -0,0 +1,285 @@
package drive
import (
"time"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/microsoftgraph/msgraph-sdk-go/models"
)
// CorsoDriveItemable is a trimmed-down replica of models.DriveItemable. Facets
// are exposed through package-local interfaces rather than Graph SDK types.
type CorsoDriveItemable interface {
	// Identity and size.
	GetId() *string
	GetName() *string
	GetSize() *int64

	// Facets describing what kind of item this is and the state it is in.
	GetFile() fileDriveItemable
	GetFolder() folderDriveItemable
	GetPackageEscaped() packageDriveItemable
	GetRoot() itemRootable
	GetDeleted() deletedable
	GetMalware() malwareable
	GetShared() itemSharedable

	// Parent linkage.
	GetParentReference() parentReferenceable
	SetParentReference(parentReferenceable)

	// Authorship and timestamps.
	GetCreatedBy() itemIdentitySetable
	GetCreatedDateTime() *time.Time
	GetLastModifiedDateTime() *time.Time

	// Untyped extra properties.
	GetAdditionalData() map[string]interface{}

	// Not used anywhere, so it is left out of the replica.
	// GetSharepointIds() sharepointIdsable
}
type (
	// fileDriveItemable is the file facet of a drive item; it exposes the
	// file's MIME type.
	fileDriveItemable interface {
		GetMimeType() *string
	}

	// folderDriveItemable is the folder facet of a drive item. It declares no
	// methods, so callers can only check it for nil.
	folderDriveItemable interface{}

	// packageDriveItemable is the package facet of a drive item. It declares
	// no methods, so callers can only check it for nil.
	packageDriveItemable interface{}
)
// parentReferenceable exposes the parent-reference fields of a drive item:
// the parent's ID, name and path, plus the ID of the drive it lives in.
type parentReferenceable interface {
	GetId() *string
	GetName() *string
	GetPath() *string
	GetDriveId() *string
}
// Method-less facet interfaces. None of them declares behavior, so the only
// thing a caller can do with a value is compare it against nil.
type (
	// itemSharedable is the shared facet of a drive item.
	itemSharedable interface{}

	// malwareable is the malware facet of a drive item.
	malwareable interface{}

	// deletedable is the deleted facet of a drive item.
	deletedable interface{}

	// itemRootable is the root facet of a drive item.
	itemRootable interface{}
)
type (
	// itemIdentitySetable is the identity set attached to a drive item; it
	// exposes only the user identity.
	itemIdentitySetable interface {
		GetUser() itemUserable
	}

	// itemUserable is a user identity, described solely by its untyped
	// additional-data map.
	itemUserable interface {
		GetAdditionalData() map[string]interface{}
	}
)
// Concrete implementations
type (
	// folderDriveItem is the concrete folder facet; it records only that the
	// facet is present.
	folderDriveItem struct {
		isFolder bool
	}

	// fileDriveItem is the concrete file facet; it records that the facet is
	// present together with the file's MIME type.
	fileDriveItem struct {
		isFile   bool
		mimeType *string
	}

	// packageDriveItem is the concrete package facet; it records only that
	// the facet is present.
	packageDriveItem struct {
		isPackage bool
	}
)

// GetMimeType returns the stored MIME type pointer, which may be nil.
func (f *fileDriveItem) GetMimeType() *string { return f.mimeType }
// parentReference is the concrete parentReferenceable. Every field is an
// optional string pointer and each getter returns its field unchanged.
type parentReference struct {
	path    *string
	id      *string
	name    *string
	driveId *string
}

// GetPath returns the stored path pointer.
func (p *parentReference) GetPath() *string { return p.path }

// GetId returns the stored ID pointer.
func (p *parentReference) GetId() *string { return p.id }

// GetName returns the stored name pointer.
func (p *parentReference) GetName() *string { return p.name }

// GetDriveId returns the stored drive ID pointer.
func (p *parentReference) GetDriveId() *string { return p.driveId }
// Concrete presence markers for the method-less facets. Each one stores a
// single flag and has no methods.
type (
	// itemShared marks an item that carries the shared facet.
	itemShared struct {
		isShared bool
	}

	// itemMalware marks an item that carries the malware facet.
	itemMalware struct {
		isMalware bool
	}

	// itemDeleted marks an item that carries the deleted facet.
	itemDeleted struct {
		isDeleted bool
	}

	// itemRoot marks an item that carries the root facet.
	itemRoot struct {
		isRoot bool
	}
)
type (
	// itemIdentitySet is the concrete itemIdentitySetable; it wraps a single
	// user identity.
	itemIdentitySet struct {
		user itemUserable
	}

	// itemUser is the concrete itemUserable; it wraps the user's untyped
	// additional-data map.
	itemUser struct {
		additionalData map[string]interface{}
	}
)

// GetUser returns the wrapped user identity, or nil when none was set.
func (s *itemIdentitySet) GetUser() itemUserable { return s.user }

// GetAdditionalData returns the wrapped map itself, not a copy.
func (u *itemUser) GetAdditionalData() map[string]interface{} { return u.additionalData }
// CorsoDriveItem is the struct-backed implementation of CorsoDriveItemable.
// ID, Name and Size are stored by value; the remaining fields hold facet
// interfaces, pointers or a map and are nil when unset.
type CorsoDriveItem struct {
	ID                   string
	Name                 string
	Size                 int64
	File                 fileDriveItemable
	Folder               folderDriveItemable
	Package              packageDriveItemable
	AdditionalData       map[string]interface{}
	ParentReference      parentReferenceable
	Shared               itemSharedable
	CreatedBy            itemIdentitySetable
	CreatedDateTime      *time.Time
	LastModifiedDateTime *time.Time
	Malware              malwareable
	Deleted              deletedable
	Root                 itemRootable
}

// GetId returns a pointer to the ID field itself, so it is non-nil for any
// non-nil receiver.
func (d *CorsoDriveItem) GetId() *string { return &d.ID }

// GetName returns a pointer to the Name field itself.
func (d *CorsoDriveItem) GetName() *string { return &d.Name }

// GetSize returns a pointer to the Size field itself.
func (d *CorsoDriveItem) GetSize() *int64 { return &d.Size }

// GetFile returns the file facet, or nil when the item has none.
func (d *CorsoDriveItem) GetFile() fileDriveItemable { return d.File }

// GetFolder returns the folder facet, or nil when the item has none.
func (d *CorsoDriveItem) GetFolder() folderDriveItemable { return d.Folder }

// GetPackageEscaped returns the package facet, or nil when the item has none.
func (d *CorsoDriveItem) GetPackageEscaped() packageDriveItemable { return d.Package }

// GetParentReference returns the ParentReference field.
func (d *CorsoDriveItem) GetParentReference() parentReferenceable { return d.ParentReference }

// SetParentReference replaces the ParentReference field with parent.
func (d *CorsoDriveItem) SetParentReference(parent parentReferenceable) {
	d.ParentReference = parent
}

// GetAdditionalData returns the AdditionalData map itself, not a copy.
func (d *CorsoDriveItem) GetAdditionalData() map[string]interface{} { return d.AdditionalData }

// GetShared returns the shared facet, or nil when the item has none.
func (d *CorsoDriveItem) GetShared() itemSharedable { return d.Shared }

// GetCreatedBy returns the CreatedBy field.
func (d *CorsoDriveItem) GetCreatedBy() itemIdentitySetable { return d.CreatedBy }

// GetCreatedDateTime returns the CreatedDateTime field.
func (d *CorsoDriveItem) GetCreatedDateTime() *time.Time { return d.CreatedDateTime }

// GetLastModifiedDateTime returns the LastModifiedDateTime field.
func (d *CorsoDriveItem) GetLastModifiedDateTime() *time.Time { return d.LastModifiedDateTime }

// GetMalware returns the malware facet, or nil when the item has none.
func (d *CorsoDriveItem) GetMalware() malwareable { return d.Malware }

// GetDeleted returns the deleted facet, or nil when the item has none.
func (d *CorsoDriveItem) GetDeleted() deletedable { return d.Deleted }

// GetRoot returns the root facet, or nil when the item has none.
func (d *CorsoDriveItem) GetRoot() itemRootable { return d.Root }
// func (c *CorsoDriveItem) GetSharepointIds() sharepointIdsable {
// return nil
// }
// ToCorsoDriveItemable converts a Graph SDK models.DriveItemable into the
// package-local CorsoDriveItemable, backed by *CorsoDriveItem.
//
// ID, name and size are dereferenced through ptr.Val and stored by value.
// Timestamps and the additional-data map are passed through as returned by
// item. The folder, package, shared, malware, deleted and root facets are
// reduced to presence markers; the file facet also keeps its MIME type. The
// parent reference is copied field by field (ID, path, name, drive ID).
//
// CreatedBy is populated only when the source item has both an identity set
// and a user within it. An identity set without a user leaves CreatedBy nil,
// so callers never see a non-nil identity set whose GetUser() is nil.
func ToCorsoDriveItemable(item models.DriveItemable) CorsoDriveItemable {
	cdi := &CorsoDriveItem{
		ID:                   ptr.Val(item.GetId()),
		Name:                 ptr.Val(item.GetName()),
		Size:                 ptr.Val(item.GetSize()),
		CreatedDateTime:      item.GetCreatedDateTime(),
		LastModifiedDateTime: item.GetLastModifiedDateTime(),
		AdditionalData:       item.GetAdditionalData(),
	}

	// Each facet is assigned only when present, so the interface-typed fields
	// stay truly nil (not a typed nil pointer) for absent facets.
	if item.GetFolder() != nil {
		cdi.Folder = &folderDriveItem{isFolder: true}
	}

	if file := item.GetFile(); file != nil {
		cdi.File = &fileDriveItem{
			isFile:   true,
			mimeType: file.GetMimeType(),
		}
	}

	if item.GetPackageEscaped() != nil {
		cdi.Package = &packageDriveItem{isPackage: true}
	}

	if parent := item.GetParentReference(); parent != nil {
		cdi.ParentReference = &parentReference{
			id:      parent.GetId(),
			path:    parent.GetPath(),
			name:    parent.GetName(),
			driveId: parent.GetDriveId(),
		}
	}

	if item.GetShared() != nil {
		cdi.Shared = &itemShared{isShared: true}
	}

	if item.GetMalware() != nil {
		cdi.Malware = &itemMalware{isMalware: true}
	}

	if item.GetDeleted() != nil {
		cdi.Deleted = &itemDeleted{isDeleted: true}
	}

	if item.GetRoot() != nil {
		cdi.Root = &itemRoot{isRoot: true}
	}

	// The user must be nil-checked as well as the identity set: calling
	// GetAdditionalData on a nil user would panic.
	if createdBy := item.GetCreatedBy(); createdBy != nil {
		if user := createdBy.GetUser(); user != nil {
			cdi.CreatedBy = &itemIdentitySet{
				user: &itemUser{
					additionalData: user.GetAdditionalData(),
				},
			}
		}
	}

	return cdi
}