Refactor drive item reader

This commit is contained in:
Abhishek Pandey 2023-06-30 04:28:09 -07:00
parent a7ae09072c
commit e8fb164f18
3 changed files with 184 additions and 129 deletions

View File

@ -21,6 +21,7 @@ import (
"github.com/alcionai/corso/src/internal/observe" "github.com/alcionai/corso/src/internal/observe"
"github.com/alcionai/corso/src/pkg/backup/details" "github.com/alcionai/corso/src/pkg/backup/details"
"github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/extensions"
"github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/path"
@ -403,17 +404,21 @@ func readItemContents(
return rc, nil return rc, nil
} }
// driveStats groups the counters that populateItems accumulates while
// reading drive items. A single instance is shared by pointer with every
// per-item goroutine, and each field is only modified via atomic.AddInt64,
// so the fields must remain plain int64 values addressable with &stats.<field>.
type driveStats struct {
// dirsRead counts non-file items that reached the "item read successfully" step.
dirsRead int64
// itemsRead counts file items that reached the "item read successfully" step.
itemsRead int64
// byteCount is the running sum of item sizes (item.GetSize()) for items
// counted as read.
byteCount int64
// itemsFound counts items for which item.GetFile() != nil.
itemsFound int64
// dirsFound counts every item that is not a file.
dirsFound int64
}
// populateItems iterates through items added to the collection // populateItems iterates through items added to the collection
// and uses the collection `itemReader` to read the item // and uses the collection `itemReader` to read the item
func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
var ( var (
byteCount int64
itemsRead int64
dirsRead int64
itemsFound int64
dirsFound int64
wg sync.WaitGroup
el = errs.Local() el = errs.Local()
stats driveStats
wg sync.WaitGroup
) )
// Retrieve the OneDrive folder path to set later in // Retrieve the OneDrive folder path to set later in
@ -445,12 +450,41 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
wg.Add(1) wg.Add(1)
go func(ctx context.Context, item models.DriveItemable) { go func(item models.DriveItemable) {
defer wg.Done() defer wg.Done()
defer func() { <-semaphoreCh }() defer func() { <-semaphoreCh }()
// Read the item // Read the item
oc.populateDriveItem(
ctx,
parentPath,
item,
&stats,
&extensions.ItemExtensionHandler{},
oc.ctrl.BackupItemExtensions,
errs,
)
folderProgress <- struct{}{}
}(item) // TODO: is copy okay here?
}
wg.Wait()
oc.reportAsCompleted(ctx, int(stats.itemsFound), int(stats.itemsRead), stats.byteCount)
}
func (oc *Collection) populateDriveItem(
ctx context.Context,
parentPath *path.Builder,
item models.DriveItemable,
stats *driveStats,
aie extensions.AddItemExtensioner,
factories []extensions.CorsoItemExtensionFactory,
errs *fault.Bus,
) {
var ( var (
el = errs.Local()
itemID = ptr.Val(item.GetId()) itemID = ptr.Val(item.GetId())
itemName = ptr.Val(item.GetName()) itemName = ptr.Val(item.GetName())
itemSize = ptr.Val(item.GetSize()) itemSize = ptr.Val(item.GetSize())
@ -473,12 +507,12 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
isFile := item.GetFile() != nil isFile := item.GetFile() != nil
if isFile { if isFile {
atomic.AddInt64(&itemsFound, 1) atomic.AddInt64(&stats.itemsFound, 1)
metaFileName = itemID metaFileName = itemID
metaSuffix = metadata.MetaFileSuffix metaSuffix = metadata.MetaFileSuffix
} else { } else {
atomic.AddInt64(&dirsFound, 1) atomic.AddInt64(&stats.dirsFound, 1)
// metaFileName not set for directories so we get just ".dirmeta" // metaFileName not set for directories so we get just ".dirmeta"
metaSuffix = metadata.DirMetaFileSuffix metaSuffix = metadata.DirMetaFileSuffix
@ -508,6 +542,32 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
return nil, err return nil, err
} }
if aie != nil && len(factories) != 0 {
logger.Ctx(ctx).Info("enabling drive item extensions")
extRc, extInfo, err := aie.AddItemExtensions(
ctx,
itemData,
itemInfo,
oc.ctrl.BackupItemExtensions)
if err != nil {
return nil, clues.Wrap(err, "adding item extensions")
}
if extInfo == nil {
return nil, clues.New("nil extension info")
}
if extRc == nil {
return nil, clues.New("nil extension reader")
}
itemInfo.OneDrive.Extension = extInfo
itemData = extRc
} else {
logger.Ctx(ctx).Info("drive item extensions disabled")
}
// display/log the item download // display/log the item download
progReader, _ := observe.ItemProgress( progReader, _ := observe.ItemProgress(
ctx, ctx,
@ -546,21 +606,12 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
// Item read successfully, add to collection // Item read successfully, add to collection
if isFile { if isFile {
atomic.AddInt64(&itemsRead, 1) atomic.AddInt64(&stats.itemsRead, 1)
} else { } else {
atomic.AddInt64(&dirsRead, 1) atomic.AddInt64(&stats.dirsRead, 1)
} }
// byteCount iteration atomic.AddInt64(&stats.byteCount, itemSize)
atomic.AddInt64(&byteCount, itemSize)
folderProgress <- struct{}{}
}(ctx, item)
}
wg.Wait()
oc.reportAsCompleted(ctx, int(itemsFound), int(itemsRead), byteCount)
} }
func (oc *Collection) reportAsCompleted(ctx context.Context, itemsFound, itemsRead int, byteCount int64) { func (oc *Collection) reportAsCompleted(ctx context.Context, itemsFound, itemsRead int, byteCount int64) {

View File

@ -988,6 +988,7 @@ type OneDriveInfo struct {
Owner string `json:"owner,omitempty"` Owner string `json:"owner,omitempty"`
ParentPath string `json:"parentPath"` ParentPath string `json:"parentPath"`
Size int64 `json:"size,omitempty"` Size int64 `json:"size,omitempty"`
Extension *ExtensionInfo `json:"extensionData,omitempty"`
} }
// Headers returns the human-readable names of properties in a OneDriveInfo // Headers returns the human-readable names of properties in a OneDriveInfo

View File

@ -8,6 +8,7 @@ import (
"github.com/alcionai/corso/src/internal/common/dttm" "github.com/alcionai/corso/src/internal/common/dttm"
"github.com/alcionai/corso/src/pkg/control/repository" "github.com/alcionai/corso/src/pkg/control/repository"
"github.com/alcionai/corso/src/pkg/extensions"
"github.com/alcionai/corso/src/pkg/logger" "github.com/alcionai/corso/src/pkg/logger"
) )
@ -20,6 +21,7 @@ type Options struct {
ToggleFeatures Toggles `json:"toggleFeatures"` ToggleFeatures Toggles `json:"toggleFeatures"`
Parallelism Parallelism `json:"parallelism"` Parallelism Parallelism `json:"parallelism"`
Repo repository.Options `json:"repo"` Repo repository.Options `json:"repo"`
BackupItemExtensions []extensions.CorsoItemExtensionFactory `json:"-"`
} }
type Parallelism struct { type Parallelism struct {
@ -49,6 +51,7 @@ func Defaults() Options {
CollectionBuffer: 4, CollectionBuffer: 4,
ItemFetch: 4, ItemFetch: 4,
}, },
BackupItemExtensions: []extensions.CorsoItemExtensionFactory{},
} }
} }