diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5b0b9c711..a877f4d3a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,10 +10,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Added ProtectedResourceName to the backup list json output. ProtectedResourceName holds either a UPN or a WebURL, depending on the resource type.
 - Rework base selection logic for incremental backups so it's more likely to find a valid base.
+- Improve OneDrive restore performance by parallelizing item restores
 ### Fixed
 - Fix Exchange folder cache population error when parent folder isn't found.
 - Fix Exchange backup issue caused by incorrect json serialization
+- Fix issue where the details model contained duplicate entries for API consumers
 ### Changed
 - Do not display all the items that we restored at the end if there are more than 15. You can override this with `--verbose`.
diff --git a/src/cmd/purge/scripts/exchangePurge.ps1 b/src/cmd/purge/scripts/exchangePurge.ps1
index 240364b8d..c6ca7228a 100644
--- a/src/cmd/purge/scripts/exchangePurge.ps1
+++ b/src/cmd/purge/scripts/exchangePurge.ps1
@@ -185,77 +185,109 @@ function Get-FoldersToPurge {
 [string[]]$FolderNamePurgeList = @(),

 [Parameter(Mandatory = $False, HelpMessage = "Purge folders with these prefixes")]
- [string[]]$FolderPrefixPurgeList = @()
+ [string[]]$FolderPrefixPurgeList = @(),
+
+ [Parameter(Mandatory = $False, HelpMessage = "Perform shallow traversal only")]
+ [bool]$PurgeTraversalShallow = $false
 )

- $foldersToDelete = @()
+ Write-Host "`nLooking for folders under well-known folder: $WellKnownRoot matching folders: $FolderNamePurgeList or prefixes: $FolderPrefixPurgeList for user: $User"

- # SOAP message for getting the folders
- $body = @"
-
+ $foldersToDelete = @()
+ $traversal = "Deep"
+ if ($PurgeTraversalShallow) {
+ $traversal = "Shallow"
+ }
+
+ $offset = 0
+ $moreToList = $true
+
+ # get all folder pages
+ while ($moreToList) {
+ # SOAP message for getting the folders
+ $body = @"
+ Default
+ "@

- Write-Host "`nLooking for folders under well-known folder: $WellKnownRoot matching folders: $FolderNamePurgeList or prefixes: $FolderPrefixPurgeList for user: $User"
- $getFolderIdMsg = Initialize-SOAPMessage -User $User -Body $body
- $response = Invoke-SOAPRequest -Token $Token -Message $getFolderIdMsg
+ try {
+ Write-Host "`nRetrieving folders starting from offset: $offset"

- # Get the folders from the response
- $folders = $response | Select-Xml -XPath "//t:Folders/*" -Namespace @{t = "http://schemas.microsoft.com/exchange/services/2006/types" } |
- Select-Object -ExpandProperty Node
+ $getFolderIdMsg = Initialize-SOAPMessage -User $User -Body $body
+ $response = Invoke-SOAPRequest -Token $Token -Message $getFolderIdMsg

- # Are there more folders to list
- $rootFolder = $response | Select-Xml -XPath "//m:RootFolder" -Namespace @{m = "http://schemas.microsoft.com/exchange/services/2006/messages" } |
- Select-Object -ExpandProperty Node
- $moreToList = ![System.Convert]::ToBoolean($rootFolder.IncludesLastItemInRange)
+ # Are there more folders to list
+ $rootFolder = $response | Select-Xml -XPath "//m:RootFolder" -Namespace @{m = "http://schemas.microsoft.com/exchange/services/2006/messages" } |
+ Select-Object -ExpandProperty Node
+ $moreToList = ![System.Convert]::ToBoolean($rootFolder.IncludesLastItemInRange)
+ }
+ catch {
+ Write-Host "Error retrieving folders"

- # Loop through folders
- foreach ($folder in $folders) {
- $folderName = $folder.DisplayName
- $folderCreateTime =
$folder.ExtendedProperty
- | Where-Object { $_.ExtendedFieldURI.PropertyTag -eq "0x3007" }
- | Select-Object -ExpandProperty Value
- | Get-Date
+ Write-Host $response.OuterXml
+ Exit
+ }
+
+ # Get the folders from the response
+ $folders = $response | Select-Xml -XPath "//t:Folders/*" -Namespace @{t = "http://schemas.microsoft.com/exchange/services/2006/types" } |
+ Select-Object -ExpandProperty Node

- if ($FolderNamePurgeList.count -gt 0) {
- $IsNameMatchParams = @{
- 'FolderName' = $folderName;
- 'FolderNamePurgeList' = $FolderNamePurgeList
- }
+ # Loop through folders
+ foreach ($folder in $folders) {
+ $folderName = $folder.DisplayName
+ $folderCreateTime = $folder.ExtendedProperty
+ | Where-Object { $_.ExtendedFieldURI.PropertyTag -eq "0x3007" }
+ | Select-Object -ExpandProperty Value
+ | Get-Date

- if ((IsNameMatch @IsNameMatchParams)) {
- Write-Host "• Found name match: $folderName ($folderCreateTime)"
- $foldersToDelete += $folder
- continue
+ if ($FolderNamePurgeList.count -gt 0) {
+ $IsNameMatchParams = @{
+ 'FolderName' = $folderName;
+ 'FolderNamePurgeList' = $FolderNamePurgeList
+ }
+
+ if ((IsNameMatch @IsNameMatchParams)) {
+ Write-Host "• Found name match: $folderName ($folderCreateTime)"
+ $foldersToDelete += $folder
+ continue
+ }
+ }
+
+ if ($FolderPrefixPurgeList.count -gt 0) {
+ $IsPrefixAndAgeMatchParams = @{
+ 'FolderName' = $folderName;
+ 'FolderCreateTime' = $folderCreateTime;
+ 'FolderPrefixPurgeList' = $FolderPrefixPurgeList;
+ 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp;
+ }
+
+ if ((IsPrefixAndAgeMatch @IsPrefixAndAgeMatchParams)) {
+ Write-Host "• Found prefix match: $folderName ($folderCreateTime)"
+ $foldersToDelete += $folder
+ }
 }
 }

- if ($FolderPrefixPurgeList.count -gt 0) {
- $IsPrefixAndAgeMatchParams = @{
- 'FolderName' = $folderName;
- 'FolderCreateTime' = $folderCreateTime;
- 'FolderPrefixPurgeList' = $FolderPrefixPurgeList;
- 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp;
- }
-
- if ((IsPrefixAndAgeMatch @IsPrefixAndAgeMatchParams)) {
- Write-Host "• Found prefix match: $folderName ($folderCreateTime)"
- $foldersToDelete += $folder
- }
+ if (!$moreToList -or $null -eq $folders) {
+ Write-Host "Retrieved all folders."
+ }
+ else {
+ $offset += $folders.count
 }
 }

 # PowerShell unrolls arrays returned from functions; the leading comma wraps the result so empty arrays survive the return
- return $foldersToDelete, $moreToList
+ return , $foldersToDelete
}

function Empty-Folder {
@@ -355,7 +387,10 @@ function Purge-Folders {
 [string[]]$FolderPrefixPurgeList = @(),

 [Parameter(Mandatory = $False, HelpMessage = "Purge folders before this date time (UTC)")]
- [datetime]$PurgeBeforeTimestamp
+ [datetime]$PurgeBeforeTimestamp,
+
+ [Parameter(Mandatory = $False, HelpMessage = "Perform shallow traversal only")]
+ [bool]$PurgeTraversalShallow = $false
 )

 if (($FolderNamePurgeList.count -eq 0) -and
@@ -364,9 +399,6 @@ function Purge-Folders {
 Exit
 }

- Write-Host "`nPurging CI-produced folders..."
- Write-Host "--------------------------------" - if ($FolderNamePurgeList.count -gt 0) { Write-Host "Folders with names: $FolderNamePurgeList" } @@ -382,30 +414,27 @@ function Purge-Folders { 'WellKnownRoot' = $WellKnownRoot; 'FolderNamePurgeList' = $FolderNamePurgeList; 'FolderPrefixPurgeList' = $FolderPrefixPurgeList; - 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp + 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp; + 'PurgeTraversalShallow' = $PurgeTraversalShallow } - $moreToList = $True - # only get max of 1000 results so we may need to iterate over eligible folders - while ($moreToList) { - $foldersToDelete, $moreToList = Get-FoldersToPurge @foldersToDeleteParams - $foldersToDeleteCount = $foldersToDelete.count - $foldersToDeleteIds = @() - $folderNames = @() + $foldersToDelete = Get-FoldersToPurge @foldersToDeleteParams + $foldersToDeleteCount = $foldersToDelete.count + $foldersToDeleteIds = @() + $folderNames = @() - if ($foldersToDeleteCount -eq 0) { - Write-Host "`nNo folders to purge matching the criteria" - break - } - - foreach ($folder in $foldersToDelete) { - $foldersToDeleteIds += $folder.FolderId.Id - $folderNames += $folder.DisplayName - } - - Empty-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames - Delete-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames + if ($foldersToDeleteCount -eq 0) { + Write-Host "`nNo folders to purge matching the criteria" + return } + + foreach ($folder in $foldersToDelete) { + $foldersToDeleteIds += $folder.FolderId.Id + $folderNames += $folder.DisplayName + } + + Empty-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames + Delete-Folder -FolderIdList $foldersToDeleteIds -FolderNameList $folderNames } function Create-Contact { @@ -459,7 +488,7 @@ function Get-ItemsToPurge { $foldersToSearchBody = "" if (![String]::IsNullOrEmpty($SubFolderName)) { - $subFolders, $moreToList = Get-FoldersToPurge -WellKnownRoot $WellKnownRoot -FolderNamePurgeList $SubFolderName -PurgeBeforeTimestamp $PurgeBeforeTimestamp + $subFolders = Get-FoldersToPurge -WellKnownRoot $WellKnownRoot -FolderNamePurgeList $SubFolderName -PurgeBeforeTimestamp $PurgeBeforeTimestamp if ($subFolders.count -gt 0 ) { $foldersToSearchBody = "" @@ -615,6 +644,8 @@ function Purge-Items { } } +### MAIN #### + Write-Host 'Authenticating with Exchange Web Services ...' $global:Token = Get-AccessToken | ConvertTo-SecureString -AsPlainText -Force @@ -622,14 +653,17 @@ $global:Token = Get-AccessToken | ConvertTo-SecureString -AsPlainText -Force $FolderNamePurgeList = $FolderNamePurgeList | ForEach-Object { @($_.Split(',').Trim()) } $FolderPrefixPurgeList = $FolderPrefixPurgeList | ForEach-Object { @($_.Split(',').Trim()) } +Write-Host "`nPurging CI-produced folders under 'msgfolderroot' ..." 
+Write-Host "--------------------------------------------------------" + $purgeFolderParams = @{ - 'WellKnownRoot' = "root"; + 'WellKnownRoot' = "msgfolderroot"; 'FolderNamePurgeList' = $FolderNamePurgeList; 'FolderPrefixPurgeList' = $FolderPrefixPurgeList; 'PurgeBeforeTimestamp' = $PurgeBeforeTimestamp } -#purge older prefix folders +#purge older prefix folders from msgfolderroot Purge-Folders @purgeFolderParams #purge older contacts @@ -647,4 +681,20 @@ Purge-Items -ItemsFolder "calendar" -ItemsSubFolder "Birthdays" -PurgeBeforeTime # -/Recoverable Items/SubstrateHolds Write-Host "`nProcess well-known folders that are always purged" Write-Host "---------------------------------------------------" -Empty-Folder -WellKnownRoot "deleteditems", "recoverableitemsroot" \ No newline at end of file + +# We explicitly also clean direct folders under Deleted Items since there is some evidence +# that suggests that emptying alone may not be reliable +Write-Host "`nExplicit delete of all folders under 'DeletedItems' ..." +Write-Host "----------------------------------------------------------" + +$purgeFolderParams = @{ + 'WellKnownRoot' = "deleteditems"; + 'FolderNamePurgeList' = $FolderNamePurgeList; + 'FolderPrefixPurgeList' = @('*'); + 'PurgeBeforeTimestamp' = (Get-Date); + 'PurgeTraversalShallow' = $true +} + +Purge-Folders @purgeFolderParams + +Empty-Folder -WellKnownRootList "deleteditems", "recoverableitemsroot" diff --git a/src/go.mod b/src/go.mod index 2dd8fccae..625c2a632 100644 --- a/src/go.mod +++ b/src/go.mod @@ -34,7 +34,7 @@ require ( go.uber.org/zap v1.24.0 golang.org/x/exp v0.0.0-20230213192124-5e25df0256eb golang.org/x/time v0.3.0 - golang.org/x/tools v0.9.1 + golang.org/x/tools v0.9.2 ) require ( diff --git a/src/go.sum b/src/go.sum index 84fa701e3..81b373246 100644 --- a/src/go.sum +++ b/src/go.sum @@ -672,8 +672,8 @@ golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4f golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/tools v0.9.1 h1:8WMNJAz3zrtPmnYC7ISf5dEn3MT0gY7jBJfw27yrrLo= -golang.org/x/tools v0.9.1/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= +golang.org/x/tools v0.9.2 h1:UXbndbirwCAx6TULftIfie/ygDNCwxEie+IiNP1IcNc= +golang.org/x/tools v0.9.2/go.mod h1:owI94Op576fPu3cIGQeHs3joujW/2Oc6MtlxbF5dfNc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/src/internal/connector/data_collections.go b/src/internal/connector/data_collections.go index 8d5abdcda..10f4fab2b 100644 --- a/src/internal/connector/data_collections.go +++ b/src/internal/connector/data_collections.go @@ -3,6 +3,7 @@ package connector import ( "context" "strings" + "sync" "github.com/alcionai/clues" @@ -26,6 +27,13 @@ import ( "github.com/alcionai/corso/src/pkg/selectors" ) +const ( + // copyBufferSize is used for chunked upload + // Microsoft recommends 5-10MB buffers + // https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#best-practices + copyBufferSize = 5 * 1024 * 1024 +) + // 
--------------------------------------------------------------------------- // Data Collections // --------------------------------------------------------------------------- @@ -65,8 +73,7 @@ func (gc *GraphConnector) ProduceBackupCollections( ctx, gc.Discovery.Users(), path.ServiceType(sels.Service), - sels.DiscreteOwner, - ) + sels.DiscreteOwner) if err != nil { return nil, nil, err } @@ -90,10 +97,11 @@ func (gc *GraphConnector) ProduceBackupCollections( case selectors.ServiceExchange: colls, ssmb, err = exchange.DataCollections( ctx, + gc.Discovery, sels, + gc.credentials.AzureTenantID, owner, metadata, - gc.credentials, gc.UpdateStatus, ctrlOpts, errs) @@ -256,13 +264,46 @@ func (gc *GraphConnector) ConsumeRestoreCollections( return nil, clues.Wrap(err, "malformed azure credentials") } + // Buffer pool for uploads + pool := sync.Pool{ + New: func() interface{} { + b := make([]byte, copyBufferSize) + return &b + }, + } + switch sels.Service { case selectors.ServiceExchange: - status, err = exchange.RestoreCollections(ctx, creds, gc.Discovery, gc.Service, dest, dcs, deets, errs) + status, err = exchange.RestoreCollections(ctx, + creds, + gc.Discovery, + gc.Service, + dest, + dcs, + deets, + errs) case selectors.ServiceOneDrive: - status, err = onedrive.RestoreCollections(ctx, creds, backupVersion, gc.Service, dest, opts, dcs, deets, errs) + status, err = onedrive.RestoreCollections(ctx, + creds, + backupVersion, + gc.Service, + dest, + opts, + dcs, + deets, + &pool, + errs) case selectors.ServiceSharePoint: - status, err = sharepoint.RestoreCollections(ctx, backupVersion, creds, gc.Service, dest, opts, dcs, deets, errs) + status, err = sharepoint.RestoreCollections(ctx, + backupVersion, + creds, + gc.Service, + dest, + opts, + dcs, + deets, + &pool, + errs) default: err = clues.Wrap(clues.New(sels.Service.String()), "service not supported") } diff --git a/src/internal/connector/data_collections_test.go b/src/internal/connector/data_collections_test.go index 299842e82..4c55b0d77 100644 --- a/src/internal/connector/data_collections_test.go +++ b/src/internal/connector/data_collections_test.go @@ -21,6 +21,7 @@ import ( "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/selectors" selTD "github.com/alcionai/corso/src/pkg/selectors/testdata" + "github.com/alcionai/corso/src/pkg/services/m365/api" ) // --------------------------------------------------------------------------- @@ -29,8 +30,10 @@ import ( type DataCollectionIntgSuite struct { tester.Suite - user string - site string + user string + site string + tenantID string + ac api.Client } func TestDataCollectionIntgSuite(t *testing.T) { @@ -42,10 +45,19 @@ func TestDataCollectionIntgSuite(t *testing.T) { } func (suite *DataCollectionIntgSuite) SetupSuite() { - suite.user = tester.M365UserID(suite.T()) - suite.site = tester.M365SiteID(suite.T()) + t := suite.T() - tester.LogTimeOfTest(suite.T()) + suite.user = tester.M365UserID(t) + suite.site = tester.M365SiteID(t) + + acct := tester.NewM365Account(t) + creds, err := acct.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.tenantID = creds.AzureTenantID + + suite.ac, err = api.NewClient(creds) + require.NoError(t, err, clues.ToCore(err)) } // TestExchangeDataCollection verifies interface between operation and @@ -111,16 +123,18 @@ func (suite *DataCollectionIntgSuite) TestExchangeDataCollection() { defer flush() sel := test.getSelector(t) + uidn := inMock.NewProvider(sel.ID(), sel.Name()) ctrlOpts := control.Defaults() 
ctrlOpts.ToggleFeatures.DisableDelta = !canMakeDeltaQueries collections, excludes, err := exchange.DataCollections( ctx, + suite.ac, sel, - sel, + suite.tenantID, + uidn, nil, - connector.credentials, connector.UpdateStatus, ctrlOpts, fault.New(true)) @@ -133,7 +147,7 @@ func (suite *DataCollectionIntgSuite) TestExchangeDataCollection() { // Categories with delta endpoints will produce a collection for metadata // as well as the actual data pulled, and the "temp" root collection. - assert.GreaterOrEqual(t, len(collections), 1, "expected 1 <= num collections <= 2") + assert.LessOrEqual(t, 1, len(collections), "expected 1 <= num collections <= 3") assert.GreaterOrEqual(t, 3, len(collections), "expected 1 <= num collections <= 3") for _, col := range collections { diff --git a/src/internal/connector/exchange/contact_folder_cache.go b/src/internal/connector/exchange/contact_folder_cache.go index 5526bf7b7..75cc2f66d 100644 --- a/src/internal/connector/exchange/contact_folder_cache.go +++ b/src/internal/connector/exchange/contact_folder_cache.go @@ -71,9 +71,9 @@ func (cfc *contactFolderCache) Populate( ctx context.Context, errs *fault.Bus, baseID string, - baseContainerPather ...string, + baseContainerPath ...string, ) error { - if err := cfc.init(ctx, baseID, baseContainerPather); err != nil { + if err := cfc.init(ctx, baseID, baseContainerPath); err != nil { return clues.Wrap(err, "initializing") } @@ -95,7 +95,7 @@ func (cfc *contactFolderCache) init( baseContainerPath []string, ) error { if len(baseNode) == 0 { - return clues.New("m365 folderID required for base folder").WithClues(ctx) + return clues.New("m365 folderID required for base contact folder").WithClues(ctx) } if cfc.containerResolver == nil { diff --git a/src/internal/connector/exchange/contacts_backup.go b/src/internal/connector/exchange/contacts_backup.go new file mode 100644 index 000000000..4054a17a8 --- /dev/null +++ b/src/internal/connector/exchange/contacts_backup.go @@ -0,0 +1,40 @@ +package exchange + +import ( + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ backupHandler = &contactBackupHandler{} + +type contactBackupHandler struct { + ac api.Contacts +} + +func newContactBackupHandler( + ac api.Client, +) contactBackupHandler { + acc := ac.Contacts() + + return contactBackupHandler{ + ac: acc, + } +} + +func (h contactBackupHandler) itemEnumerator() addedAndRemovedItemGetter { + return h.ac +} + +func (h contactBackupHandler) itemHandler() itemGetterSerializer { + return h.ac +} + +func (h contactBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return DefaultContactFolder, &contactFolderCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} diff --git a/src/internal/connector/exchange/contacts_restore.go b/src/internal/connector/exchange/contacts_restore.go new file mode 100644 index 000000000..63d0e87c8 --- /dev/null +++ b/src/internal/connector/exchange/contacts_restore.go @@ -0,0 +1,86 @@ +package exchange + +import ( + "context" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ itemRestorer = &contactRestoreHandler{} + +type 
contactRestoreHandler struct {
+	ac api.Contacts
+	ip itemPoster[models.Contactable]
+}
+
+func newContactRestoreHandler(
+	ac api.Client,
+) contactRestoreHandler {
+	return contactRestoreHandler{
+		ac: ac.Contacts(),
+		ip: ac.Contacts(),
+	}
+}
+
+func (h contactRestoreHandler) newContainerCache(userID string) graph.ContainerResolver {
+	return &contactFolderCache{
+		userID: userID,
+		enumer: h.ac,
+		getter: h.ac,
+	}
+}
+
+func (h contactRestoreHandler) formatRestoreDestination(
+	destinationContainerName string,
+	_ path.Path, // contact folders cannot be nested
+) *path.Builder {
+	return path.Builder{}.Append(destinationContainerName)
+}
+
+func (h contactRestoreHandler) CreateContainer(
+	ctx context.Context,
+	userID, containerName, _ string, // parent container not used
+) (graph.Container, error) {
+	return h.ac.CreateContainer(ctx, userID, containerName, "")
+}
+
+func (h contactRestoreHandler) containerSearcher() containerByNamer {
+	return nil
+}
+
+// always returns the provided value
+func (h contactRestoreHandler) orRootContainer(c string) string {
+	return c
+}
+
+func (h contactRestoreHandler) restore(
+	ctx context.Context,
+	body []byte,
+	userID, destinationID string,
+	errs *fault.Bus,
+) (*details.ExchangeInfo, error) {
+	contact, err := api.BytesToContactable(body)
+	if err != nil {
+		return nil, graph.Wrap(ctx, err, "creating contact from bytes")
+	}
+
+	ctx = clues.Add(ctx, "item_id", ptr.Val(contact.GetId()))
+
+	item, err := h.ip.PostItem(ctx, userID, destinationID, contact)
+	if err != nil {
+		return nil, graph.Wrap(ctx, err, "restoring contact")
+	}
+
+	info := api.ContactInfo(item)
+	info.Size = int64(len(body))
+
+	return info, nil
+}
diff --git a/src/internal/connector/exchange/contacts_restore_test.go b/src/internal/connector/exchange/contacts_restore_test.go
new file mode 100644
index 000000000..d33e9fb61
--- /dev/null
+++ b/src/internal/connector/exchange/contacts_restore_test.go
@@ -0,0 +1,57 @@
+package exchange
+
+import (
+	"testing"
+
+	"github.com/alcionai/clues"
+	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+
+	"github.com/alcionai/corso/src/internal/tester"
+	"github.com/alcionai/corso/src/pkg/account"
+	"github.com/alcionai/corso/src/pkg/path"
+	"github.com/alcionai/corso/src/pkg/services/m365/api"
+)
+
+type ContactsRestoreIntgSuite struct {
+	tester.Suite
+	creds  account.M365Config
+	ac     api.Client
+	userID string
+}
+
+func TestContactsRestoreIntgSuite(t *testing.T) {
+	suite.Run(t, &ContactsRestoreIntgSuite{
+		Suite: tester.NewIntegrationSuite(
+			t,
+			[][]string{tester.M365AcctCredEnvs}),
+	})
+}
+
+func (suite *ContactsRestoreIntgSuite) SetupSuite() {
+	t := suite.T()
+
+	a := tester.NewM365Account(t)
+	creds, err := a.M365Config()
+	require.NoError(t, err, clues.ToCore(err))
+
+	suite.creds = creds
+
+	suite.ac, err = api.NewClient(creds)
+	require.NoError(t, err, clues.ToCore(err))
+
+	suite.userID = tester.M365UserID(t)
+}
+
+// Testing to ensure that the cache system works in multiple different environments
+func (suite *ContactsRestoreIntgSuite) TestCreateContainerDestination() {
+	runCreateDestinationTest(
+		suite.T(),
+		newContactRestoreHandler(suite.ac),
+		path.ContactsCategory,
+		suite.creds.AzureTenantID,
+		suite.userID,
+		tester.DefaultTestRestoreDestination("").ContainerName,
+		[]string{"Hufflepuff"},
+		[]string{"Ravenclaw"})
+}
diff --git a/src/internal/connector/exchange/container_resolver_test.go b/src/internal/connector/exchange/container_resolver_test.go
index 562749cec..1cfe4690e 100644
---
a/src/internal/connector/exchange/container_resolver_test.go +++ b/src/internal/connector/exchange/container_resolver_test.go @@ -15,7 +15,6 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" - "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" ) @@ -676,181 +675,69 @@ func (suite *ConfiguredFolderCacheUnitSuite) TestAddToCache() { // integration suite // --------------------------------------------------------------------------- -type FolderCacheIntegrationSuite struct { - tester.Suite - credentials account.M365Config - gs graph.Servicer -} - -func TestFolderCacheIntegrationSuite(t *testing.T) { - suite.Run(t, &FolderCacheIntegrationSuite{ - Suite: tester.NewIntegrationSuite( - t, - [][]string{tester.M365AcctCredEnvs}, - ), - }) -} - -func (suite *FolderCacheIntegrationSuite) SetupSuite() { - t := suite.T() - - a := tester.NewM365Account(t) - m365, err := a.M365Config() - require.NoError(t, err, clues.ToCore(err)) - - suite.credentials = m365 - - adpt, err := graph.CreateAdapter( - m365.AzureTenantID, - m365.AzureClientID, - m365.AzureClientSecret) - require.NoError(t, err, clues.ToCore(err)) - - suite.gs = graph.NewService(adpt) -} - -// Testing to ensure that cache system works for in multiple different environments -func (suite *FolderCacheIntegrationSuite) TestCreateContainerDestination() { - a := tester.NewM365Account(suite.T()) - m365, err := a.M365Config() - require.NoError(suite.T(), err, clues.ToCore(err)) +func runCreateDestinationTest( + t *testing.T, + handler restoreHandler, + category path.CategoryType, + tenantID, userID, destinationName string, + containerNames1 []string, + containerNames2 []string, +) { + ctx, flush := tester.NewContext(t) + defer flush() var ( - user = tester.M365UserID(suite.T()) - directoryCaches = make(map[path.CategoryType]graph.ContainerResolver) - folderName = tester.DefaultTestRestoreDestination("").ContainerName - tests = []struct { - name string - pathFunc1 func(t *testing.T) path.Path - pathFunc2 func(t *testing.T) path.Path - category path.CategoryType - folderPrefix string - }{ - { - name: "Mail Cache Test", - category: path.EmailCategory, - pathFunc1: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EmailCategory, - false, - "Griffindor", "Croix") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - pathFunc2: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EmailCategory, - false, - "Griffindor", "Felicius") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - }, - { - name: "Contact Cache Test", - category: path.ContactsCategory, - pathFunc1: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.ContactsCategory, - false, - "HufflePuff") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - pathFunc2: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.ContactsCategory, - false, - "Ravenclaw") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - }, - { - name: "Event Cache Test", - category: path.EventsCategory, - pathFunc1: func(t *testing.T) path.Path { - pth, err := path.Build( 
- suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EventsCategory, - false, - "Durmstrang") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - pathFunc2: func(t *testing.T) path.Path { - pth, err := path.Build( - suite.credentials.AzureTenantID, - user, - path.ExchangeService, - path.EventsCategory, - false, - "Beauxbatons") - require.NoError(t, err, clues.ToCore(err)) - - return pth - }, - }, - } + svc = path.ExchangeService + gcr = handler.newContainerCache(userID) ) - for _, test := range tests { - suite.Run(test.name, func() { - t := suite.T() + path1, err := path.Build( + tenantID, + userID, + svc, + category, + false, + containerNames1...) + require.NoError(t, err, clues.ToCore(err)) - ctx, flush := tester.NewContext(t) - defer flush() + containerID, gcr, err := createDestination( + ctx, + handler, + handler.formatRestoreDestination(destinationName, path1), + userID, + gcr, + true, + fault.New(true)) + require.NoError(t, err, clues.ToCore(err)) - folderID, err := CreateContainerDestination( - ctx, - m365, - test.pathFunc1(t), - folderName, - directoryCaches, - fault.New(true)) - require.NoError(t, err, clues.ToCore(err)) + _, _, err = gcr.IDToPath(ctx, containerID) + assert.NoError(t, err, clues.ToCore(err)) - resolver := directoryCaches[test.category] + path2, err := path.Build( + tenantID, + userID, + svc, + category, + false, + containerNames2...) + require.NoError(t, err, clues.ToCore(err)) - _, _, err = resolver.IDToPath(ctx, folderID) - assert.NoError(t, err, clues.ToCore(err)) + containerID, gcr, err = createDestination( + ctx, + handler, + handler.formatRestoreDestination(destinationName, path2), + userID, + gcr, + false, + fault.New(true)) + require.NoError(t, err, clues.ToCore(err)) - secondID, err := CreateContainerDestination( - ctx, - m365, - test.pathFunc2(t), - folderName, - directoryCaches, - fault.New(true)) - require.NoError(t, err, clues.ToCore(err)) + p, l, err := gcr.IDToPath(ctx, containerID) + require.NoError(t, err, clues.ToCore(err)) - p, l, err := resolver.IDToPath(ctx, secondID) - require.NoError(t, err, clues.ToCore(err)) + _, ok := gcr.LocationInCache(l.String()) + require.True(t, ok, "looking for location in cache: %s", l) - _, ok := resolver.LocationInCache(l.String()) - require.True(t, ok, "looking for location in cache: %s", l) - - _, ok = resolver.PathInCache(p.String()) - require.True(t, ok, "looking for path in cache: %s", p) - }) - } + _, ok = gcr.PathInCache(p.String()) + require.True(t, ok, "looking for path in cache: %s", p) } diff --git a/src/internal/connector/exchange/data_collections.go b/src/internal/connector/exchange/data_collections.go index d9905f3a3..0ee250b2e 100644 --- a/src/internal/connector/exchange/data_collections.go +++ b/src/internal/connector/exchange/data_collections.go @@ -12,7 +12,6 @@ import ( "github.com/alcionai/corso/src/internal/connector/support" "github.com/alcionai/corso/src/internal/data" "github.com/alcionai/corso/src/internal/observe" - "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" @@ -159,15 +158,13 @@ func parseMetadataCollections( // DataCollections returns a DataCollection which the caller can // use to read mailbox data out for the specified user -// Assumption: User exists -// -// Add iota to this call -> mail, contacts, calendar, etc. 
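For review context: inside createCollections below, the old getterByType switch is replaced by a lookup into a category-keyed handler map. A minimal sketch of that dispatch, assembled only from names this diff introduces (BackupHandlers, backupHandler, NewContainerCache, Populate); a sketch, not a verbatim excerpt:

	handlers := BackupHandlers(ac)

	handler, ok := handlers[scope.Category().PathType()]
	if !ok {
		return nil, clues.New("unsupported backup category type").WithClues(ctx)
	}

	// each handler owns the container cache for its category
	rootFolder, cc := handler.NewContainerCache(user.ID())
	if err := cc.Populate(ctx, errs, rootFolder); err != nil {
		return nil, clues.Wrap(err, "populating container cache")
	}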
func DataCollections( ctx context.Context, + ac api.Client, selector selectors.Selector, + tenantID string, user idname.Provider, metadata []data.RestoreCollection, - acct account.M365Config, su support.StatusUpdater, ctrlOpts control.Options, errs *fault.Bus, @@ -181,6 +178,7 @@ func DataCollections( collections = []data.BackupCollection{} el = errs.Local() categories = map[path.CategoryType]struct{}{} + handlers = BackupHandlers(ac) ) // Turn on concurrency limiter middleware for exchange backups @@ -201,7 +199,8 @@ func DataCollections( dcs, err := createCollections( ctx, - acct, + handlers, + tenantID, user, scope, cdps[scope.Category().PathType()], @@ -222,7 +221,7 @@ func DataCollections( baseCols, err := graph.BaseCollections( ctx, collections, - acct.AzureTenantID, + tenantID, user.ID(), path.ExchangeService, categories, @@ -238,25 +237,13 @@ func DataCollections( return collections, nil, el.Failure() } -func getterByType(ac api.Client, category path.CategoryType) (addedAndRemovedItemIDsGetter, error) { - switch category { - case path.EmailCategory: - return ac.Mail(), nil - case path.EventsCategory: - return ac.Events(), nil - case path.ContactsCategory: - return ac.Contacts(), nil - default: - return nil, clues.New("no api client registered for category") - } -} - // createCollections - utility function that retrieves M365 // IDs through Microsoft Graph API. The selectors.ExchangeScope // determines the type of collections that are retrieved. func createCollections( ctx context.Context, - creds account.M365Config, + handlers map[path.CategoryType]backupHandler, + tenantID string, user idname.Provider, scope selectors.ExchangeScope, dps DeltaPaths, @@ -264,46 +251,40 @@ func createCollections( su support.StatusUpdater, errs *fault.Bus, ) ([]data.BackupCollection, error) { + ctx = clues.Add(ctx, "category", scope.Category().PathType()) + var ( allCollections = make([]data.BackupCollection, 0) category = scope.Category().PathType() + qp = graph.QueryParams{ + Category: category, + ResourceOwner: user, + TenantID: tenantID, + } ) - ac, err := api.NewClient(creds) - if err != nil { - return nil, clues.Wrap(err, "getting api client").WithClues(ctx) + handler, ok := handlers[category] + if !ok { + return nil, clues.New("unsupported backup category type").WithClues(ctx) } - ctx = clues.Add(ctx, "category", category) - - getter, err := getterByType(ac, category) - if err != nil { - return nil, clues.Stack(err).WithClues(ctx) - } - - qp := graph.QueryParams{ - Category: category, - ResourceOwner: user, - Credentials: creds, - } - - foldersComplete, closer := observe.MessageWithCompletion( + foldersComplete := observe.MessageWithCompletion( ctx, observe.Bulletf("%s", qp.Category)) - defer closer() defer close(foldersComplete) - resolver, err := PopulateExchangeContainerResolver(ctx, qp, errs) - if err != nil { + rootFolder, cc := handler.NewContainerCache(user.ID()) + + if err := cc.Populate(ctx, errs, rootFolder); err != nil { return nil, clues.Wrap(err, "populating container cache") } collections, err := filterContainersAndFillCollections( ctx, qp, - getter, + handler, su, - resolver, + cc, scope, dps, ctrlOpts, diff --git a/src/internal/connector/exchange/data_collections_test.go b/src/internal/connector/exchange/data_collections_test.go index 00dde5ebd..42761f9ac 100644 --- a/src/internal/connector/exchange/data_collections_test.go +++ b/src/internal/connector/exchange/data_collections_test.go @@ -222,8 +222,10 @@ func newStatusUpdater(t *testing.T, wg *sync.WaitGroup) func(status 
*support.Con type DataCollectionsIntegrationSuite struct { tester.Suite - user string - site string + user string + site string + tenantID string + ac api.Client } func TestDataCollectionsIntegrationSuite(t *testing.T) { @@ -239,18 +241,25 @@ func (suite *DataCollectionsIntegrationSuite) SetupSuite() { suite.user = tester.M365UserID(suite.T()) suite.site = tester.M365SiteID(suite.T()) + acct := tester.NewM365Account(suite.T()) + creds, err := acct.M365Config() + require.NoError(suite.T(), err, clues.ToCore(err)) + + suite.ac, err = api.NewClient(creds) + require.NoError(suite.T(), err, clues.ToCore(err)) + + suite.tenantID = creds.AzureTenantID + tester.LogTimeOfTest(suite.T()) } func (suite *DataCollectionsIntegrationSuite) TestMailFetch() { var ( - userID = tester.M365UserID(suite.T()) - users = []string{userID} - acct, err = tester.NewM365Account(suite.T()).M365Config() + userID = tester.M365UserID(suite.T()) + users = []string{userID} + handlers = BackupHandlers(suite.ac) ) - require.NoError(suite.T(), err, clues.ToCore(err)) - tests := []struct { name string scope selectors.ExchangeScope @@ -293,7 +302,8 @@ func (suite *DataCollectionsIntegrationSuite) TestMailFetch() { collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(userID, userID), test.scope, DeltaPaths{}, @@ -329,13 +339,11 @@ func (suite *DataCollectionsIntegrationSuite) TestMailFetch() { func (suite *DataCollectionsIntegrationSuite) TestDelta() { var ( - userID = tester.M365UserID(suite.T()) - users = []string{userID} - acct, err = tester.NewM365Account(suite.T()).M365Config() + userID = tester.M365UserID(suite.T()) + users = []string{userID} + handlers = BackupHandlers(suite.ac) ) - require.NoError(suite.T(), err, clues.ToCore(err)) - tests := []struct { name string scope selectors.ExchangeScope @@ -372,7 +380,8 @@ func (suite *DataCollectionsIntegrationSuite) TestDelta() { // get collections without providing any delta history (ie: full backup) collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(userID, userID), test.scope, DeltaPaths{}, @@ -403,7 +412,8 @@ func (suite *DataCollectionsIntegrationSuite) TestDelta() { // which should only contain the difference. collections, err = createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(userID, userID), test.scope, dps, @@ -438,19 +448,18 @@ func (suite *DataCollectionsIntegrationSuite) TestMailSerializationRegression() defer flush() var ( - wg sync.WaitGroup - users = []string{suite.user} + wg sync.WaitGroup + users = []string{suite.user} + handlers = BackupHandlers(suite.ac) ) - acct, err := tester.NewM365Account(t).M365Config() - require.NoError(t, err, clues.ToCore(err)) - sel := selectors.NewExchangeBackup(users) sel.Include(sel.MailFolders([]string{DefaultMailFolder}, selectors.PrefixMatch())) collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(suite.user, suite.user), sel.Scopes()[0], DeltaPaths{}, @@ -497,10 +506,10 @@ func (suite *DataCollectionsIntegrationSuite) TestMailSerializationRegression() // and to store contact within Collection. Downloaded contacts are run through // a regression test to ensure that downloaded items can be uploaded. 
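The collection tests below all share one call shape after this refactor. A hedged sketch of the leading arguments this diff changes (the trailing arguments, such as the status updater and fault bus, are unchanged and elided, so this fragment is not compilable as-is):

	handlers := BackupHandlers(suite.ac)

	collections, err := createCollections(
		ctx,
		handlers,       // replaces the account.M365Config argument
		suite.tenantID, // captured from creds in SetupSuite
		inMock.NewProvider(userID, userID),
		scope,
		DeltaPaths{}, // empty DeltaPaths => full, non-incremental enumeration
		/* remaining arguments unchanged by this diff */)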
func (suite *DataCollectionsIntegrationSuite) TestContactSerializationRegression() { - acct, err := tester.NewM365Account(suite.T()).M365Config() - require.NoError(suite.T(), err, clues.ToCore(err)) - - users := []string{suite.user} + var ( + users = []string{suite.user} + handlers = BackupHandlers(suite.ac) + ) tests := []struct { name string @@ -525,7 +534,8 @@ func (suite *DataCollectionsIntegrationSuite) TestContactSerializationRegression edcs, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(suite.user, suite.user), test.scope, DeltaPaths{}, @@ -589,17 +599,11 @@ func (suite *DataCollectionsIntegrationSuite) TestEventsSerializationRegression( ctx, flush := tester.NewContext(t) defer flush() - acct, err := tester.NewM365Account(t).M365Config() - require.NoError(t, err, clues.ToCore(err)) - - users := []string{suite.user} - - ac, err := api.NewClient(acct) - require.NoError(t, err, "creating client", clues.ToCore(err)) - var ( - calID string - bdayID string + users = []string{suite.user} + handlers = BackupHandlers(suite.ac) + calID string + bdayID string ) fn := func(gcf graph.CachedContainer) error { @@ -614,7 +618,7 @@ func (suite *DataCollectionsIntegrationSuite) TestEventsSerializationRegression( return nil } - err = ac.Events().EnumerateContainers(ctx, suite.user, DefaultCalendar, fn, fault.New(true)) + err := suite.ac.Events().EnumerateContainers(ctx, suite.user, DefaultCalendar, fn, fault.New(true)) require.NoError(t, err, clues.ToCore(err)) tests := []struct { @@ -650,7 +654,8 @@ func (suite *DataCollectionsIntegrationSuite) TestEventsSerializationRegression( collections, err := createCollections( ctx, - acct, + handlers, + suite.tenantID, inMock.NewProvider(suite.user, suite.user), test.scope, DeltaPaths{}, diff --git a/src/internal/connector/exchange/events_backup.go b/src/internal/connector/exchange/events_backup.go new file mode 100644 index 000000000..f77a6a1a3 --- /dev/null +++ b/src/internal/connector/exchange/events_backup.go @@ -0,0 +1,40 @@ +package exchange + +import ( + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ backupHandler = &eventBackupHandler{} + +type eventBackupHandler struct { + ac api.Events +} + +func newEventBackupHandler( + ac api.Client, +) eventBackupHandler { + ace := ac.Events() + + return eventBackupHandler{ + ac: ace, + } +} + +func (h eventBackupHandler) itemEnumerator() addedAndRemovedItemGetter { + return h.ac +} + +func (h eventBackupHandler) itemHandler() itemGetterSerializer { + return h.ac +} + +func (h eventBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return DefaultCalendar, &eventCalendarCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} diff --git a/src/internal/connector/exchange/events_restore.go b/src/internal/connector/exchange/events_restore.go new file mode 100644 index 000000000..a45de05aa --- /dev/null +++ b/src/internal/connector/exchange/events_restore.go @@ -0,0 +1,109 @@ +package exchange + +import ( + "context" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ itemRestorer = &eventRestoreHandler{} 
+
+type eventRestoreHandler struct {
+	ac api.Events
+	ip itemPoster[models.Eventable]
+}
+
+func newEventRestoreHandler(
+	ac api.Client,
+) eventRestoreHandler {
+	ace := ac.Events()
+
+	return eventRestoreHandler{
+		ac: ace,
+		ip: ace,
+	}
+}
+
+func (h eventRestoreHandler) newContainerCache(userID string) graph.ContainerResolver {
+	return &eventCalendarCache{
+		userID: userID,
+		enumer: h.ac,
+		getter: h.ac,
+	}
+}
+
+func (h eventRestoreHandler) formatRestoreDestination(
+	destinationContainerName string,
+	_ path.Path, // ignored because calendars cannot be nested
+) *path.Builder {
+	return path.Builder{}.Append(destinationContainerName)
+}
+
+func (h eventRestoreHandler) CreateContainer(
+	ctx context.Context,
+	userID, containerName, _ string, // parent container not used
+) (graph.Container, error) {
+	return h.ac.CreateContainer(ctx, userID, containerName, "")
+}
+
+func (h eventRestoreHandler) containerSearcher() containerByNamer {
+	return h.ac
+}
+
+// always returns the provided value
+func (h eventRestoreHandler) orRootContainer(c string) string {
+	return c
+}
+
+func (h eventRestoreHandler) restore(
+	ctx context.Context,
+	body []byte,
+	userID, destinationID string,
+	errs *fault.Bus,
+) (*details.ExchangeInfo, error) {
+	event, err := api.BytesToEventable(body)
+	if err != nil {
+		return nil, clues.Wrap(err, "creating event from bytes").WithClues(ctx)
+	}
+
+	ctx = clues.Add(ctx, "item_id", ptr.Val(event.GetId()))
+
+	event = toEventSimplified(event)
+
+	var attachments []models.Attachmentable
+
+	if ptr.Val(event.GetHasAttachments()) {
+		attachments = event.GetAttachments()
+		event.SetAttachments([]models.Attachmentable{})
+	}
+
+	item, err := h.ip.PostItem(ctx, userID, destinationID, event)
+	if err != nil {
+		return nil, graph.Wrap(ctx, err, "restoring event")
+	}
+
+	err = uploadAttachments(
+		ctx,
+		h.ac,
+		attachments,
+		userID,
+		destinationID,
+		ptr.Val(item.GetId()),
+		errs)
+	if err != nil {
+		return nil, clues.Stack(err)
+	}
+
+	info := api.EventInfo(event)
+	info.Size = int64(len(body))
+
+	return info, nil
+}
diff --git a/src/internal/connector/exchange/events_restore_test.go b/src/internal/connector/exchange/events_restore_test.go
new file mode 100644
index 000000000..2060bf21a
--- /dev/null
+++ b/src/internal/connector/exchange/events_restore_test.go
@@ -0,0 +1,57 @@
+package exchange
+
+import (
+	"testing"
+
+	"github.com/alcionai/clues"
+	"github.com/stretchr/testify/require"
+	"github.com/stretchr/testify/suite"
+
+	"github.com/alcionai/corso/src/internal/tester"
+	"github.com/alcionai/corso/src/pkg/account"
+	"github.com/alcionai/corso/src/pkg/path"
+	"github.com/alcionai/corso/src/pkg/services/m365/api"
+)
+
+type EventsRestoreIntgSuite struct {
+	tester.Suite
+	creds  account.M365Config
+	ac     api.Client
+	userID string
+}
+
+func TestEventsRestoreIntgSuite(t *testing.T) {
+	suite.Run(t, &EventsRestoreIntgSuite{
+		Suite: tester.NewIntegrationSuite(
+			t,
+			[][]string{tester.M365AcctCredEnvs}),
+	})
+}
+
+func (suite *EventsRestoreIntgSuite) SetupSuite() {
+	t := suite.T()
+
+	a := tester.NewM365Account(t)
+	creds, err := a.M365Config()
+	require.NoError(t, err, clues.ToCore(err))
+
+	suite.creds = creds
+
+	suite.ac, err = api.NewClient(creds)
+	require.NoError(t, err, clues.ToCore(err))
+
+	suite.userID = tester.M365UserID(t)
+}
+
+// Testing to ensure that the cache system works in multiple different environments
+func (suite *EventsRestoreIntgSuite) TestCreateContainerDestination() {
+	runCreateDestinationTest(
+		suite.T(),
+		newEventRestoreHandler(suite.ac),
+		path.EventsCategory,
+		suite.creds.AzureTenantID,
+		suite.userID,
+		tester.DefaultTestRestoreDestination("").ContainerName,
+		[]string{"Durmstrang"},
+		[]string{"Beauxbatons"})
+}
diff --git a/src/internal/connector/exchange/exchange_data_collection.go b/src/internal/connector/exchange/exchange_data_collection.go
index 441056ed6..921952b88 100644
--- a/src/internal/connector/exchange/exchange_data_collection.go
+++ b/src/internal/connector/exchange/exchange_data_collection.go
@@ -12,7 +12,6 @@ import (
 	"time"

 	"github.com/alcionai/clues"
-	"github.com/microsoft/kiota-abstractions-go/serialization"

 	"github.com/alcionai/corso/src/internal/connector/graph"
 	"github.com/alcionai/corso/src/internal/connector/support"
@@ -37,20 +36,6 @@ const (
 	numberOfRetries = 4
 )

-type itemer interface {
-	GetItem(
-		ctx context.Context,
-		user, itemID string,
-		immutableIDs bool,
-		errs *fault.Bus,
-	) (serialization.Parsable, *details.ExchangeInfo, error)
-	Serialize(
-		ctx context.Context,
-		item serialization.Parsable,
-		user, itemID string,
-	) ([]byte, error)
-}
-
 // Collection implements the interface from data.Collection
 // Structure holds data for an Exchange application for a single user
 type Collection struct {
@@ -63,7 +48,7 @@ type Collection struct {
 	// removed is a list of item IDs that were deleted from, or moved out, of a container
 	removed map[string]struct{}

-	items itemer
+	items itemGetterSerializer

 	category path.CategoryType
 	statusUpdater support.StatusUpdater
@@ -98,7 +83,7 @@ func NewCollection(
 	curr, prev path.Path,
 	location *path.Builder,
 	category path.CategoryType,
-	items itemer,
+	items itemGetterSerializer,
 	statusUpdater support.StatusUpdater,
 	ctrlOpts control.Options,
 	doNotMergeItems bool,
@@ -178,14 +163,11 @@ func (col *Collection) streamItems(ctx context.Context, errs *fault.Bus) {
 	}()

 	if len(col.added)+len(col.removed) > 0 {
-		var closer func()
-		colProgress, closer = observe.CollectionProgress(
+		colProgress = observe.CollectionProgress(
 			ctx,
 			col.fullPath.Category().String(),
 			col.LocationPath().Elements())
-		go closer()
-
 		defer func() {
 			close(colProgress)
 		}()
diff --git a/src/internal/connector/exchange/handlers.go b/src/internal/connector/exchange/handlers.go
new file mode 100644
index 000000000..0538ebd17
--- /dev/null
+++ b/src/internal/connector/exchange/handlers.go
@@ -0,0 +1,131 @@
+package exchange
+
+import (
+	"context"
+
+	"github.com/microsoft/kiota-abstractions-go/serialization"
+
+	"github.com/alcionai/corso/src/internal/connector/graph"
+	"github.com/alcionai/corso/src/pkg/backup/details"
+	"github.com/alcionai/corso/src/pkg/fault"
+	"github.com/alcionai/corso/src/pkg/path"
+	"github.com/alcionai/corso/src/pkg/services/m365/api"
+)
+
+// ---------------------------------------------------------------------------
+// backup
+// ---------------------------------------------------------------------------
+
+type backupHandler interface {
+	itemEnumerator() addedAndRemovedItemGetter
+	itemHandler() itemGetterSerializer
+	NewContainerCache(userID string) (string, graph.ContainerResolver)
+}
+
+type addedAndRemovedItemGetter interface {
+	GetAddedAndRemovedItemIDs(
+		ctx context.Context,
+		user, containerID, oldDeltaToken string,
+		immutableIDs bool,
+		canMakeDeltaQueries bool,
+	) ([]string, []string, api.DeltaUpdate, error)
+}
+
+type itemGetterSerializer interface {
+	GetItem(
+		ctx context.Context,
+		user, itemID string,
+		immutableIDs bool,
+		errs *fault.Bus,
+	) (serialization.Parsable, *details.ExchangeInfo, error)
+	Serialize(
+		ctx context.Context,
+		item serialization.Parsable,
+		user, itemID string,
+	) ([]byte, error)
+}
+
+func BackupHandlers(ac api.Client) map[path.CategoryType]backupHandler {
+	return map[path.CategoryType]backupHandler{
+		path.ContactsCategory: newContactBackupHandler(ac),
+		path.EmailCategory:    newMailBackupHandler(ac),
+		path.EventsCategory:   newEventBackupHandler(ac),
+	}
+}
+
+// ---------------------------------------------------------------------------
+// restore
+// ---------------------------------------------------------------------------
+
+type restoreHandler interface {
+	itemRestorer
+	containerAPI
+	newContainerCache(userID string) graph.ContainerResolver
+	formatRestoreDestination(
+		destinationContainerName string,
+		collectionFullPath path.Path,
+	) *path.Builder
+}
+
+// runs the item restoration (ie: item creation) process
+// for a single item, whose summary contents are held in
+// the body property.
+type itemRestorer interface {
+	restore(
+		ctx context.Context,
+		body []byte,
+		userID, destinationID string,
+		errs *fault.Bus,
+	) (*details.ExchangeInfo, error)
+}
+
+// runs the actual graph API post request.
+type itemPoster[T any] interface {
+	PostItem(
+		ctx context.Context,
+		userID, dirID string,
+		body T,
+	) (T, error)
+}
+
+// produces structs that interface with the graph/cache_container
+// CachedContainer interface.
+type containerAPI interface {
+	// POSTs the creation of a new container
+	CreateContainer(
+		ctx context.Context,
+		userID, containerName, parentContainerID string,
+	) (graph.Container, error)
+
+	// GETs a container by name.
+	// if containerByNamer is nil, this functionality is not supported
+	// and should be skipped by the caller.
+	// normally, we'd alias the func directly. The indirection here
+	// is because not all types comply with GetContainerByName.
+	containerSearcher() containerByNamer
+
+	// returns either the provided value (assumed to be the root
+	// folder for that cache tree), or the default root container
+	// (if the category uses a root folder that exists above the
+	// restore location path).
+	orRootContainer(string) string
+}
+
+type containerByNamer interface {
+	// searches for a container by name.
+	GetContainerByName(
+		ctx context.Context,
+		userID, containerName string,
+	) (graph.Container, error)
+}
+
+// primary interface controller for all per-category restoration behavior.
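Since containerSearcher may legitimately return nil (in this diff, the mail and contact handlers return nil; only events return a searcher), a hedged sketch of the caller-side pattern this implies; the surrounding restore-destination plumbing is assumed rather than taken from this diff:

	if s := h.containerSearcher(); s != nil {
		// this category supports lookup by display name
		if c, err := s.GetContainerByName(ctx, userID, containerName); err == nil {
			return c, nil
		}
		// fall through and create the container on a lookup miss
	}

	return h.CreateContainer(ctx, userID, containerName, h.orRootContainer(parentContainerID))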
+func restoreHandlers( + ac api.Client, +) map[path.CategoryType]restoreHandler { + return map[path.CategoryType]restoreHandler{ + path.ContactsCategory: newContactRestoreHandler(ac), + path.EmailCategory: newMailRestoreHandler(ac), + path.EventsCategory: newEventRestoreHandler(ac), + } +} diff --git a/src/internal/connector/exchange/mail_backup.go b/src/internal/connector/exchange/mail_backup.go new file mode 100644 index 000000000..1491a683a --- /dev/null +++ b/src/internal/connector/exchange/mail_backup.go @@ -0,0 +1,40 @@ +package exchange + +import ( + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ backupHandler = &mailBackupHandler{} + +type mailBackupHandler struct { + ac api.Mail +} + +func newMailBackupHandler( + ac api.Client, +) mailBackupHandler { + acm := ac.Mail() + + return mailBackupHandler{ + ac: acm, + } +} + +func (h mailBackupHandler) itemEnumerator() addedAndRemovedItemGetter { + return h.ac +} + +func (h mailBackupHandler) itemHandler() itemGetterSerializer { + return h.ac +} + +func (h mailBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return rootFolderAlias, &mailFolderCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} diff --git a/src/internal/connector/exchange/mail_restore.go b/src/internal/connector/exchange/mail_restore.go new file mode 100644 index 000000000..a6e8d2e0e --- /dev/null +++ b/src/internal/connector/exchange/mail_restore.go @@ -0,0 +1,140 @@ +package exchange + +import ( + "context" + + "github.com/alcionai/clues" + "github.com/microsoftgraph/msgraph-sdk-go/models" + + "github.com/alcionai/corso/src/internal/common/dttm" + "github.com/alcionai/corso/src/internal/common/ptr" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/backup/details" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +var _ itemRestorer = &mailRestoreHandler{} + +type mailRestoreHandler struct { + ac api.Mail + ip itemPoster[models.Messageable] +} + +func newMailRestoreHandler( + ac api.Client, +) mailRestoreHandler { + acm := ac.Mail() + + return mailRestoreHandler{ + ac: acm, + ip: acm, + } +} + +func (h mailRestoreHandler) newContainerCache(userID string) graph.ContainerResolver { + return &mailFolderCache{ + userID: userID, + enumer: h.ac, + getter: h.ac, + } +} + +func (h mailRestoreHandler) formatRestoreDestination( + destinationContainerName string, + collectionFullPath path.Path, +) *path.Builder { + return path.Builder{}.Append(destinationContainerName).Append(collectionFullPath.Folders()...) 
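+	// unlike contacts and events (which flatten every restore into the
+	// destination container alone), mail preserves the original folder
+	// hierarchy beneath the destination: e.g. destination "Corso_Restore"
+	// plus an original path of "Inbox/Reports" restores into
+	// "Corso_Restore/Inbox/Reports" (example names are illustrative only).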
+} + +func (h mailRestoreHandler) CreateContainer( + ctx context.Context, + userID, containerName, parentContainerID string, +) (graph.Container, error) { + if len(parentContainerID) == 0 { + parentContainerID = rootFolderAlias + } + + return h.ac.CreateContainer(ctx, userID, containerName, parentContainerID) +} + +func (h mailRestoreHandler) containerSearcher() containerByNamer { + return nil +} + +// always returns rootFolderAlias +func (h mailRestoreHandler) orRootContainer(string) string { + return rootFolderAlias +} + +func (h mailRestoreHandler) restore( + ctx context.Context, + body []byte, + userID, destinationID string, + errs *fault.Bus, +) (*details.ExchangeInfo, error) { + msg, err := api.BytesToMessageable(body) + if err != nil { + return nil, clues.Wrap(err, "creating mail from bytes").WithClues(ctx) + } + + ctx = clues.Add(ctx, "item_id", ptr.Val(msg.GetId())) + msg = setMessageSVEPs(toMessage(msg)) + + attachments := msg.GetAttachments() + // Item.Attachments --> HasAttachments doesn't always have a value populated when deserialized + msg.SetAttachments([]models.Attachmentable{}) + + item, err := h.ip.PostItem(ctx, userID, destinationID, msg) + if err != nil { + return nil, graph.Wrap(ctx, err, "restoring mail message") + } + + err = uploadAttachments( + ctx, + h.ac, + attachments, + userID, + destinationID, + ptr.Val(item.GetId()), + errs) + if err != nil { + return nil, clues.Stack(err) + } + + return api.MailInfo(msg, int64(len(body))), nil +} + +func setMessageSVEPs(msg models.Messageable) models.Messageable { + // Set Extended Properties: + svlep := make([]models.SingleValueLegacyExtendedPropertyable, 0) + + // prevent "resending" of the mail in the graph api backstore + sv1 := models.NewSingleValueLegacyExtendedProperty() + sv1.SetId(ptr.To(MailRestorePropertyTag)) + sv1.SetValue(ptr.To(RestoreCanonicalEnableValue)) + svlep = append(svlep, sv1) + + // establish the sent date + if msg.GetSentDateTime() != nil { + sv2 := models.NewSingleValueLegacyExtendedProperty() + sv2.SetId(ptr.To(MailSendDateTimeOverrideProperty)) + sv2.SetValue(ptr.To(dttm.FormatToLegacy(ptr.Val(msg.GetSentDateTime())))) + + svlep = append(svlep, sv2) + } + + // establish the received Date + if msg.GetReceivedDateTime() != nil { + sv3 := models.NewSingleValueLegacyExtendedProperty() + sv3.SetId(ptr.To(MailReceiveDateTimeOverriveProperty)) + sv3.SetValue(ptr.To(dttm.FormatToLegacy(ptr.Val(msg.GetReceivedDateTime())))) + + svlep = append(svlep, sv3) + } + + msg.SetSingleValueExtendedProperties(svlep) + + return msg +} diff --git a/src/internal/connector/exchange/mail_restore_test.go b/src/internal/connector/exchange/mail_restore_test.go new file mode 100644 index 000000000..8edcedd4c --- /dev/null +++ b/src/internal/connector/exchange/mail_restore_test.go @@ -0,0 +1,57 @@ +package exchange + +import ( + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/require" + "github.com/stretchr/testify/suite" + + "github.com/alcionai/corso/src/internal/tester" + "github.com/alcionai/corso/src/pkg/account" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +type MailRestoreIntgSuite struct { + tester.Suite + creds account.M365Config + ac api.Client + userID string +} + +func TestMailRestoreIntgSuite(t *testing.T) { + suite.Run(t, &MailRestoreIntgSuite{ + Suite: tester.NewIntegrationSuite( + t, + [][]string{tester.M365AcctCredEnvs}), + }) +} + +func (suite *MailRestoreIntgSuite) SetupSuite() { + t := suite.T() + + a := 
tester.NewM365Account(t) + creds, err := a.M365Config() + require.NoError(t, err, clues.ToCore(err)) + + suite.creds = creds + + suite.ac, err = api.NewClient(creds) + require.NoError(t, err, clues.ToCore(err)) + + suite.userID = tester.M365UserID(t) +} + +// Testing to ensure that cache system works for in multiple different environments +func (suite *MailRestoreIntgSuite) TestCreateContainerDestination() { + runCreateDestinationTest( + suite.T(), + newMailRestoreHandler(suite.ac), + path.EmailCategory, + suite.creds.AzureTenantID, + suite.userID, + tester.DefaultTestRestoreDestination("").ContainerName, + []string{"Griffindor", "Croix"}, + []string{"Griffindor", "Felicius"}) +} diff --git a/src/internal/connector/exchange/restore_test.go b/src/internal/connector/exchange/restore_test.go index 3b98a88f0..36d7101be 100644 --- a/src/internal/connector/exchange/restore_test.go +++ b/src/internal/connector/exchange/restore_test.go @@ -14,7 +14,6 @@ import ( "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/tester" "github.com/alcionai/corso/src/pkg/account" - "github.com/alcionai/corso/src/pkg/control" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/path" "github.com/alcionai/corso/src/pkg/services/m365/api" @@ -66,9 +65,10 @@ func (suite *RestoreIntgSuite) TestRestoreContact() { var ( userID = tester.M365UserID(t) folderName = tester.DefaultTestRestoreDestination("contact").ContainerName + handler = newContactRestoreHandler(suite.ac) ) - aFolder, err := suite.ac.Contacts().CreateContactFolder(ctx, userID, folderName) + aFolder, err := handler.ac.CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) folderID := ptr.Val(aFolder.GetId()) @@ -79,13 +79,11 @@ func (suite *RestoreIntgSuite) TestRestoreContact() { assert.NoError(t, err, clues.ToCore(err)) }() - info, err := RestoreContact( + info, err := handler.restore( ctx, exchMock.ContactBytes("Corso TestContact"), - suite.ac.Contacts(), - control.Copy, - folderID, - userID) + userID, folderID, + fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, info, "contact item info") } @@ -101,9 +99,10 @@ func (suite *RestoreIntgSuite) TestRestoreEvent() { var ( userID = tester.M365UserID(t) subject = tester.DefaultTestRestoreDestination("event").ContainerName + handler = newEventRestoreHandler(suite.ac) ) - calendar, err := suite.ac.Events().CreateCalendar(ctx, userID, subject) + calendar, err := handler.ac.CreateContainer(ctx, userID, subject, "") require.NoError(t, err, clues.ToCore(err)) calendarID := ptr.Val(calendar.GetId()) @@ -135,15 +134,10 @@ func (suite *RestoreIntgSuite) TestRestoreEvent() { ctx, flush := tester.NewContext(t) defer flush() - info, err := RestoreEvent( + info, err := handler.restore( ctx, test.bytes, - suite.ac.Events(), - suite.ac.Events(), - suite.gs, - control.Copy, - calendarID, - userID, + userID, calendarID, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, info, "event item info") @@ -154,12 +148,8 @@ func (suite *RestoreIntgSuite) TestRestoreEvent() { // TestRestoreExchangeObject verifies path.Category usage for restored objects func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { t := suite.T() - a := tester.NewM365Account(t) - m365, err := a.M365Config() - require.NoError(t, err, clues.ToCore(err)) - service, err := createService(m365) - require.NoError(t, err, clues.ToCore(err)) + handlers := restoreHandlers(suite.ac) userID := 
tester.M365UserID(suite.T()) @@ -175,7 +165,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailobj").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -187,7 +178,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailwattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -199,7 +191,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("eventwattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -211,13 +204,13 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailitemattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) }, }, - //TODO: Neha complete as part of https://github.com/alcionai/corso/issues/2428 // { // name: "Test Mail: Hydrated Item Attachment Mail", @@ -228,7 +221,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { // category: path.EmailCategory, // destination: func(t *testing.T, ctx context.Context) string { // folderName := tester.DefaultTestRestoreDestination("mailbasicattch").ContainerName - // folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + // folder, err := handlers[path.EmailCategory]. + // CreateContainer(ctx, userID, folderName, "") // require.NoError(t, err, clues.ToCore(err)) // return ptr.Val(folder.GetId()) @@ -243,7 +237,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { // category: path.EmailCategory, // destination: func(t *testing.T, ctx context.Context) string { // folderName := tester.DefaultTestRestoreDestination("mailnestattch").ContainerName - // folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + // folder, err := handlers[path.EmailCategory]. 
+ // CreateContainer(ctx, userID, folderName, "") // require.NoError(t, err, clues.ToCore(err)) // return ptr.Val(folder.GetId()) @@ -258,7 +253,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailcontactattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -270,7 +266,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("nestedattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -282,7 +279,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("maillargeattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -294,7 +292,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailtwoattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -306,20 +305,21 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EmailCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("mailrefattch").ContainerName - folder, err := suite.ac.Mail().CreateMailFolder(ctx, userID, folderName) + folder, err := handlers[path.EmailCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) }, }, - // TODO: #884 - reinstate when able to specify root folder by name { name: "Test Contact", bytes: exchMock.ContactBytes("Test_Omega"), category: path.ContactsCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("contact").ContainerName - folder, err := suite.ac.Contacts().CreateContactFolder(ctx, userID, folderName) + folder, err := handlers[path.ContactsCategory]. 
+ CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(folder.GetId()) @@ -331,7 +331,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EventsCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("event").ContainerName - calendar, err := suite.ac.Events().CreateCalendar(ctx, userID, folderName) + calendar, err := handlers[path.EventsCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(calendar.GetId()) @@ -343,7 +344,8 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { category: path.EventsCategory, destination: func(t *testing.T, ctx context.Context) string { folderName := tester.DefaultTestRestoreDestination("eventobj").ContainerName - calendar, err := suite.ac.Events().CreateCalendar(ctx, userID, folderName) + calendar, err := handlers[path.EventsCategory]. + CreateContainer(ctx, userID, folderName, "") require.NoError(t, err, clues.ToCore(err)) return ptr.Val(calendar.GetId()) @@ -359,15 +361,10 @@ func (suite *RestoreIntgSuite) TestRestoreExchangeObject() { defer flush() destination := test.destination(t, ctx) - info, err := RestoreItem( + info, err := handlers[test.category].restore( ctx, test.bytes, - test.category, - control.Copy, - suite.ac, - service, - destination, - userID, + userID, destination, fault.New(true)) assert.NoError(t, err, clues.ToCore(err)) assert.NotNil(t, info, "item info was not populated") diff --git a/src/internal/connector/exchange/service_functions.go b/src/internal/connector/exchange/service_functions.go deleted file mode 100644 index bde0f1ae4..000000000 --- a/src/internal/connector/exchange/service_functions.go +++ /dev/null @@ -1,160 +0,0 @@ -package exchange - -import ( - "context" - - "github.com/alcionai/clues" - - "github.com/alcionai/corso/src/internal/common/ptr" - "github.com/alcionai/corso/src/internal/connector/graph" - "github.com/alcionai/corso/src/pkg/account" - "github.com/alcionai/corso/src/pkg/fault" - "github.com/alcionai/corso/src/pkg/logger" - "github.com/alcionai/corso/src/pkg/path" - "github.com/alcionai/corso/src/pkg/selectors" - "github.com/alcionai/corso/src/pkg/services/m365/api" -) - -var ErrFolderNotFound = clues.New("folder not found") - -func createService(credentials account.M365Config) (*graph.Service, error) { - adapter, err := graph.CreateAdapter( - credentials.AzureTenantID, - credentials.AzureClientID, - credentials.AzureClientSecret) - if err != nil { - return nil, clues.Wrap(err, "creating microsoft graph service for exchange") - } - - return graph.NewService(adapter), nil -} - -// populateExchangeContainerResolver gets a folder resolver if one is available for -// this category of data. If one is not available, returns nil so that other -// logic in the caller can complete as long as they check if the resolver is not -// nil. If an error occurs populating the resolver, returns an error. 
-func PopulateExchangeContainerResolver( - ctx context.Context, - qp graph.QueryParams, - errs *fault.Bus, -) (graph.ContainerResolver, error) { - var ( - res graph.ContainerResolver - cacheRoot string - ) - - ac, err := api.NewClient(qp.Credentials) - if err != nil { - return nil, err - } - - switch qp.Category { - case path.EmailCategory: - acm := ac.Mail() - res = &mailFolderCache{ - userID: qp.ResourceOwner.ID(), - getter: acm, - enumer: acm, - } - cacheRoot = rootFolderAlias - - case path.ContactsCategory: - acc := ac.Contacts() - res = &contactFolderCache{ - userID: qp.ResourceOwner.ID(), - getter: acc, - enumer: acc, - } - cacheRoot = DefaultContactFolder - - case path.EventsCategory: - ecc := ac.Events() - res = &eventCalendarCache{ - userID: qp.ResourceOwner.ID(), - getter: ecc, - enumer: ecc, - } - cacheRoot = DefaultCalendar - - default: - return nil, clues.New("no container resolver registered for category").WithClues(ctx) - } - - if err := res.Populate(ctx, errs, cacheRoot); err != nil { - return nil, clues.Wrap(err, "populating directory resolver").WithClues(ctx) - } - - return res, nil -} - -// Returns true if the container passes the scope comparison and should be included. -// Returns: -// - the path representing the directory as it should be stored in the repository. -// - the human-readable path using display names. -// - true if the path passes the scope comparison. -func includeContainer( - ctx context.Context, - qp graph.QueryParams, - c graph.CachedContainer, - scope selectors.ExchangeScope, - category path.CategoryType, -) (path.Path, *path.Builder, bool) { - var ( - directory string - locPath path.Path - pb = c.Path() - loc = c.Location() - ) - - // Clause ensures that DefaultContactFolder is inspected properly - if category == path.ContactsCategory && ptr.Val(c.GetDisplayName()) == DefaultContactFolder { - loc = loc.Append(DefaultContactFolder) - } - - dirPath, err := pb.ToDataLayerExchangePathForCategory( - qp.Credentials.AzureTenantID, - qp.ResourceOwner.ID(), - category, - false) - // Containers without a path (e.g. Root mail folder) always err here. - if err != nil { - return nil, nil, false - } - - directory = dirPath.Folder(false) - - if loc != nil { - locPath, err = loc.ToDataLayerExchangePathForCategory( - qp.Credentials.AzureTenantID, - qp.ResourceOwner.ID(), - category, - false) - // Containers without a path (e.g. Root mail folder) always err here. 
- if err != nil { - return nil, nil, false - } - - directory = locPath.Folder(false) - } - - var ok bool - - switch category { - case path.EmailCategory: - ok = scope.Matches(selectors.ExchangeMailFolder, directory) - case path.ContactsCategory: - ok = scope.Matches(selectors.ExchangeContactFolder, directory) - case path.EventsCategory: - ok = scope.Matches(selectors.ExchangeEventCalendar, directory) - default: - return nil, nil, false - } - - logger.Ctx(ctx).With( - "included", ok, - "scope", scope, - "matches_input", directory, - ).Debug("backup folder selection filter") - - return dirPath, loc, ok -} diff --git a/src/internal/connector/exchange/service_iterators.go b/src/internal/connector/exchange/service_iterators.go index 3d96cf4b5..021c15cbc 100644 --- a/src/internal/connector/exchange/service_iterators.go +++ b/src/internal/connector/exchange/service_iterators.go @@ -18,15 +18,6 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -type addedAndRemovedItemIDsGetter interface { - GetAddedAndRemovedItemIDs( - ctx context.Context, - user, containerID, oldDeltaToken string, - immutableIDs bool, - canMakeDeltaQueries bool, - ) ([]string, []string, api.DeltaUpdate, error) -} - // filterContainersAndFillCollections is a utility function // that places the M365 object ids belonging to specific directories // into a BackupCollection. Messages outside of those directories are omitted. @@ -39,7 +30,7 @@ type addedAndRemovedItemIDsGetter interface { func filterContainersAndFillCollections( ctx context.Context, qp graph.QueryParams, - getter addedAndRemovedItemIDsGetter, + bh backupHandler, statusUpdater support.StatusUpdater, resolver graph.ContainerResolver, scope selectors.ExchangeScope, @@ -61,19 +52,6 @@ func filterContainersAndFillCollections( logger.Ctx(ctx).Infow("filling collections", "len_deltapaths", len(dps)) - // TODO(rkeepers): this should be passed in from the caller, probably - // as an interface that satisfies the NewCollection requirements. - // But this will work for the short term. - ac, err := api.NewClient(qp.Credentials) - if err != nil { - return nil, err - } - - ibt, err := itemerByType(ac, category) - if err != nil { - return nil, err - } - el := errs.Local() for _, c := range resolver.Items() { @@ -85,6 +63,7 @@ func filterContainersAndFillCollections( delete(tombstones, cID) var ( + err error dp = dps[cID] prevDelta = dp.Delta prevPathStr = dp.Path // do not log: pii; log prevPath instead @@ -115,13 +94,14 @@ func filterContainersAndFillCollections( ictx = clues.Add(ictx, "previous_path", prevPath) - added, removed, newDelta, err := getter.GetAddedAndRemovedItemIDs( - ictx, - qp.ResourceOwner.ID(), - cID, - prevDelta, - ctrlOpts.ToggleFeatures.ExchangeImmutableIDs, - !ctrlOpts.ToggleFeatures.DisableDelta) + added, removed, newDelta, err := bh.itemEnumerator(). 
+ GetAddedAndRemovedItemIDs( + ictx, + qp.ResourceOwner.ID(), + cID, + prevDelta, + ctrlOpts.ToggleFeatures.ExchangeImmutableIDs, + !ctrlOpts.ToggleFeatures.DisableDelta) if err != nil { if !graph.IsErrDeletedInFlight(err) { el.AddRecoverable(clues.Stack(err).Label(fault.LabelForceNoBackupCreation)) @@ -148,7 +128,7 @@ func filterContainersAndFillCollections( prevPath, locPath, category, - ibt, + bh.itemHandler(), statusUpdater, ctrlOpts, newDelta.Reset) @@ -181,7 +161,10 @@ func filterContainersAndFillCollections( return nil, el.Failure() } - ictx := clues.Add(ctx, "tombstone_id", id) + var ( + err error + ictx = clues.Add(ctx, "tombstone_id", id) + ) if collections[id] != nil { el.AddRecoverable(clues.Wrap(err, "conflict: tombstone exists for a live collection").WithClues(ictx)) @@ -207,7 +190,7 @@ func filterContainersAndFillCollections( prevPath, nil, // tombstones don't need a location category, - ibt, + bh.itemHandler(), statusUpdater, ctrlOpts, false) @@ -220,7 +203,7 @@ func filterContainersAndFillCollections( "num_deltas_entries", len(deltaURLs)) col, err := graph.MakeMetadataCollection( - qp.Credentials.AzureTenantID, + qp.TenantID, qp.ResourceOwner.ID(), path.ExchangeService, qp.Category, @@ -260,15 +243,74 @@ func pathFromPrevString(ps string) (path.Path, error) { return p, nil } -func itemerByType(ac api.Client, category path.CategoryType) (itemer, error) { +// Returns true if the container passes the scope comparison and should be included. +// Returns: +// - the path representing the directory as it should be stored in the repository. +// - the human-readable path using display names. +// - true if the path passes the scope comparison. +func includeContainer( + ctx context.Context, + qp graph.QueryParams, + c graph.CachedContainer, + scope selectors.ExchangeScope, + category path.CategoryType, +) (path.Path, *path.Builder, bool) { + var ( + directory string + locPath path.Path + pb = c.Path() + loc = c.Location() + ) + + // Clause ensures that DefaultContactFolder is inspected properly + if category == path.ContactsCategory && ptr.Val(c.GetDisplayName()) == DefaultContactFolder { + loc = loc.Append(DefaultContactFolder) + } + + dirPath, err := pb.ToDataLayerExchangePathForCategory( + qp.TenantID, + qp.ResourceOwner.ID(), + category, + false) + // Containers without a path (e.g. Root mail folder) always err here. + if err != nil { + return nil, nil, false + } + + directory = dirPath.Folder(false) + + if loc != nil { + locPath, err = loc.ToDataLayerExchangePathForCategory( + qp.TenantID, + qp.ResourceOwner.ID(), + category, + false) + // Containers without a path (e.g. Root mail folder) always err here. 
+ if err != nil { + return nil, nil, false + } + + directory = locPath.Folder(false) + } + + var ok bool + switch category { case path.EmailCategory: - return ac.Mail(), nil - case path.EventsCategory: - return ac.Events(), nil + ok = scope.Matches(selectors.ExchangeMailFolder, directory) case path.ContactsCategory: - return ac.Contacts(), nil + ok = scope.Matches(selectors.ExchangeContactFolder, directory) + case path.EventsCategory: + ok = scope.Matches(selectors.ExchangeEventCalendar, directory) default: - return nil, clues.New("category not registered in getFetchIDFunc") + return nil, nil, false } + + logger.Ctx(ctx).With( + "included", ok, + "scope", scope, + "matches_input", directory, + ).Debug("backup folder selection filter") + + return dirPath, loc, ok } diff --git a/src/internal/connector/exchange/service_iterators_test.go b/src/internal/connector/exchange/service_iterators_test.go index 7588f39c8..f3b75ffa3 100644 --- a/src/internal/connector/exchange/service_iterators_test.go +++ b/src/internal/connector/exchange/service_iterators_test.go @@ -27,7 +27,25 @@ import ( // mocks // --------------------------------------------------------------------------- -var _ addedAndRemovedItemIDsGetter = &mockGetter{} +var _ backupHandler = &mockBackupHandler{} + +type mockBackupHandler struct { + mg mockGetter + category path.CategoryType + ac api.Client + userID string +} + +func (bh mockBackupHandler) itemEnumerator() addedAndRemovedItemGetter { return bh.mg } +func (bh mockBackupHandler) itemHandler() itemGetterSerializer { return nil } + +func (bh mockBackupHandler) NewContainerCache( + userID string, +) (string, graph.ContainerResolver) { + return BackupHandlers(bh.ac)[bh.category].NewContainerCache(bh.userID) +} + +var _ addedAndRemovedItemGetter = &mockGetter{} type ( mockGetter struct { @@ -115,7 +133,7 @@ type ServiceIteratorsSuite struct { creds account.M365Config } -func TestServiceIteratorsSuite(t *testing.T) { +func TestServiceIteratorsUnitSuite(t *testing.T) { suite.Run(t, &ServiceIteratorsSuite{Suite: tester.NewUnitSuite(t)}) } @@ -131,7 +149,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { qp = graph.QueryParams{ Category: path.EmailCategory, // doesn't matter which one we use. 
ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} allScope = selectors.NewExchangeBackup(nil).MailFolders(selectors.Any())[0] @@ -326,10 +344,15 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections() { ctrlOpts := control.Options{FailureHandling: test.failFast} ctrlOpts.ToggleFeatures.DisableDelta = !canMakeDeltaQueries + mbh := mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } + collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, test.resolver, test.scope, @@ -422,7 +445,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_Dupli var ( qp = graph.QueryParams{ ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} @@ -660,10 +683,15 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_Dupli ctx, flush := tester.NewContext(t) defer flush() + mbh := mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } + collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, test.resolver, sc.scope, @@ -803,7 +831,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea qp = graph.QueryParams{ Category: path.EmailCategory, // doesn't matter which one we use. ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} allScope = selectors.NewExchangeBackup(nil).MailFolders(selectors.Any())[0] @@ -815,6 +843,10 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea l: path.Builder{}.Append("display_name_1"), } resolver = newMockResolver(container1) + mbh = mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } ) require.Equal(t, "user_id", qp.ResourceOwner.ID(), qp.ResourceOwner) @@ -823,7 +855,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_repea collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, resolver, allScope, @@ -884,7 +916,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_incre qp = graph.QueryParams{ Category: cat, ResourceOwner: inMock.NewProvider("user_id", "user_name"), - Credentials: suite.creds, + TenantID: suite.creds.AzureTenantID, } statusUpdater = func(*support.ConnectorOperationStatus) {} allScope = selectors.NewExchangeBackup(nil).MailFolders(selectors.Any())[0] @@ -1226,6 +1258,11 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_incre getter.noReturnDelta = false } + mbh := mockBackupHandler{ + mg: test.getter, + category: qp.Category, + } + dps := test.dps if !deltaBefore { for k, dp := range dps { @@ -1237,7 +1274,7 @@ func (suite *ServiceIteratorsSuite) TestFilterContainersAndFillCollections_incre collections, err := filterContainersAndFillCollections( ctx, qp, - test.getter, + mbh, statusUpdater, test.resolver, allScope, diff --git a/src/internal/connector/exchange/service_restore.go b/src/internal/connector/exchange/service_restore.go index 32ba73535..663104310 100644 --- a/src/internal/connector/exchange/service_restore.go +++ b/src/internal/connector/exchange/service_restore.go @@ -3,13 
+3,11 @@ package exchange import ( "bytes" "context" - "fmt" "runtime/trace" "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" - "github.com/alcionai/corso/src/internal/common/dttm" "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/support" @@ -25,216 +23,6 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -type itemPoster[T any] interface { - PostItem( - ctx context.Context, - userID, dirID string, - body T, - ) (T, error) -} - -// RestoreItem directs restore pipeline towards restore function -// based on the path.CategoryType. All input params are necessary to perform -// the type-specific restore function. -func RestoreItem( - ctx context.Context, - bits []byte, - category path.CategoryType, - policy control.CollisionPolicy, - ac api.Client, - gs graph.Servicer, - destination, user string, - errs *fault.Bus, -) (*details.ExchangeInfo, error) { - if policy != control.Copy { - return nil, clues.Wrap(clues.New(policy.String()), "policy not supported for Exchange restore").WithClues(ctx) - } - - switch category { - case path.EmailCategory: - return RestoreMessage(ctx, bits, ac.Mail(), ac.Mail(), gs, control.Copy, destination, user, errs) - case path.ContactsCategory: - return RestoreContact(ctx, bits, ac.Contacts(), control.Copy, destination, user) - case path.EventsCategory: - return RestoreEvent(ctx, bits, ac.Events(), ac.Events(), gs, control.Copy, destination, user, errs) - default: - return nil, clues.Wrap(clues.New(category.String()), "not supported for Exchange restore") - } -} - -// RestoreContact wraps api.Contacts().PostItem() -func RestoreContact( - ctx context.Context, - body []byte, - cli itemPoster[models.Contactable], - cp control.CollisionPolicy, - destination, user string, -) (*details.ExchangeInfo, error) { - contact, err := api.BytesToContactable(body) - if err != nil { - return nil, graph.Wrap(ctx, err, "creating contact from bytes") - } - - ctx = clues.Add(ctx, "item_id", ptr.Val(contact.GetId())) - - _, err = cli.PostItem(ctx, user, destination, contact) - if err != nil { - return nil, clues.Stack(err) - } - - info := api.ContactInfo(contact) - info.Size = int64(len(body)) - - return info, nil -} - -// RestoreEvent wraps api.Events().PostItem() -func RestoreEvent( - ctx context.Context, - body []byte, - itemCli itemPoster[models.Eventable], - attachmentCli attachmentPoster, - gs graph.Servicer, - cp control.CollisionPolicy, - destination, user string, - errs *fault.Bus, -) (*details.ExchangeInfo, error) { - event, err := api.BytesToEventable(body) - if err != nil { - return nil, clues.Wrap(err, "creating event from bytes").WithClues(ctx) - } - - ctx = clues.Add(ctx, "item_id", ptr.Val(event.GetId())) - - var ( - el = errs.Local() - transformedEvent = toEventSimplified(event) - attached []models.Attachmentable - ) - - if ptr.Val(event.GetHasAttachments()) { - attached = event.GetAttachments() - - transformedEvent.SetAttachments([]models.Attachmentable{}) - } - - item, err := itemCli.PostItem(ctx, user, destination, event) - if err != nil { - return nil, clues.Stack(err) - } - - for _, a := range attached { - if el.Failure() != nil { - break - } - - err := uploadAttachment( - ctx, - attachmentCli, - user, - destination, - ptr.Val(item.GetId()), - a) - if err != nil { - el.AddRecoverable(err) - } - } - - info := api.EventInfo(event) - info.Size = int64(len(body)) - - return info, el.Failure() -} - -// RestoreMessage 
wraps api.Mail().PostItem(), handling attachment creation along the way -func RestoreMessage( - ctx context.Context, - body []byte, - itemCli itemPoster[models.Messageable], - attachmentCli attachmentPoster, - gs graph.Servicer, - cp control.CollisionPolicy, - destination, user string, - errs *fault.Bus, -) (*details.ExchangeInfo, error) { - // Creates messageable object from original bytes - msg, err := api.BytesToMessageable(body) - if err != nil { - return nil, clues.Wrap(err, "creating mail from bytes").WithClues(ctx) - } - - ctx = clues.Add(ctx, "item_id", ptr.Val(msg.GetId())) - - var ( - clone = toMessage(msg) - valueID = MailRestorePropertyTag - enableValue = RestoreCanonicalEnableValue - ) - - // Set Extended Properties: - // 1st: No transmission - // 2nd: Send Date - // 3rd: Recv Date - svlep := make([]models.SingleValueLegacyExtendedPropertyable, 0) - sv1 := models.NewSingleValueLegacyExtendedProperty() - sv1.SetId(&valueID) - sv1.SetValue(&enableValue) - svlep = append(svlep, sv1) - - if clone.GetSentDateTime() != nil { - sv2 := models.NewSingleValueLegacyExtendedProperty() - sendPropertyValue := dttm.FormatToLegacy(ptr.Val(clone.GetSentDateTime())) - sendPropertyTag := MailSendDateTimeOverrideProperty - sv2.SetId(&sendPropertyTag) - sv2.SetValue(&sendPropertyValue) - - svlep = append(svlep, sv2) - } - - if clone.GetReceivedDateTime() != nil { - sv3 := models.NewSingleValueLegacyExtendedProperty() - recvPropertyValue := dttm.FormatToLegacy(ptr.Val(clone.GetReceivedDateTime())) - recvPropertyTag := MailReceiveDateTimeOverriveProperty - sv3.SetId(&recvPropertyTag) - sv3.SetValue(&recvPropertyValue) - - svlep = append(svlep, sv3) - } - - clone.SetSingleValueExtendedProperties(svlep) - - attached := clone.GetAttachments() - - // Item.Attachments --> HasAttachments doesn't always have a value populated when deserialized - clone.SetAttachments([]models.Attachmentable{}) - - item, err := itemCli.PostItem(ctx, user, destination, clone) - if err != nil { - return nil, graph.Wrap(ctx, err, "restoring mail message") - } - - el := errs.Local() - - for _, a := range attached { - if el.Failure() != nil { - return nil, el.Failure() - } - - err := uploadAttachment( - ctx, - attachmentCli, - user, - destination, - ptr.Val(item.GetId()), - a) - if err != nil { - el.AddRecoverable(clues.Wrap(err, "uploading mail attachment")) - } - } - - return api.MailInfo(clone, int64(len(body))), el.Failure() -} - // RestoreCollections restores M365 objects in data.RestoreCollection to MSFT // store through GraphAPI. 
func RestoreCollections( @@ -247,49 +35,82 @@ func RestoreCollections( deets *details.Builder, errs *fault.Bus, ) (*support.ConnectorOperationStatus, error) { + if len(dcs) == 0 { + return support.CreateStatus(ctx, support.Restore, 0, support.CollectionMetrics{}, ""), nil + } + var ( - directoryCaches = make(map[string]map[path.CategoryType]graph.ContainerResolver) - metrics support.CollectionMetrics - userID string + userID = dcs[0].FullPath().ResourceOwner() + directoryCache = make(map[path.CategoryType]graph.ContainerResolver) + handlers = restoreHandlers(ac) + metrics support.CollectionMetrics // TODO policy to be updated from external source after completion of refactoring policy = control.Copy el = errs.Local() ) - if len(dcs) > 0 { - userID = dcs[0].FullPath().ResourceOwner() - ctx = clues.Add(ctx, "resource_owner", clues.Hide(userID)) - } + ctx = clues.Add(ctx, "resource_owner", clues.Hide(userID)) for _, dc := range dcs { if el.Failure() != nil { break } - userCaches := directoryCaches[userID] - if userCaches == nil { - directoryCaches[userID] = make(map[path.CategoryType]graph.ContainerResolver) - userCaches = directoryCaches[userID] - } + var ( + isNewCache bool + category = dc.FullPath().Category() + ictx = clues.Add( + ctx, + "restore_category", category, + "restore_full_path", dc.FullPath()) + ) - containerID, err := CreateContainerDestination( - ctx, - creds, - dc.FullPath(), - dest.ContainerName, - userCaches, - errs) - if err != nil { - el.AddRecoverable(clues.Wrap(err, "creating destination").WithClues(ctx)) + handler, ok := handlers[category] + if !ok { + el.AddRecoverable(clues.New("unsupported restore path category").WithClues(ictx)) continue } - temp, canceled := restoreCollection(ctx, ac, gs, dc, containerID, policy, deets, errs) + if directoryCache[category] == nil { + directoryCache[category] = handler.newContainerCache(userID) + isNewCache = true + } + + containerID, gcr, err := createDestination( + ictx, + handler, + handler.formatRestoreDestination(dest.ContainerName, dc.FullPath()), + userID, + directoryCache[category], + isNewCache, + errs) + if err != nil { + el.AddRecoverable(err) + continue + } + + directoryCache[category] = gcr + + ictx = clues.Add(ictx, "restore_destination_id", containerID) + + temp, err := restoreCollection( + ictx, + handler, + dc, + userID, + containerID, + policy, + deets, + errs) metrics = support.CombineMetrics(metrics, temp) - if canceled { - break + if err != nil { + if graph.IsErrTimeout(err) { + break + } + + el.AddRecoverable(err) } } @@ -306,48 +127,38 @@ func RestoreCollections( // restoreCollection handles restoration of an individual collection. 
func restoreCollection( ctx context.Context, - ac api.Client, - gs graph.Servicer, + ir itemRestorer, dc data.RestoreCollection, - folderID string, + userID, destinationID string, policy control.CollisionPolicy, deets *details.Builder, errs *fault.Bus, -) (support.CollectionMetrics, bool) { +) (support.CollectionMetrics, error) { ctx, end := diagnostics.Span(ctx, "gc:exchange:restoreCollection", diagnostics.Label("path", dc.FullPath())) defer end() var ( - metrics support.CollectionMetrics - items = dc.Items(ctx, errs) - directory = dc.FullPath() - service = directory.Service() - category = directory.Category() - user = directory.ResourceOwner() + el = errs.Local() + metrics support.CollectionMetrics + items = dc.Items(ctx, errs) + fullPath = dc.FullPath() + category = fullPath.Category() ) - ctx = clues.Add( - ctx, - "full_path", directory, - "service", service, - "category", category) - - colProgress, closer := observe.CollectionProgress( + colProgress := observe.CollectionProgress( ctx, category.String(), - clues.Hide(directory.Folder(false))) - defer closer() + fullPath.Folder(false)) defer close(colProgress) for { select { case <-ctx.Done(): - errs.AddRecoverable(clues.Wrap(ctx.Err(), "context cancelled").WithClues(ctx)) - return metrics, true + return metrics, clues.Wrap(ctx.Err(), "context cancelled").WithClues(ctx) case itemData, ok := <-items: - if !ok || errs.Failure() != nil { - return metrics, false + if !ok || el.Failure() != nil { + return metrics, el.Failure() } ictx := clues.Add(ctx, "item_id", itemData.UUID()) @@ -358,33 +169,26 @@ func restoreCollection( _, err := buf.ReadFrom(itemData.ToReader()) if err != nil { - errs.AddRecoverable(clues.Wrap(err, "reading item bytes").WithClues(ictx)) + el.AddRecoverable(clues.Wrap(err, "reading item bytes").WithClues(ictx)) continue } - byteArray := buf.Bytes() + body := buf.Bytes() - info, err := RestoreItem( - ictx, - byteArray, - category, - policy, - ac, - gs, - folderID, - user, - errs) + info, err := ir.restore(ictx, body, userID, destinationID, errs) if err != nil { - errs.AddRecoverable(err) + el.AddRecoverable(err) continue } - metrics.Bytes += int64(len(byteArray)) + metrics.Bytes += int64(len(body)) metrics.Successes++ - itemPath, err := dc.FullPath().AppendItem(itemData.UUID()) + // FIXME: this may be the incorrect path. If we restored within a top-level + // destination folder, then the restore path no longer matches the fullPath. + itemPath, err := fullPath.AppendItem(itemData.UUID()) if err != nil { - errs.AddRecoverable(clues.Wrap(err, "building full path with item").WithClues(ctx)) + el.AddRecoverable(clues.Wrap(err, "adding item to collection path").WithClues(ctx)) continue } @@ -398,7 +202,8 @@ func restoreCollection( Exchange: info, }) if err != nil { - // Not critical enough to need to stop restore operation. + // These deets additions are for cli display purposes only. + // no need to fail out on error. logger.Ctx(ctx).Infow("accounting for restored item", "error", err) } @@ -407,257 +212,131 @@ func restoreCollection( } } -// CreateContainerDestination builds the destination into the container -// at the provided path. As a precondition, the destination cannot -// already exist. If it does then an error is returned. The provided -// containerResolver is updated with the new destination. -// @ returns the container ID of the new destination container. -func CreateContainerDestination( +// createDestination creates folders in sequence +// [root leaf1 leaf2] similar to a linked list. 
+// @param directory is the desired path from the root to the container +// that the items will be restored into. +func createDestination( ctx context.Context, - creds account.M365Config, - directory path.Path, - destination string, - caches map[path.CategoryType]graph.ContainerResolver, + ca containerAPI, + destination *path.Builder, + userID string, + gcr graph.ContainerResolver, + isNewCache bool, errs *fault.Bus, -) (string, error) { +) (string, graph.ContainerResolver, error) { var ( - newCache = false - user = directory.ResourceOwner() - category = directory.Category() - directoryCache = caches[category] + cache = gcr + restoreLoc = &path.Builder{} + containerParentID string ) - // TODO(rkeepers): pass the api client into this func, rather than generating one. - ac, err := api.NewClient(creds) - if err != nil { - return "", clues.Stack(err).WithClues(ctx) - } + for _, container := range destination.Elements() { + restoreLoc = restoreLoc.Append(container) - switch category { - case path.EmailCategory: - folders := append([]string{destination}, directory.Folders()...) - - if directoryCache == nil { - acm := ac.Mail() - mfc := &mailFolderCache{ - userID: user, - enumer: acm, - getter: acm, - } - - caches[category] = mfc - newCache = true - directoryCache = mfc - } - - return establishMailRestoreLocation( + ictx := clues.Add( ctx, - ac, - folders, - directoryCache, - user, - newCache, + "is_new_cache", isNewCache, + "container_parent_id", containerParentID, + "container_name", container, + "restore_location", restoreLoc) + + fid, err := getOrPopulateContainer( + ictx, + ca, + cache, + restoreLoc, + userID, + containerParentID, + container, + isNewCache, errs) - - case path.ContactsCategory: - folders := append([]string{destination}, directory.Folders()...) - - if directoryCache == nil { - acc := ac.Contacts() - cfc := &contactFolderCache{ - userID: user, - enumer: acc, - getter: acc, - } - caches[category] = cfc - newCache = true - directoryCache = cfc - } - - return establishContactsRestoreLocation( - ctx, - ac, - folders, - directoryCache, - user, - newCache, - errs) - - case path.EventsCategory: - dest := destination - - if directoryCache == nil { - ace := ac.Events() - ecc := &eventCalendarCache{ - userID: user, - getter: ace, - enumer: ace, - } - caches[category] = ecc - newCache = true - directoryCache = ecc - } - - folders := append([]string{dest}, directory.Folders()...) - - return establishEventsRestoreLocation( - ctx, - ac, - folders, - directoryCache, - user, - newCache, - errs) - - default: - return "", clues.New(fmt.Sprintf("type not supported: %T", category)).WithClues(ctx) - } -} - -// establishMailRestoreLocation creates Mail folders in sequence -// [root leaf1 leaf2] in a similar to a linked list. 
-// @param folders is the desired path from the root to the container -// that the items will be restored into -// @param isNewCache identifies if the cache is created and not populated -func establishMailRestoreLocation( - ctx context.Context, - ac api.Client, - folders []string, - mfc graph.ContainerResolver, - user string, - isNewCache bool, - errs *fault.Bus, -) (string, error) { - // Process starts with the root folder in order to recreate - // the top-level folder with the same tactic - folderID := rootFolderAlias - pb := path.Builder{} - - ctx = clues.Add(ctx, "is_new_cache", isNewCache) - - for _, folder := range folders { - pb = *pb.Append(folder) - - cached, ok := mfc.LocationInCache(pb.String()) - if ok { - folderID = cached - continue - } - - temp, err := ac.Mail().CreateMailFolderWithParent(ctx, user, folder, folderID) if err != nil { - // Should only error if cache malfunctions or incorrect parameters - return "", err + return "", cache, clues.Stack(err) } - folderID = ptr.Val(temp.GetId()) - - // Only populate the cache if we actually had to create it. Since we set - // newCache to false in this we'll only try to populate it once per function - // call even if we make a new cache. - if isNewCache { - if err := mfc.Populate(ctx, errs, rootFolderAlias); err != nil { - return "", clues.Wrap(err, "populating folder cache") - } - - isNewCache = false - } - - // NOOP if the folder is already in the cache. - if err = mfc.AddToCache(ctx, temp); err != nil { - return "", clues.Wrap(err, "adding folder to cache") - } + containerParentID = fid } - return folderID, nil + // containerParentID now identifies the last created container, + // not its parent. + return containerParentID, cache, nil } -// establishContactsRestoreLocation creates Contact Folders in sequence -// and updates the container resolver appropriately. Contact Folders are -// displayed in a flat representation. Therefore, only the root can be populated and all content -// must be restored into the root location. -// @param folders is the list of intended folders from root to leaf (e.g. 
[root ...]) -// @param isNewCache bool representation of whether Populate function needs to be run -func establishContactsRestoreLocation( +func getOrPopulateContainer( ctx context.Context, - ac api.Client, - folders []string, - cfc graph.ContainerResolver, - user string, + ca containerAPI, + gcr graph.ContainerResolver, + restoreLoc *path.Builder, + userID, containerParentID, containerName string, isNewCache bool, errs *fault.Bus, ) (string, error) { - cached, ok := cfc.LocationInCache(folders[0]) + cached, ok := gcr.LocationInCache(restoreLoc.String()) if ok { return cached, nil } - ctx = clues.Add(ctx, "is_new_cache", isNewCache) + c, err := ca.CreateContainer(ctx, userID, containerName, containerParentID) - temp, err := ac.Contacts().CreateContactFolder(ctx, user, folders[0]) - if err != nil { - return "", err - } - - folderID := ptr.Val(temp.GetId()) - - if isNewCache { - if err := cfc.Populate(ctx, errs, folderID, folders[0]); err != nil { - return "", clues.Wrap(err, "populating contact cache") - } - - if err = cfc.AddToCache(ctx, temp); err != nil { - return "", clues.Wrap(err, "adding contact folder to cache") - } - } - - return folderID, nil -} - -func establishEventsRestoreLocation( - ctx context.Context, - ac api.Client, - folders []string, - ecc graph.ContainerResolver, // eventCalendarCache - user string, - isNewCache bool, - errs *fault.Bus, -) (string, error) { - // Need to prefix with the "Other Calendars" folder so lookup happens properly. - cached, ok := ecc.LocationInCache(folders[0]) - if ok { - return cached, nil - } - - ctx = clues.Add(ctx, "is_new_cache", isNewCache) - - temp, err := ac.Events().CreateCalendar(ctx, user, folders[0]) - if err != nil && !graph.IsErrFolderExists(err) { - return "", err - } - - // 409 handling: Fetch folder if it exists and add to cache. - // This is rare, but may happen if CreateCalendar() POST fails with 5xx, - // potentially leaving dirty state in graph. + // 409 handling case: + // attempt to fetch the container by name and add that result to the cache. + // This is rare, but may happen if CreateContainer() POST fails with 5xx: + // sometimes the backend will create the folder despite the 5xx response, + // leaving our local containerResolver with inconsistent state. 
if graph.IsErrFolderExists(err) { - temp, err = ac.Events().GetContainerByName(ctx, user, folders[0]) - if err != nil { - return "", err + cs := ca.containerSearcher() + if cs != nil { + cc, e := cs.GetContainerByName(ctx, userID, containerName) + c = cc + err = clues.Stack(err, e) } } - folderID := ptr.Val(temp.GetId()) + if err != nil { + return "", clues.Wrap(err, "creating restore container") + } + + folderID := ptr.Val(c.GetId()) if isNewCache { - if err = ecc.Populate(ctx, errs, folderID, folders[0]); err != nil { - return "", clues.Wrap(err, "populating event cache") + if err := gcr.Populate(ctx, errs, folderID, ca.orRootContainer(restoreLoc.HeadElem())); err != nil { + return "", clues.Wrap(err, "populating container cache") } + } - displayable := api.CalendarDisplayable{Calendarable: temp} - if err = ecc.AddToCache(ctx, displayable); err != nil { - return "", clues.Wrap(err, "adding new calendar to cache") - } + if err = gcr.AddToCache(ctx, c); err != nil { + return "", clues.Wrap(err, "adding container to cache") } return folderID, nil } + +func uploadAttachments( + ctx context.Context, + ap attachmentPoster, + as []models.Attachmentable, + userID, destinationID, itemID string, + errs *fault.Bus, +) error { + el := errs.Local() + + for _, a := range as { + if el.Failure() != nil { + return el.Failure() + } + + err := uploadAttachment( + ctx, + ap, + userID, + destinationID, + itemID, + a) + if err != nil { + el.AddRecoverable(clues.Wrap(err, "uploading mail attachment").WithClues(ctx)) + } + } + + return el.Failure() +} diff --git a/src/internal/connector/exchange/testdata/handlers.go b/src/internal/connector/exchange/testdata/handlers.go new file mode 100644 index 000000000..396de9008 --- /dev/null +++ b/src/internal/connector/exchange/testdata/handlers.go @@ -0,0 +1,34 @@ +package testdata + +import ( + "context" + "testing" + + "github.com/alcionai/clues" + "github.com/stretchr/testify/require" + + "github.com/alcionai/corso/src/internal/connector/exchange" + "github.com/alcionai/corso/src/internal/connector/graph" + "github.com/alcionai/corso/src/pkg/fault" + "github.com/alcionai/corso/src/pkg/path" + "github.com/alcionai/corso/src/pkg/services/m365/api" +) + +func PopulateContainerCache( + t *testing.T, + ctx context.Context, //revive:disable-line:context-as-argument + ac api.Client, + category path.CategoryType, + resourceOwnerID string, + errs *fault.Bus, +) graph.ContainerResolver { + handler, ok := exchange.BackupHandlers(ac)[category] + require.Truef(t, ok, "container resolver registered for category %s", category) + + root, cc := handler.NewContainerCache(resourceOwnerID) + + err := cc.Populate(ctx, errs, root) + require.NoError(t, err, clues.ToCore(err)) + + return cc +} diff --git a/src/internal/connector/exchange/transform.go b/src/internal/connector/exchange/transform.go index f41775093..2f0af9530 100644 --- a/src/internal/connector/exchange/transform.go +++ b/src/internal/connector/exchange/transform.go @@ -56,14 +56,7 @@ func CloneMessageableFields(orig, message models.Messageable) models.Messageable func toMessage(orig models.Messageable) models.Messageable { message := models.NewMessage() - temp := CloneMessageableFields(orig, message) - - aMessage, ok := temp.(*models.Message) - if !ok { - return nil - } - - return aMessage + return CloneMessageableFields(orig, message) } // ToEventSimplified transforms an event to simplified restore format diff --git a/src/internal/connector/graph/cache_container.go b/src/internal/connector/graph/cache_container.go index 
2b12354f4..d83b17d74 100644 --- a/src/internal/connector/graph/cache_container.go +++ b/src/internal/connector/graph/cache_container.go @@ -64,7 +64,7 @@ type ContainerResolver interface { // @param ctx is necessary param for Graph API tracing // @param baseFolderID represents the M365ID base that the resolver will // conclude its search. Default input is "". - Populate(ctx context.Context, errs *fault.Bus, baseFolderID string, baseContainerPather ...string) error + Populate(ctx context.Context, errs *fault.Bus, baseFolderID string, baseContainerPath ...string) error // PathInCache performs a look up of a path representation // and returns the m365ID of directory iff the pathString diff --git a/src/internal/connector/graph/concurrency_middleware.go b/src/internal/connector/graph/concurrency_middleware.go index ba2a08fa6..c70988f65 100644 --- a/src/internal/connector/graph/concurrency_middleware.go +++ b/src/internal/connector/graph/concurrency_middleware.go @@ -142,6 +142,8 @@ type limiterConsumptionKey string const limiterConsumptionCtxKey limiterConsumptionKey = "corsoGraphRateLimiterConsumption" const ( + // https://learn.microsoft.com/en-us/sharepoint/dev/general-development + // /how-to-avoid-getting-throttled-or-blocked-in-sharepoint-online#application-throttling defaultLC = 1 driveDefaultLC = 2 // limit consumption rate for single-item GETs requests, diff --git a/src/internal/connector/graph/consts.go b/src/internal/connector/graph/consts.go index 0438ab10c..00785f1fa 100644 --- a/src/internal/connector/graph/consts.go +++ b/src/internal/connector/graph/consts.go @@ -7,7 +7,15 @@ import ( "github.com/alcionai/corso/src/pkg/path" ) -const AttachmentChunkSize = 4 * 1024 * 1024 +const ( + AttachmentChunkSize = 4 * 1024 * 1024 + + // Upper limit on the number of concurrent uploads as we + // create buffer pools for each upload. This is not the actual + // number of uploads, but the max that can be specified. This is + // added as a safeguard in case we misconfigure the values. + maxConccurrentUploads = 20 +) // --------------------------------------------------------------------------- // item response AdditionalData @@ -44,6 +52,8 @@ type parallelism struct { collectionBuffer int // sets the parallelism of item population within a collection. item int + // sets the parallelism of concurrent uploads within a collection + itemUpload int } func (p parallelism) CollectionBufferSize() int { @@ -88,6 +98,18 @@ func (p parallelism) Item() int { return p.item } +func (p parallelism) ItemUpload() int { + if p.itemUpload == 0 { + return 1 + } + + if p.itemUpload > maxConccurrentUploads { + return maxConccurrentUploads + } + + return p.itemUpload +} + // returns low <= v <= high // if high < low, returns low <= v func isWithin(low, high, v int) bool { @@ -102,6 +124,7 @@ var sp = map[path.ServiceType]parallelism{ path.OneDriveService: { collectionBuffer: 5, item: 4, + itemUpload: 7, }, // sharepoint libraries are considered "onedrive" parallelism. // this only controls lists/pages. 
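Editor's note: the `itemUpload` value added above only caps how many uploads may run at once; the actual fan-out appears in the OneDrive restore changes further down, which pair a semaphore channel sized by `ItemUpload()` with a `*sync.Pool` of reusable copy buffers so N concurrent uploads don't each allocate a fresh multi-megabyte slice. Below is a minimal, self-contained sketch of that pattern. The hardcoded `itemUpload := 7`, the `strings.NewReader` source, and `io.Discard` sink are stand-ins for the real `graph.Parallelism(path.OneDriveService).ItemUpload()`, the kopia-backed item stream, and the large-item writer; they are not part of the patch.

```go
package main

import (
	"fmt"
	"io"
	"strings"
	"sync"
)

// Microsoft recommends 5-10MB buffers for chunked uploads; pooling them
// keeps concurrent uploads from each allocating a new 5MB slice.
const copyBufferSize = 5 * 1024 * 1024

func main() {
	pool := &sync.Pool{
		New: func() any {
			b := make([]byte, copyBufferSize)
			return &b
		},
	}

	// stand-in for graph.Parallelism(path.OneDriveService).ItemUpload()
	itemUpload := 7
	semaphoreCh := make(chan struct{}, itemUpload)

	var wg sync.WaitGroup

	for i := 0; i < 20; i++ {
		wg.Add(1)
		semaphoreCh <- struct{}{} // blocks once itemUpload goroutines are in flight

		go func(n int) {
			defer wg.Done()
			defer func() { <-semaphoreCh }()

			// borrow a buffer; Put returns it to the pool for the next upload
			bufPtr := pool.Get().(*[]byte)
			defer pool.Put(bufPtr)

			// stand-in for io.CopyBuffer(largeItemWriter, progReader, copyBuffer)
			_, _ = io.CopyBuffer(io.Discard, strings.NewReader("item data"), *bufPtr)
			fmt.Println("uploaded item", n)
		}(i)
	}

	wg.Wait()
}
```

The channel-as-semaphore keeps the concurrency change local to the existing select loop, and the pool is created by the caller and passed into `RestoreCollections`, which is why the OneDrive and SharePoint restore signatures below both gain a `pool *sync.Pool` parameter.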
diff --git a/src/internal/connector/graph/errors.go b/src/internal/connector/graph/errors.go index 81886965e..bca55a9ec 100644 --- a/src/internal/connector/graph/errors.go +++ b/src/internal/connector/graph/errors.go @@ -139,6 +139,7 @@ func IsErrTimeout(err error) bool { } return errors.Is(err, ErrTimeout) || + errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || errors.Is(err, http.ErrHandlerTimeout) || os.IsTimeout(err) diff --git a/src/internal/connector/graph/service.go b/src/internal/connector/graph/service.go index 4335972a4..1c85e760c 100644 --- a/src/internal/connector/graph/service.go +++ b/src/internal/connector/graph/service.go @@ -13,7 +13,6 @@ import ( msgraphgocore "github.com/microsoftgraph/msgraph-sdk-go-core" "github.com/alcionai/corso/src/internal/common/idname" - "github.com/alcionai/corso/src/pkg/account" "github.com/alcionai/corso/src/pkg/path" ) @@ -41,7 +40,7 @@ func AllMetadataFileNames() []string { type QueryParams struct { Category path.CategoryType ResourceOwner idname.Provider - Credentials account.M365Config + TenantID string } // --------------------------------------------------------------------------- diff --git a/src/internal/connector/onedrive/collection.go b/src/internal/connector/onedrive/collection.go index 6f7cf3da8..f4f3807fb 100644 --- a/src/internal/connector/onedrive/collection.go +++ b/src/internal/connector/onedrive/collection.go @@ -439,12 +439,11 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { queuedPath = "/" + oc.driveName + queuedPath } - folderProgress, colCloser := observe.ProgressWithCount( + folderProgress := observe.ProgressWithCount( ctx, observe.ItemQueueMsg, path.NewElements(queuedPath), int64(len(oc.driveItems))) - defer colCloser() defer close(folderProgress) semaphoreCh := make(chan struct{}, graph.Parallelism(path.OneDriveService).Item()) @@ -535,13 +534,12 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { } // display/log the item download - progReader, closer := observe.ItemProgress( + progReader, _ := observe.ItemProgress( ctx, itemData, observe.ItemBackupMsg, clues.Hide(itemName+dataSuffix), itemSize) - go closer() return progReader, nil }) @@ -554,13 +552,12 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) { } metaReader := lazy.NewLazyReadCloser(func() (io.ReadCloser, error) { - progReader, closer := observe.ItemProgress( + progReader, _ := observe.ItemProgress( ctx, itemMeta, observe.ItemBackupMsg, clues.Hide(itemName+metaSuffix), int64(itemMetaSize)) - go closer() return progReader, nil }) diff --git a/src/internal/connector/onedrive/collections.go b/src/internal/connector/onedrive/collections.go index aabee1bcf..71e665053 100644 --- a/src/internal/connector/onedrive/collections.go +++ b/src/internal/connector/onedrive/collections.go @@ -283,8 +283,7 @@ func (c *Collections) Get( driveTombstones[driveID] = struct{}{} } - driveComplete, closer := observe.MessageWithCompletion(ctx, observe.Bulletf("files")) - defer closer() + driveComplete := observe.MessageWithCompletion(ctx, observe.Bulletf("files")) defer close(driveComplete) // Enumerate drives for the specified resourceOwner diff --git a/src/internal/connector/onedrive/item.go b/src/internal/connector/onedrive/item.go index ac05887cc..97578589f 100644 --- a/src/internal/connector/onedrive/item.go +++ b/src/internal/connector/onedrive/item.go @@ -346,27 +346,6 @@ func sharePointItemInfo(di models.DriveItemable, itemSize int64) *details.ShareP } } -// 
driveItemWriter is used to initialize and return an io.Writer to upload data for the specified item -// It does so by creating an upload session and using that URL to initialize an `itemWriter` -// TODO: @vkamra verify if var session is the desired input -func driveItemWriter( - ctx context.Context, - gs graph.Servicer, - driveID, itemID string, - itemSize int64, -) (io.Writer, error) { - ctx = clues.Add(ctx, "upload_item_id", itemID) - - r, err := api.PostDriveItem(ctx, gs, driveID, itemID) - if err != nil { - return nil, clues.Stack(err) - } - - iw := graph.NewLargeItemWriter(itemID, ptr.Val(r.GetUploadUrl()), itemSize) - - return iw, nil -} - // constructWebURL helper function for recreating the webURL // for the originating SharePoint site. Uses additional data map // from a models.DriveItemable that possesses a downloadURL within the map. diff --git a/src/internal/connector/onedrive/item_test.go b/src/internal/connector/onedrive/item_test.go index a30037b43..5ad4f537a 100644 --- a/src/internal/connector/onedrive/item_test.go +++ b/src/internal/connector/onedrive/item_test.go @@ -189,9 +189,13 @@ func (suite *ItemIntegrationSuite) TestItemWriter() { // Initialize a 100KB mockDataProvider td, writeSize := mockDataReader(int64(100 * 1024)) - w, err := driveItemWriter(ctx, srv, test.driveID, ptr.Val(newItem.GetId()), writeSize) + itemID := ptr.Val(newItem.GetId()) + + r, err := api.PostDriveItem(ctx, srv, test.driveID, itemID) require.NoError(t, err, clues.ToCore(err)) + w := graph.NewLargeItemWriter(itemID, ptr.Val(r.GetUploadUrl()), writeSize) + // Using a 32 KB buffer for the copy allows us to validate the // multi-part upload. `io.CopyBuffer` will only write 32 KB at // a time diff --git a/src/internal/connector/onedrive/restore.go b/src/internal/connector/onedrive/restore.go index 755496420..124668bb1 100644 --- a/src/internal/connector/onedrive/restore.go +++ b/src/internal/connector/onedrive/restore.go @@ -3,10 +3,13 @@ package onedrive import ( "context" "encoding/json" + "fmt" "io" "runtime/trace" "sort" "strings" + "sync" + "sync/atomic" "github.com/alcionai/clues" "github.com/pkg/errors" @@ -28,10 +31,10 @@ import ( "github.com/alcionai/corso/src/pkg/services/m365/api" ) -// copyBufferSize is used for chunked upload -// Microsoft recommends 5-10MB buffers -// https://docs.microsoft.com/en-us/graph/api/driveitem-createuploadsession?view=graph-rest-1.0#best-practices -const copyBufferSize = 5 * 1024 * 1024 +const ( + // Maximum number of retries for upload failures + maxUploadRetries = 3 +) type restoreCaches struct { Folders *folderCache @@ -59,6 +62,7 @@ func RestoreCollections( opts control.Options, dcs []data.RestoreCollection, deets *details.Builder, + pool *sync.Pool, errs *fault.Bus, ) (*support.ConnectorOperationStatus, error) { var ( @@ -104,6 +108,7 @@ func RestoreCollections( dest.ContainerName, deets, opts.RestorePermissions, + pool, errs) if err != nil { el.AddRecoverable(err) @@ -142,13 +147,18 @@ func RestoreCollection( restoreContainerName string, deets *details.Builder, restorePerms bool, + pool *sync.Pool, errs *fault.Bus, ) (support.CollectionMetrics, error) { var ( - metrics = support.CollectionMetrics{} - copyBuffer = make([]byte, copyBufferSize) - directory = dc.FullPath() - el = errs.Local() + metrics = support.CollectionMetrics{} + directory = dc.FullPath() + el = errs.Local() + metricsObjects int64 + metricsBytes int64 + metricsSuccess int64 + wg sync.WaitGroup + complete bool ) ctx, end := diagnostics.Span(ctx, "gc:drive:restoreCollection", 
diagnostics.Label("path", directory)) @@ -212,8 +222,30 @@ func RestoreCollection( caches.ParentDirToMeta[dc.FullPath().String()] = colMeta items := dc.Items(ctx, errs) + semaphoreCh := make(chan struct{}, graph.Parallelism(path.OneDriveService).ItemUpload()) + defer close(semaphoreCh) + + deetsLock := sync.Mutex{} + + updateDeets := func( + ctx context.Context, + repoRef path.Path, + locationRef *path.Builder, + updated bool, + info details.ItemInfo, + ) { + deetsLock.Lock() + defer deetsLock.Unlock() + + err = deets.Add(repoRef, locationRef, updated, info) + if err != nil { + // Not critical enough to need to stop restore operation. + logger.CtxErr(ctx, err).Infow("adding restored item to details") + } + } + for { - if el.Failure() != nil { + if el.Failure() != nil || complete { break } @@ -223,62 +255,76 @@ func RestoreCollection( case itemData, ok := <-items: if !ok { - return metrics, nil + // We've processed all items in this collection, exit the loop + complete = true + break } - ictx := clues.Add(ctx, "restore_item_id", itemData.UUID()) + wg.Add(1) + semaphoreCh <- struct{}{} - itemPath, err := dc.FullPath().AppendItem(itemData.UUID()) - if err != nil { - el.AddRecoverable(clues.Wrap(err, "appending item to full path").WithClues(ictx)) - continue - } + go func(ctx context.Context, itemData data.Stream) { + defer wg.Done() + defer func() { <-semaphoreCh }() - itemInfo, skipped, err := restoreItem( - ictx, - creds, - dc, - backupVersion, - source, - service, - drivePath, - restoreFolderID, - copyBuffer, - caches, - restorePerms, - itemData, - itemPath) + copyBufferPtr := pool.Get().(*[]byte) + defer pool.Put(copyBufferPtr) - // skipped items don't get counted, but they can error - if !skipped { - metrics.Objects++ - metrics.Bytes += int64(len(copyBuffer)) - } + copyBuffer := *copyBufferPtr - if err != nil { - el.AddRecoverable(clues.Wrap(err, "restoring item")) - continue - } + ictx := clues.Add(ctx, "restore_item_id", itemData.UUID()) - if skipped { - logger.Ctx(ictx).With("item_path", itemPath).Debug("did not restore item") - continue - } + itemPath, err := dc.FullPath().AppendItem(itemData.UUID()) + if err != nil { + el.AddRecoverable(clues.Wrap(err, "appending item to full path").WithClues(ictx)) + return + } - err = deets.Add( - itemPath, - &path.Builder{}, // TODO: implement locationRef - true, - itemInfo) - if err != nil { - // Not critical enough to need to stop restore operation. 
-				logger.CtxErr(ictx, err).Infow("adding restored item to details")
-			}
+				itemInfo, skipped, err := restoreItem(
+					ictx,
+					creds,
+					dc,
+					backupVersion,
+					source,
+					service,
+					drivePath,
+					restoreFolderID,
+					copyBuffer,
+					caches,
+					restorePerms,
+					itemData,
+					itemPath)
 
-			metrics.Successes++
+				// skipped items don't get counted, but they can error
+				if !skipped {
+					atomic.AddInt64(&metricsObjects, 1)
+					atomic.AddInt64(&metricsBytes, int64(len(copyBuffer)))
+				}
+
+				if err != nil {
+					el.AddRecoverable(clues.Wrap(err, "restoring item"))
+					return
+				}
+
+				if skipped {
+					logger.Ctx(ictx).With("item_path", itemPath).Debug("did not restore item")
+					return
+				}
+
+				// TODO: implement locationRef
+				updateDeets(ictx, itemPath, &path.Builder{}, true, itemInfo)
+
+				atomic.AddInt64(&metricsSuccess, 1)
+			}(ctx, itemData)
 		}
 	}
 
+	wg.Wait()
+
+	metrics.Objects = int(metricsObjects)
+	metrics.Bytes = metricsBytes
+	metrics.Successes = int(metricsSuccess)
+
 	return metrics, el.Failure()
 }
 
@@ -308,6 +354,7 @@ func restoreItem(
 			source,
 			service,
 			drivePath,
+			dc,
 			restoreFolderID,
 			copyBuffer,
 			itemData)
@@ -399,6 +446,7 @@ func restoreV0File(
 	source driveSource,
 	service graph.Servicer,
 	drivePath *path.DrivePath,
+	fetcher fileFetcher,
 	restoreFolderID string,
 	copyBuffer []byte,
 	itemData data.Stream,
@@ -406,6 +454,7 @@
 	_, itemInfo, err := restoreData(
 		ctx,
 		service,
+		fetcher,
 		itemData.UUID(),
 		itemData,
 		drivePath.DriveID,
@@ -442,6 +491,7 @@ func restoreV1File(
 	itemID, itemInfo, err := restoreData(
 		ctx,
 		service,
+		fetcher,
 		trimmedName,
 		itemData,
 		drivePath.DriveID,
@@ -525,6 +575,7 @@ func restoreV6File(
 	itemID, itemInfo, err := restoreData(
 		ctx,
 		service,
+		fetcher,
 		meta.FileName,
 		itemData,
 		drivePath.DriveID,
@@ -673,6 +724,7 @@ func createRestoreFolders(
 func restoreData(
 	ctx context.Context,
 	service graph.Servicer,
+	fetcher fileFetcher,
 	name string,
 	itemData data.Stream,
 	driveID, parentFolderID string,
@@ -696,26 +748,65 @@ func restoreData(
 		return "", details.ItemInfo{}, err
 	}
 
-	// Get a drive item writer
-	w, err := driveItemWriter(ctx, service, driveID, ptr.Val(newItem.GetId()), ss.Size())
+	itemID := ptr.Val(newItem.GetId())
+	ctx = clues.Add(ctx, "upload_item_id", itemID)
+
+	r, err := api.PostDriveItem(ctx, service, driveID, itemID)
 	if err != nil {
-		return "", details.ItemInfo{}, err
+		return "", details.ItemInfo{}, clues.Wrap(err, "get upload session")
 	}
 
-	iReader := itemData.ToReader()
-	progReader, closer := observe.ItemProgress(
-		ctx,
-		iReader,
-		observe.ItemRestoreMsg,
-		clues.Hide(name),
-		ss.Size())
+	var written int64
 
-	go closer()
+	// This loop retries only the file upload; the upload session creation
+	// is not retried. The extra logic is needed because, to retry an
+	// upload, we have to pull the file from kopia again: if an upload
	// fails we restart it from scratch. Graph does not register partial
+	// file uploads, so a failed upload leaves the file size at 0.
+	for i := 0; i <= maxUploadRetries; i++ {
+		// Initialize an io.Writer for the item's upload data, using the
+		// upload session URL to back an `itemWriter`.
+		// TODO: @vkamra verify if var session is the desired input
+		w := graph.NewLargeItemWriter(itemID, ptr.Val(r.GetUploadUrl()), ss.Size())
+
+		pname := name
+		iReader := itemData.ToReader()
+
+		if i > 0 {
+			pname = fmt.Sprintf("%s (retry %d)", name, i)
+
+			// If it is not the first try, we have to pull the file
+			// again from kopia.
Ideally we could just seek the stream + // but we don't have a Seeker available here. + itemData, err := fetcher.Fetch(ctx, itemData.UUID()) + if err != nil { + return "", details.ItemInfo{}, clues.Wrap(err, "get data file") + } + + iReader = itemData.ToReader() + } + + progReader, abort := observe.ItemProgress( + ctx, + iReader, + observe.ItemRestoreMsg, + clues.Hide(pname), + ss.Size()) + + // Upload the stream data + written, err = io.CopyBuffer(w, progReader, copyBuffer) + if err == nil { + break + } + + // clear out the bar if err + abort() + } - // Upload the stream data - written, err := io.CopyBuffer(w, progReader, copyBuffer) if err != nil { - return "", details.ItemInfo{}, graph.Wrap(ctx, err, "writing item bytes") + return "", details.ItemInfo{}, clues.Wrap(err, "uploading file") } dii := details.ItemInfo{} diff --git a/src/internal/connector/sharepoint/collection.go b/src/internal/connector/sharepoint/collection.go index 87bd268d7..da63e9b89 100644 --- a/src/internal/connector/sharepoint/collection.go +++ b/src/internal/connector/sharepoint/collection.go @@ -183,11 +183,10 @@ func (sc *Collection) runPopulate(ctx context.Context, errs *fault.Bus) (support ) // TODO: Insert correct ID for CollectionProgress - colProgress, closer := observe.CollectionProgress( + colProgress := observe.CollectionProgress( ctx, sc.fullPath.Category().String(), sc.fullPath.Folders()) - go closer() defer func() { close(colProgress) diff --git a/src/internal/connector/sharepoint/data_collections.go b/src/internal/connector/sharepoint/data_collections.go index e23de9493..f7f0f2481 100644 --- a/src/internal/connector/sharepoint/data_collections.go +++ b/src/internal/connector/sharepoint/data_collections.go @@ -61,10 +61,9 @@ func DataCollections( break } - foldersComplete, closer := observe.MessageWithCompletion( + foldersComplete := observe.MessageWithCompletion( ctx, observe.Bulletf("%s", scope.Category().PathType())) - defer closer() defer close(foldersComplete) var spcs []data.BackupCollection diff --git a/src/internal/connector/sharepoint/restore.go b/src/internal/connector/sharepoint/restore.go index f889a25b9..2f15152a8 100644 --- a/src/internal/connector/sharepoint/restore.go +++ b/src/internal/connector/sharepoint/restore.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "runtime/trace" + "sync" "github.com/alcionai/clues" "github.com/microsoftgraph/msgraph-sdk-go/models" @@ -48,6 +49,7 @@ func RestoreCollections( opts control.Options, dcs []data.RestoreCollection, deets *details.Builder, + pool *sync.Pool, errs *fault.Bus, ) (*support.ConnectorOperationStatus, error) { var ( @@ -90,6 +92,7 @@ func RestoreCollections( dest.ContainerName, deets, opts.RestorePermissions, + pool, errs) case path.ListsCategory: diff --git a/src/internal/kopia/base_finder.go b/src/internal/kopia/base_finder.go index 837691d89..b01c4401a 100644 --- a/src/internal/kopia/base_finder.go +++ b/src/internal/kopia/base_finder.go @@ -13,8 +13,40 @@ import ( "github.com/alcionai/corso/src/internal/operations/inject" "github.com/alcionai/corso/src/pkg/backup" "github.com/alcionai/corso/src/pkg/logger" + "github.com/alcionai/corso/src/pkg/path" ) +const ( + // Kopia does not do comparisons properly for empty tags right now so add some + // placeholder value to them. + defaultTagValue = "0" + + // Kopia CLI prefixes all user tags with "tag:"[1]. Maintaining this will + // ensure we don't accidentally take reserved tags and that tags can be + // displayed with kopia CLI. 
+	// (permalinks)
+	// [1] https://github.com/kopia/kopia/blob/05e729a7858a6e86cb48ba29fb53cb6045efce2b/cli/command_snapshot_create.go#L169
+	userTagPrefix = "tag:"
+)
+
+type Reason struct {
+	ResourceOwner string
+	Service       path.ServiceType
+	Category      path.CategoryType
+}
+
+func (r Reason) TagKeys() []string {
+	return []string{
+		r.ResourceOwner,
+		serviceCatString(r.Service, r.Category),
+	}
+}
+
+// Key is the concatenation of the ResourceOwner, Service, and Category.
+func (r Reason) Key() string {
+	return r.ResourceOwner + r.Service.String() + r.Category.String()
+}
+
 type backupBases struct {
 	backups    []BackupEntry
 	mergeBases []ManifestEntry
@@ -26,6 +58,63 @@ type BackupEntry struct {
 	Reasons []Reason
 }
 
+type ManifestEntry struct {
+	*snapshot.Manifest
+	// Reason contains the ResourceOwners and Service/Categories that caused this
+	// snapshot to be selected as a base. We can't reuse OwnersCats here because
+	// it's possible some ResourceOwners will have a subset of the Categories as
+	// the reason for selecting a snapshot. For example:
+	// 1. backup user1 email,contacts -> B1
+	// 2. backup user1 contacts -> B2 (uses B1 as base)
+	// 3. backup user1 email,contacts,events (uses B1 for email, B2 for contacts)
+	Reasons []Reason
+}
+
+func (me ManifestEntry) GetTag(key string) (string, bool) {
+	k, _ := makeTagKV(key)
+	v, ok := me.Tags[k]
+
+	return v, ok
+}
+
+type snapshotManager interface {
+	FindManifests(
+		ctx context.Context,
+		tags map[string]string,
+	) ([]*manifest.EntryMetadata, error)
+	LoadSnapshot(ctx context.Context, id manifest.ID) (*snapshot.Manifest, error)
+}
+
+func serviceCatString(s path.ServiceType, c path.CategoryType) string {
+	return s.String() + c.String()
+}
+
+// makeTagKV normalizes the provided key to protect it from clobbering
+// similarly named tags from non-user input (user inputs are still open
+// to collisions amongst each other).
+// Returns the normalized Key plus a default value. If you're embedding a
+// key-only tag, the returned default value must be used instead of an
+// empty string.
+func makeTagKV(k string) (string, string) { + return userTagPrefix + k, defaultTagValue +} + +func normalizeTagKVs(tags map[string]string) map[string]string { + t2 := make(map[string]string, len(tags)) + + for k, v := range tags { + mk, mv := makeTagKV(k) + + if len(v) == 0 { + v = mv + } + + t2[mk] = v + } + + return t2 +} + type baseFinder struct { sm snapshotManager bg inject.GetBackuper diff --git a/src/internal/kopia/base_finder_test.go b/src/internal/kopia/base_finder_test.go index 0c3761618..2382063cd 100644 --- a/src/internal/kopia/base_finder_test.go +++ b/src/internal/kopia/base_finder_test.go @@ -27,11 +27,9 @@ const ( var ( testT1 = time.Now() testT2 = testT1.Add(1 * time.Hour) - testT3 = testT2.Add(1 * time.Hour) testID1 = manifest.ID("snap1") testID2 = manifest.ID("snap2") - testID3 = manifest.ID("snap3") testBackup1 = "backupID1" testBackup2 = "backupID2" diff --git a/src/internal/kopia/conn.go b/src/internal/kopia/conn.go index 3447e810f..30870a1b3 100644 --- a/src/internal/kopia/conn.go +++ b/src/internal/kopia/conn.go @@ -418,18 +418,6 @@ func (w *conn) LoadSnapshot( return man, nil } -func (w *conn) LoadSnapshots( - ctx context.Context, - ids []manifest.ID, -) ([]*snapshot.Manifest, error) { - mans, err := snapshot.LoadSnapshots(ctx, w.Repository, ids) - if err != nil { - return nil, clues.Stack(err).WithClues(ctx) - } - - return mans, nil -} - func (w *conn) SnapshotRoot(man *snapshot.Manifest) (fs.Entry, error) { return snapshotfs.SnapshotRoot(w.Repository, man) } diff --git a/src/internal/kopia/snapshot_manager.go b/src/internal/kopia/snapshot_manager.go deleted file mode 100644 index 5fa09223c..000000000 --- a/src/internal/kopia/snapshot_manager.go +++ /dev/null @@ -1,307 +0,0 @@ -package kopia - -import ( - "context" - "sort" - - "github.com/alcionai/clues" - "github.com/kopia/kopia/repo/manifest" - "github.com/kopia/kopia/snapshot" - "golang.org/x/exp/maps" - - "github.com/alcionai/corso/src/pkg/logger" - "github.com/alcionai/corso/src/pkg/path" -) - -const ( - // Kopia does not do comparisons properly for empty tags right now so add some - // placeholder value to them. - defaultTagValue = "0" - - // Kopia CLI prefixes all user tags with "tag:"[1]. Maintaining this will - // ensure we don't accidentally take reserved tags and that tags can be - // displayed with kopia CLI. - // (permalinks) - // [1] https://github.com/kopia/kopia/blob/05e729a7858a6e86cb48ba29fb53cb6045efce2b/cli/command_snapshot_create.go#L169 - userTagPrefix = "tag:" -) - -type Reason struct { - ResourceOwner string - Service path.ServiceType - Category path.CategoryType -} - -func (r Reason) TagKeys() []string { - return []string{ - r.ResourceOwner, - serviceCatString(r.Service, r.Category), - } -} - -// Key is the concatenation of the ResourceOwner, Service, and Category. -func (r Reason) Key() string { - return r.ResourceOwner + r.Service.String() + r.Category.String() -} - -type ManifestEntry struct { - *snapshot.Manifest - // Reason contains the ResourceOwners and Service/Categories that caused this - // snapshot to be selected as a base. We can't reuse OwnersCats here because - // it's possible some ResourceOwners will have a subset of the Categories as - // the reason for selecting a snapshot. For example: - // 1. backup user1 email,contacts -> B1 - // 2. backup user1 contacts -> B2 (uses B1 as base) - // 3. 
backup user1 email,contacts,events (uses B1 for email, B2 for contacts) - Reasons []Reason -} - -func (me ManifestEntry) GetTag(key string) (string, bool) { - k, _ := makeTagKV(key) - v, ok := me.Tags[k] - - return v, ok -} - -type snapshotManager interface { - FindManifests( - ctx context.Context, - tags map[string]string, - ) ([]*manifest.EntryMetadata, error) - LoadSnapshot(ctx context.Context, id manifest.ID) (*snapshot.Manifest, error) - // TODO(ashmrtn): Remove this when we switch to the new BaseFinder. - LoadSnapshots(ctx context.Context, ids []manifest.ID) ([]*snapshot.Manifest, error) -} - -func serviceCatString(s path.ServiceType, c path.CategoryType) string { - return s.String() + c.String() -} - -// MakeTagKV normalizes the provided key to protect it from clobbering -// similarly named tags from non-user input (user inputs are still open -// to collisions amongst eachother). -// Returns the normalized Key plus a default value. If you're embedding a -// key-only tag, the returned default value msut be used instead of an -// empty string. -func makeTagKV(k string) (string, string) { - return userTagPrefix + k, defaultTagValue -} - -// getLastIdx searches for manifests contained in both foundMans and metas -// and returns the most recent complete manifest index and the manifest it -// corresponds to. If no complete manifest is in both lists returns nil, -1. -func getLastIdx( - foundMans map[manifest.ID]*ManifestEntry, - metas []*manifest.EntryMetadata, -) (*ManifestEntry, int) { - // Minor optimization: the current code seems to return the entries from - // earliest timestamp to latest (this is undocumented). Sort in the same - // fashion so that we don't incur a bunch of swaps. - sort.Slice(metas, func(i, j int) bool { - return metas[i].ModTime.Before(metas[j].ModTime) - }) - - // Search newest to oldest. - for i := len(metas) - 1; i >= 0; i-- { - m := foundMans[metas[i].ID] - if m == nil || len(m.IncompleteReason) > 0 { - continue - } - - return m, i - } - - return nil, -1 -} - -// manifestsSinceLastComplete searches through mans and returns the most recent -// complete manifest (if one exists), maybe the most recent incomplete -// manifest, and a bool denoting if a complete manifest was found. If the newest -// incomplete manifest is more recent than the newest complete manifest then -// adds it to the returned list. Otherwise no incomplete manifest is returned. -// Returns nil if there are no complete or incomplete manifests in mans. -func manifestsSinceLastComplete( - ctx context.Context, - mans []*snapshot.Manifest, -) ([]*snapshot.Manifest, bool) { - var ( - res []*snapshot.Manifest - foundIncomplete bool - foundComplete bool - ) - - // Manifests should maintain the sort order of the original IDs that were used - // to fetch the data, but just in case sort oldest to newest. - mans = snapshot.SortByTime(mans, false) - - for i := len(mans) - 1; i >= 0; i-- { - m := mans[i] - - if len(m.IncompleteReason) > 0 { - if !foundIncomplete { - res = append(res, m) - foundIncomplete = true - - logger.Ctx(ctx).Infow("found incomplete snapshot", "snapshot_id", m.ID) - } - - continue - } - - // Once we find a complete snapshot we're done, even if we haven't - // found an incomplete one yet. - res = append(res, m) - foundComplete = true - - logger.Ctx(ctx).Infow("found complete snapshot", "snapshot_id", m.ID) - - break - } - - return res, foundComplete -} - -// fetchPrevManifests returns the most recent, as-of-yet unfound complete and -// (maybe) incomplete manifests in metas. 
If the most recent incomplete manifest -// is older than the most recent complete manifest no incomplete manifest is -// returned. If only incomplete manifests exists, returns the most recent one. -// Returns no manifests if an error occurs. -func fetchPrevManifests( - ctx context.Context, - sm snapshotManager, - foundMans map[manifest.ID]*ManifestEntry, - reason Reason, - tags map[string]string, -) ([]*snapshot.Manifest, error) { - allTags := map[string]string{} - - for _, k := range reason.TagKeys() { - allTags[k] = "" - } - - maps.Copy(allTags, tags) - allTags = normalizeTagKVs(allTags) - - metas, err := sm.FindManifests(ctx, allTags) - if err != nil { - return nil, clues.Wrap(err, "fetching manifest metas by tag") - } - - if len(metas) == 0 { - return nil, nil - } - - man, lastCompleteIdx := getLastIdx(foundMans, metas) - - // We have a complete cached snapshot and it's the most recent. No need - // to do anything else. - if lastCompleteIdx == len(metas)-1 { - return []*snapshot.Manifest{man.Manifest}, nil - } - - // TODO(ashmrtn): Remainder of the function can be simplified if we can inject - // different tags to the snapshot checkpoints than the complete snapshot. - - // Fetch all manifests newer than the oldest complete snapshot. A little - // wasteful as we may also re-fetch the most recent incomplete manifest, but - // it reduces the complexity of returning the most recent incomplete manifest - // if it is newer than the most recent complete manifest. - ids := make([]manifest.ID, 0, len(metas)-(lastCompleteIdx+1)) - for i := lastCompleteIdx + 1; i < len(metas); i++ { - ids = append(ids, metas[i].ID) - } - - mans, err := sm.LoadSnapshots(ctx, ids) - if err != nil { - return nil, clues.Wrap(err, "fetching previous manifests") - } - - found, hasCompleted := manifestsSinceLastComplete(ctx, mans) - - // If we didn't find another complete manifest then we need to mark the - // previous complete manifest as having this ResourceOwner, Service, Category - // as the reason as well. - if !hasCompleted && man != nil { - found = append(found, man.Manifest) - logger.Ctx(ctx).Infow( - "reusing cached complete snapshot", - "snapshot_id", man.ID) - } - - return found, nil -} - -// fetchPrevSnapshotManifests returns a set of manifests for complete and maybe -// incomplete snapshots for the given (resource owner, service, category) -// tuples. Up to two manifests can be returned per tuple: one complete and one -// incomplete. An incomplete manifest may be returned if it is newer than the -// newest complete manifest for the tuple. Manifests are deduped such that if -// multiple tuples match the same manifest it will only be returned once. -// External callers can access this via wrapper.FetchPrevSnapshotManifests(). -// If tags are provided, manifests must include a superset of the k:v pairs -// specified by those tags. Tags should pass their raw values, and will be -// normalized inside the func using MakeTagKV. -func fetchPrevSnapshotManifests( - ctx context.Context, - sm snapshotManager, - reasons []Reason, - tags map[string]string, -) []*ManifestEntry { - mans := map[manifest.ID]*ManifestEntry{} - - // For each serviceCat/resource owner pair that we will be backing up, see if - // there's a previous incomplete snapshot and/or a previous complete snapshot - // we can pass in. Can be expanded to return more than the most recent - // snapshots, but may require more memory at runtime. 
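Both this deleted helper and its replacement in base_finder.go funnel search tags through makeTagKV/normalizeTagKVs before calling FindManifests: every user key gains kopia's "tag:" prefix and empty values are padded with a placeholder, since kopia does not compare empty tags correctly. A small standalone sketch of that normalization, mirroring the functions shown in the diff:

package main

import "fmt"

const (
	userTagPrefix   = "tag:" // kopia CLI's user-tag prefix
	defaultTagValue = "0"    // placeholder; kopia mishandles empty tag values
)

func makeTagKV(k string) (string, string) {
	return userTagPrefix + k, defaultTagValue
}

func normalizeTagKVs(tags map[string]string) map[string]string {
	t2 := make(map[string]string, len(tags))

	for k, v := range tags {
		mk, mv := makeTagKV(k)
		if len(v) == 0 {
			v = mv
		}

		t2[mk] = v
	}

	return t2
}

func main() {
	// A Reason contributes key-only tags (resource owner, service+category);
	// callers may add valued tags of their own.
	raw := map[string]string{
		"user1":                "",
		"ExchangeServiceemail": "",
		"custom":               "kept-as-is",
	}

	for k, v := range normalizeTagKVs(raw) {
		fmt.Printf("%s=%s\n", k, v)
	}
}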
- for _, reason := range reasons { - ictx := clues.Add(ctx, "service", reason.Service.String(), "category", reason.Category.String()) - logger.Ctx(ictx).Info("searching for previous manifests for reason") - - found, err := fetchPrevManifests(ictx, sm, mans, reason, tags) - if err != nil { - logger.CtxErr(ictx, err).Info("fetching previous snapshot manifests for service/category/resource owner") - - // Snapshot can still complete fine, just not as efficient. - continue - } - - // If we found more recent snapshots then add them. - for _, m := range found { - man := mans[m.ID] - if man == nil { - mans[m.ID] = &ManifestEntry{ - Manifest: m, - Reasons: []Reason{reason}, - } - - continue - } - - // This manifest has multiple reasons for being chosen. Merge them here. - man.Reasons = append(man.Reasons, reason) - } - } - - res := make([]*ManifestEntry, 0, len(mans)) - for _, m := range mans { - res = append(res, m) - } - - return res -} - -func normalizeTagKVs(tags map[string]string) map[string]string { - t2 := make(map[string]string, len(tags)) - - for k, v := range tags { - mk, mv := makeTagKV(k) - - if len(v) == 0 { - v = mv - } - - t2[mk] = v - } - - return t2 -} diff --git a/src/internal/kopia/snapshot_manager_test.go b/src/internal/kopia/snapshot_manager_test.go deleted file mode 100644 index 9acc1735f..000000000 --- a/src/internal/kopia/snapshot_manager_test.go +++ /dev/null @@ -1,932 +0,0 @@ -package kopia - -import ( - "context" - "testing" - "time" - - "github.com/alcionai/clues" - "github.com/kopia/kopia/fs" - "github.com/kopia/kopia/repo/manifest" - "github.com/kopia/kopia/snapshot" - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/suite" - - "github.com/alcionai/corso/src/internal/tester" - "github.com/alcionai/corso/src/pkg/path" -) - -func newManifestInfo( - id manifest.ID, - modTime time.Time, - incomplete bool, - tags ...string, -) manifestInfo { - incompleteStr := "" - if incomplete { - incompleteStr = "checkpoint" - } - - structTags := make(map[string]string, len(tags)) - - for _, t := range tags { - tk, _ := makeTagKV(t) - structTags[tk] = "" - } - - return manifestInfo{ - tags: structTags, - metadata: &manifest.EntryMetadata{ - ID: id, - ModTime: modTime, - }, - man: &snapshot.Manifest{ - ID: id, - StartTime: fs.UTCTimestamp(modTime.UnixNano()), - IncompleteReason: incompleteStr, - }, - } -} - -type mockSnapshotManager struct { - data []manifestInfo - loadCallback func(ids []manifest.ID) -} - -func matchesTags(mi manifestInfo, tags map[string]string) bool { - for k := range tags { - if _, ok := mi.tags[k]; !ok { - return false - } - } - - return true -} - -func (msm *mockSnapshotManager) FindManifests( - ctx context.Context, - tags map[string]string, -) ([]*manifest.EntryMetadata, error) { - if msm == nil { - return nil, assert.AnError - } - - res := []*manifest.EntryMetadata{} - - for _, mi := range msm.data { - if matchesTags(mi, tags) { - res = append(res, mi.metadata) - } - } - - return res, nil -} - -func (msm *mockSnapshotManager) LoadSnapshots( - ctx context.Context, - ids []manifest.ID, -) ([]*snapshot.Manifest, error) { - if msm == nil { - return nil, assert.AnError - } - - // Allow checking set of IDs passed in. 
- if msm.loadCallback != nil { - msm.loadCallback(ids) - } - - res := []*snapshot.Manifest{} - - for _, id := range ids { - for _, mi := range msm.data { - if mi.man.ID == id { - res = append(res, mi.man) - } - } - } - - return res, nil -} - -func (msm *mockSnapshotManager) LoadSnapshot( - ctx context.Context, - id manifest.ID, -) (*snapshot.Manifest, error) { - return nil, clues.New("not implemented") -} - -type SnapshotFetchUnitSuite struct { - tester.Suite -} - -func TestSnapshotFetchUnitSuite(t *testing.T) { - suite.Run(t, &SnapshotFetchUnitSuite{Suite: tester.NewUnitSuite(t)}) -} - -func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots() { - table := []struct { - name string - input []Reason - data []manifestInfo - // Use this to denote which manifests in data should be expected. Allows - // defining data in a table while not repeating things between data and - // expected. - expectedIdxs []int - // Use this to denote the Reasons a manifest is selected. The int maps to - // the index of the manifest in data. - expectedReasons map[int][]Reason - // Expected number of times a manifest should try to be loaded from kopia. - // Used to check that caching is functioning properly. - expectedLoadCounts map[manifest.ID]int - }{ - { - name: "AllOneSnapshot", - input: testAllUsersAllCats, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testEvents, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - }, - }, - { - name: "SplitByCategory", - input: testAllUsersAllCats, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testEvents, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - { - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - { - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - { - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "IncompleteNewerThanComplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - 
testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 3, - }, - }, - { - name: "IncompleteOlderThanComplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{1}, - expectedReasons: map[int][]Reason{ - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "OnlyIncomplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{0}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 3, - }, - }, - { - name: "NewestComplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{1}, - expectedReasons: map[int][]Reason{ - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "NewestIncomplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testIncompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testIncompleteMan, - 
testMail, - testUser1, - testUser2, - testUser3, - ), - }, - expectedIdxs: []int{1}, - expectedReasons: map[int][]Reason{ - 1: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 3, - testID2: 3, - }, - }, - { - name: "SomeCachedSomeNewer", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 2, - testID2: 1, - }, - }, - { - name: "SomeCachedSomeNewerDifferentCategories", - input: testAllUsersAllCats, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testEvents, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EventsCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 2, - testID2: 1, - }, - }, - { - name: "SomeCachedSomeNewerIncomplete", - input: testAllUsersMail, - data: []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - testUser2, - testUser3, - ), - newManifestInfo( - testID2, - testT2, - testIncompleteMan, - testMail, - testUser3, - ), - }, - expectedIdxs: []int{0, 1}, - expectedReasons: map[int][]Reason{ - 0: { - Reason{ - ResourceOwner: testUser1, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser2, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - 1: { - Reason{ - ResourceOwner: testUser3, - Service: path.ExchangeService, - Category: path.EmailCategory, - }, - }, - }, - expectedLoadCounts: map[manifest.ID]int{ - testID1: 1, - testID2: 1, - }, - }, - { - name: "NoMatches", - input: testAllUsersMail, - data: nil, - expectedIdxs: nil, - // Stop failure for nil-map comparison. 
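The "Stop failure for nil-map comparison" note above refers to a standard Go testing gotcha: deep equality treats a nil map and an empty map as different values even though both hold zero entries, so the expected value is seeded with an empty literal. A tiny illustration:

package main

import (
	"fmt"
	"reflect"
)

func main() {
	var nilMap map[string]int
	emptyMap := map[string]int{}

	// Both are empty, but deep equality (which assert.Equal-style helpers
	// rely on) still tells them apart.
	fmt.Println(len(nilMap), len(emptyMap))          // 0 0
	fmt.Println(reflect.DeepEqual(nilMap, emptyMap)) // false
}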
- expectedLoadCounts: map[manifest.ID]int{}, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - msm := &mockSnapshotManager{ - data: test.data, - } - - loadCounts := map[manifest.ID]int{} - msm.loadCallback = func(ids []manifest.ID) { - for _, id := range ids { - loadCounts[id]++ - } - } - - snaps := fetchPrevSnapshotManifests(ctx, msm, test.input, nil) - - // Check the proper snapshot manifests were returned. - expected := make([]*snapshot.Manifest, 0, len(test.expectedIdxs)) - for _, i := range test.expectedIdxs { - expected = append(expected, test.data[i].man) - } - - got := make([]*snapshot.Manifest, 0, len(snaps)) - for _, s := range snaps { - got = append(got, s.Manifest) - } - - assert.ElementsMatch(t, expected, got) - - // Check the reasons for selecting each manifest are correct. - expectedReasons := make(map[manifest.ID][]Reason, len(test.expectedReasons)) - for idx, reason := range test.expectedReasons { - expectedReasons[test.data[idx].man.ID] = reason - } - - for _, found := range snaps { - reason, ok := expectedReasons[found.ID] - if !ok { - // Missing or extra snapshots will be reported by earlier checks. - continue - } - - assert.ElementsMatch( - t, - reason, - found.Reasons, - "incorrect reasons for snapshot with ID %s", - found.ID, - ) - } - - // Check number of loads to make sure caching is working properly. - // Need to manually check because we don't know the order the - // user/service/category labels will be iterated over. For some tests this - // could cause more loads than the ideal case. - assert.Len(t, loadCounts, len(test.expectedLoadCounts)) - for id, count := range loadCounts { - assert.GreaterOrEqual(t, test.expectedLoadCounts[id], count) - } - }) - } -} - -func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots_customTags() { - data := []manifestInfo{ - newManifestInfo( - testID1, - testT1, - false, - testMail, - testUser1, - "fnords", - "smarf", - ), - } - expectLoad1T1 := map[manifest.ID]int{ - testID1: 1, - } - - table := []struct { - name string - input []Reason - tags map[string]string - // Use this to denote which manifests in data should be expected. Allows - // defining data in a table while not repeating things between data and - // expected. - expectedIdxs []int - // Expected number of times a manifest should try to be loaded from kopia. - // Used to check that caching is functioning properly. 
- expectedLoadCounts map[manifest.ID]int - }{ - { - name: "no tags specified", - tags: nil, - expectedIdxs: []int{0}, - expectedLoadCounts: expectLoad1T1, - }, - { - name: "all custom tags", - tags: map[string]string{ - "fnords": "", - "smarf": "", - }, - expectedIdxs: []int{0}, - expectedLoadCounts: expectLoad1T1, - }, - { - name: "subset of custom tags", - tags: map[string]string{"fnords": ""}, - expectedIdxs: []int{0}, - expectedLoadCounts: expectLoad1T1, - }, - { - name: "custom tag mismatch", - tags: map[string]string{"bojangles": ""}, - expectedIdxs: nil, - expectedLoadCounts: nil, - }, - } - - for _, test := range table { - suite.Run(test.name, func() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - msm := &mockSnapshotManager{ - data: data, - } - - loadCounts := map[manifest.ID]int{} - msm.loadCallback = func(ids []manifest.ID) { - for _, id := range ids { - loadCounts[id]++ - } - } - - snaps := fetchPrevSnapshotManifests(ctx, msm, testAllUsersAllCats, test.tags) - - expected := make([]*snapshot.Manifest, 0, len(test.expectedIdxs)) - for _, i := range test.expectedIdxs { - expected = append(expected, data[i].man) - } - - got := make([]*snapshot.Manifest, 0, len(snaps)) - for _, s := range snaps { - got = append(got, s.Manifest) - } - - assert.ElementsMatch(t, expected, got) - - // Need to manually check because we don't know the order the - // user/service/category labels will be iterated over. For some tests this - // could cause more loads than the ideal case. - assert.Len(t, loadCounts, len(test.expectedLoadCounts)) - for id, count := range loadCounts { - assert.GreaterOrEqual(t, test.expectedLoadCounts[id], count) - } - }) - } -} - -// mockErrorSnapshotManager returns an error the first time LoadSnapshot and -// FindSnapshot are called. After that it passes the calls through to the -// contained snapshotManager. -type mockErrorSnapshotManager struct { - retFindErr bool - retLoadErr bool - sm snapshotManager -} - -func (msm *mockErrorSnapshotManager) FindManifests( - ctx context.Context, - tags map[string]string, -) ([]*manifest.EntryMetadata, error) { - if !msm.retFindErr { - msm.retFindErr = true - return nil, assert.AnError - } - - return msm.sm.FindManifests(ctx, tags) -} - -func (msm *mockErrorSnapshotManager) LoadSnapshots( - ctx context.Context, - ids []manifest.ID, -) ([]*snapshot.Manifest, error) { - if !msm.retLoadErr { - msm.retLoadErr = true - return nil, assert.AnError - } - - return msm.sm.LoadSnapshots(ctx, ids) -} - -func (msm *mockErrorSnapshotManager) LoadSnapshot( - ctx context.Context, - id manifest.ID, -) (*snapshot.Manifest, error) { - return nil, clues.New("not implemented") -} - -func (suite *SnapshotFetchUnitSuite) TestFetchPrevSnapshots_withErrors() { - t := suite.T() - - ctx, flush := tester.NewContext(t) - defer flush() - - input := testAllUsersMail - mockData := []manifestInfo{ - newManifestInfo( - testID1, - testT1, - testCompleteMan, - testMail, - testUser1, - ), - newManifestInfo( - testID2, - testT2, - testCompleteMan, - testMail, - testUser2, - ), - newManifestInfo( - testID3, - testT3, - testCompleteMan, - testMail, - testUser3, - ), - } - - msm := &mockErrorSnapshotManager{ - sm: &mockSnapshotManager{ - data: mockData, - }, - } - - snaps := fetchPrevSnapshotManifests(ctx, msm, input, nil) - - // Only 1 snapshot should be chosen because the other two attempts fail. - // However, which one is returned is non-deterministic because maps are used. 
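The non-determinism called out in the comment above is a language property: Go deliberately randomizes map iteration order, so when two of the three fetch attempts fail, which manifest survives depends on the order the internal map is walked. That is why the test can only assert the count. For example:

package main

import "fmt"

func main() {
	snaps := map[string]bool{"snap1": true, "snap2": true, "snap3": true}

	// The first key seen here can differ on every run, so a test may
	// assert how many entries survive, but not which one.
	for id := range snaps {
		fmt.Println("first seen:", id)
		break
	}
}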
- assert.Len(t, snaps, 1) -} diff --git a/src/internal/kopia/wrapper.go b/src/internal/kopia/wrapper.go index f7830eded..f23bef534 100644 --- a/src/internal/kopia/wrapper.go +++ b/src/internal/kopia/wrapper.go @@ -596,27 +596,6 @@ func (w Wrapper) DeleteSnapshot( return nil } -// FetchPrevSnapshotManifests returns a set of manifests for complete and maybe -// incomplete snapshots for the given (resource owner, service, category) -// tuples. Up to two manifests can be returned per tuple: one complete and one -// incomplete. An incomplete manifest may be returned if it is newer than the -// newest complete manifest for the tuple. Manifests are deduped such that if -// multiple tuples match the same manifest it will only be returned once. -// If tags are provided, manifests must include a superset of the k:v pairs -// specified by those tags. Tags should pass their raw values, and will be -// normalized inside the func using MakeTagKV. -func (w Wrapper) FetchPrevSnapshotManifests( - ctx context.Context, - reasons []Reason, - tags map[string]string, -) ([]*ManifestEntry, error) { - if w.c == nil { - return nil, clues.Stack(errNotConnected).WithClues(ctx) - } - - return fetchPrevSnapshotManifests(ctx, w.c, reasons, tags), nil -} - func (w Wrapper) NewBaseFinder(bg inject.GetBackuper) (*baseFinder, error) { return newBaseFinder(w.c, bg) } diff --git a/src/internal/kopia/wrapper_test.go b/src/internal/kopia/wrapper_test.go index 00416c8ce..d00828bad 100644 --- a/src/internal/kopia/wrapper_test.go +++ b/src/internal/kopia/wrapper_test.go @@ -1101,14 +1101,8 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { subtreePath := subtreePathTmp.ToBuilder().Dir() - manifests, err := suite.w.FetchPrevSnapshotManifests( - suite.ctx, - []Reason{reason}, - nil, - ) - require.NoError(suite.T(), err, clues.ToCore(err)) - require.Len(suite.T(), manifests, 1) - require.Equal(suite.T(), suite.snapshotID, manifests[0].ID) + man, err := suite.w.c.LoadSnapshot(suite.ctx, suite.snapshotID) + require.NoError(suite.T(), err, "getting base snapshot: %v", clues.ToCore(err)) tags := map[string]string{} @@ -1206,7 +1200,7 @@ func (suite *KopiaSimpleRepoIntegrationSuite) TestBackupExcludeItem() { suite.ctx, []IncrementalBase{ { - Manifest: manifests[0].Manifest, + Manifest: man, SubtreePaths: []*path.Builder{ subtreePath, }, diff --git a/src/internal/observe/observe.go b/src/internal/observe/observe.go index ea822f431..bd94b6531 100644 --- a/src/internal/observe/observe.go +++ b/src/internal/observe/observe.go @@ -180,7 +180,7 @@ func Message(ctx context.Context, msgs ...any) { func MessageWithCompletion( ctx context.Context, msg any, -) (chan<- struct{}, func()) { +) chan<- struct{} { var ( plain = plainString(msg) loggable = fmt.Sprintf("%v", msg) @@ -191,7 +191,8 @@ func MessageWithCompletion( log.Info(loggable) if cfg.hidden() { - return ch, func() { log.Info("done - " + loggable) } + defer log.Info("done - " + loggable) + return ch } wg.Add(1) @@ -219,11 +220,11 @@ func MessageWithCompletion( bar.SetTotal(-1, true) }) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { log.Info("done - " + loggable) - }) + })() - return ch, wacb + return ch } // --------------------------------------------------------------------------- @@ -247,7 +248,8 @@ func ItemProgress( log.Debug(header) if cfg.hidden() || rc == nil || totalBytes == 0 { - return rc, func() { log.Debug("done - " + header) } + defer log.Debug("done - " + header) + return rc, func() {} } wg.Add(1) @@ -266,12 +268,17 @@ 
func ItemProgress( bar := progress.New(totalBytes, mpb.NopStyle(), barOpts...) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { // might be overly chatty, we can remove if needed. log.Debug("done - " + header) - }) + })() - return bar.ProxyReader(rc), wacb + abort := func() { + bar.SetTotal(-1, true) + bar.Abort(true) + } + + return bar.ProxyReader(rc), abort } // ProgressWithCount tracks the display of a bar that tracks the completion @@ -283,7 +290,7 @@ func ProgressWithCount( header string, msg any, count int64, -) (chan<- struct{}, func()) { +) chan<- struct{} { var ( plain = plainString(msg) loggable = fmt.Sprintf("%s %v - %d", header, msg, count) @@ -295,7 +302,10 @@ func ProgressWithCount( if cfg.hidden() { go listen(ctx, ch, nop, nop) - return ch, func() { log.Info("done - " + loggable) } + + defer log.Info("done - " + loggable) + + return ch } wg.Add(1) @@ -319,11 +329,11 @@ func ProgressWithCount( func() { bar.Abort(true) }, bar.Increment) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { log.Info("done - " + loggable) - }) + })() - return ch, wacb + return ch } // --------------------------------------------------------------------------- @@ -365,7 +375,7 @@ func CollectionProgress( ctx context.Context, category string, dirName any, -) (chan<- struct{}, func()) { +) chan<- struct{} { var ( counted int plain = plainString(dirName) @@ -388,7 +398,10 @@ func CollectionProgress( if cfg.hidden() || len(plain) == 0 { go listen(ctx, ch, nop, incCount) - return ch, func() { log.Infow("done - "+message, "count", counted) } + + defer log.Infow("done - "+message, "count", counted) + + return ch } wg.Add(1) @@ -420,18 +433,21 @@ func CollectionProgress( bar.Increment() }) - wacb := waitAndCloseBar(bar, func() { + go waitAndCloseBar(bar, func() { log.Infow("done - "+message, "count", counted) - }) + })() - return ch, wacb + return ch } func waitAndCloseBar(bar *mpb.Bar, log func()) func() { return func() { bar.Wait() wg.Done() - log() + + if !bar.Aborted() { + log() + } } } diff --git a/src/internal/observe/observe_test.go b/src/internal/observe/observe_test.go index 4fc36a912..08365462a 100644 --- a/src/internal/observe/observe_test.go +++ b/src/internal/observe/observe_test.go @@ -51,16 +51,14 @@ func (suite *ObserveProgressUnitSuite) TestItemProgress() { }() from := make([]byte, 100) - prog, closer := ItemProgress( + prog, abort := ItemProgress( ctx, io.NopCloser(bytes.NewReader(from)), "folder", tst, 100) require.NotNil(t, prog) - require.NotNil(t, closer) - - defer closer() + require.NotNil(t, abort) var i int @@ -105,9 +103,8 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnCtxCancel SeedWriter(context.Background(), nil, nil) }() - progCh, closer := CollectionProgress(ctx, testcat, testertons) + progCh := CollectionProgress(ctx, testcat, testertons) require.NotNil(t, progCh) - require.NotNil(t, closer) defer close(progCh) @@ -119,9 +116,6 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnCtxCancel time.Sleep(1 * time.Second) cancel() }() - - // blocks, but should resolve due to the ctx cancel - closer() } func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnChannelClose() { @@ -140,9 +134,8 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnChannelCl SeedWriter(context.Background(), nil, nil) }() - progCh, closer := CollectionProgress(ctx, testcat, testertons) + progCh := CollectionProgress(ctx, testcat, testertons) require.NotNil(t, progCh) - 
require.NotNil(t, closer) for i := 0; i < 50; i++ { progCh <- struct{}{} @@ -152,9 +145,6 @@ func (suite *ObserveProgressUnitSuite) TestCollectionProgress_unblockOnChannelCl time.Sleep(1 * time.Second) close(progCh) }() - - // blocks, but should resolve due to the cancel - closer() } func (suite *ObserveProgressUnitSuite) TestObserveProgress() { @@ -197,14 +187,11 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithCompletion() { message := "Test Message" - ch, closer := MessageWithCompletion(ctx, message) + ch := MessageWithCompletion(ctx, message) // Trigger completion ch <- struct{}{} - // Run the closer - this should complete because the bar was compelted above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -229,14 +216,11 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithChannelClosed() { message := "Test Message" - ch, closer := MessageWithCompletion(ctx, message) + ch := MessageWithCompletion(ctx, message) // Close channel without completing close(ch) - // Run the closer - this should complete because the channel was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -263,14 +247,11 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithContextCancelled() message := "Test Message" - _, closer := MessageWithCompletion(ctx, message) + _ = MessageWithCompletion(ctx, message) // cancel context cancel() - // Run the closer - this should complete because the context was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -296,15 +277,12 @@ func (suite *ObserveProgressUnitSuite) TestObserveProgressWithCount() { message := "Test Message" count := 3 - ch, closer := ProgressWithCount(ctx, header, message, int64(count)) + ch := ProgressWithCount(ctx, header, message, int64(count)) for i := 0; i < count; i++ { ch <- struct{}{} } - // Run the closer - this should complete because the context was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) @@ -331,13 +309,10 @@ func (suite *ObserveProgressUnitSuite) TestrogressWithCountChannelClosed() { message := "Test Message" count := 3 - ch, closer := ProgressWithCount(ctx, header, message, int64(count)) + ch := ProgressWithCount(ctx, header, message, int64(count)) close(ch) - // Run the closer - this should complete because the context was closed above - closer() - Complete() require.NotEmpty(t, recorder.String()) diff --git a/src/internal/operations/backup.go b/src/internal/operations/backup.go index 5ad400bb3..db7a007fa 100644 --- a/src/internal/operations/backup.go +++ b/src/internal/operations/backup.go @@ -407,11 +407,10 @@ func produceBackupDataCollections( ctrlOpts control.Options, errs *fault.Bus, ) ([]data.BackupCollection, prefixmatcher.StringSetReader, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Discovering items to backup") + complete := observe.MessageWithCompletion(ctx, "Discovering items to backup") defer func() { complete <- struct{}{} close(complete) - closer() }() return bp.ProduceBackupCollections( @@ -490,11 +489,10 @@ func consumeBackupCollections( isIncremental bool, errs *fault.Bus, ) (*kopia.BackupStats, *details.Builder, kopia.DetailsMergeInfoer, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Backing up data") + complete := observe.MessageWithCompletion(ctx, "Backing up data") defer func() { complete <- struct{}{} close(complete) - closer() }() tags := map[string]string{ diff --git a/src/internal/operations/backup_integration_test.go 
b/src/internal/operations/backup_integration_test.go index e0db072c8..786563393 100644 --- a/src/internal/operations/backup_integration_test.go +++ b/src/internal/operations/backup_integration_test.go @@ -24,6 +24,7 @@ import ( "github.com/alcionai/corso/src/internal/connector" "github.com/alcionai/corso/src/internal/connector/exchange" exchMock "github.com/alcionai/corso/src/internal/connector/exchange/mock" + exchTD "github.com/alcionai/corso/src/internal/connector/exchange/testdata" "github.com/alcionai/corso/src/internal/connector/graph" "github.com/alcionai/corso/src/internal/connector/mock" "github.com/alcionai/corso/src/internal/connector/onedrive" @@ -226,7 +227,10 @@ func checkBackupIsInManifests( found bool ) - mans, err := kw.FetchPrevSnapshotManifests(ctx, reasons, tags) + bf, err := kw.NewBaseFinder(bo.store) + require.NoError(t, err, clues.ToCore(err)) + + mans, err := bf.FindBases(ctx, reasons, tags) require.NoError(t, err, clues.ToCore(err)) for _, man := range mans { @@ -716,7 +720,7 @@ func (suite *BackupOpIntegrationSuite) TestBackup_Run_incrementalExchange() { testExchangeContinuousBackups(suite, control.Toggles{}) } -func (suite *BackupOpIntegrationSuite) TestBackup_Run_nonIncrementalExchange() { +func (suite *BackupOpIntegrationSuite) TestBackup_Run_incrementalNonDeltaExchange() { testExchangeContinuousBackups(suite, control.Toggles{DisableDelta: true}) } @@ -927,14 +931,7 @@ func testExchangeContinuousBackups(suite *BackupOpIntegrationSuite, toggles cont // verify test data was populated, and track it for comparisons // TODO: this can be swapped out for InDeets checks if we add itemRefs to folder ents. for category, gen := range dataset { - qp := graph.QueryParams{ - Category: category, - ResourceOwner: uidn, - Credentials: m365, - } - - cr, err := exchange.PopulateExchangeContainerResolver(ctx, qp, fault.New(true)) - require.NoError(t, err, "populating container resolver", category, clues.ToCore(err)) + cr := exchTD.PopulateContainerCache(t, ctx, ac, category, uidn.ID(), fault.New(true)) for destName, dest := range gen.dests { id, ok := cr.LocationInCache(dest.locRef) @@ -1040,19 +1037,12 @@ func testExchangeContinuousBackups(suite *BackupOpIntegrationSuite, toggles cont version.Backup, gen.dbf) - qp := graph.QueryParams{ - Category: category, - ResourceOwner: uidn, - Credentials: m365, - } - expectedLocRef := container3 if category == path.EmailCategory { expectedLocRef = path.Builder{}.Append(container3, container3).String() } - cr, err := exchange.PopulateExchangeContainerResolver(ctx, qp, fault.New(true)) - require.NoError(t, err, "populating container resolver", category, clues.ToCore(err)) + cr := exchTD.PopulateContainerCache(t, ctx, ac, category, uidn.ID(), fault.New(true)) id, ok := cr.LocationInCache(expectedLocRef) require.Truef(t, ok, "dir %s found in %s cache", expectedLocRef, category) diff --git a/src/internal/operations/restore.go b/src/internal/operations/restore.go index 5b2c9bd71..ab0b5a4c8 100644 --- a/src/internal/operations/restore.go +++ b/src/internal/operations/restore.go @@ -236,8 +236,7 @@ func (op *RestoreOperation) do( observe.Message(ctx, fmt.Sprintf("Discovered %d items in backup %s to restore", len(paths), op.BackupID)) - kopiaComplete, closer := observe.MessageWithCompletion(ctx, "Enumerating items in repository") - defer closer() + kopiaComplete := observe.MessageWithCompletion(ctx, "Enumerating items in repository") defer close(kopiaComplete) dcs, err := op.kopia.ProduceRestoreCollections(ctx, bup.SnapshotID, paths, 
opStats.bytesRead, op.Errors) @@ -322,11 +321,10 @@ func consumeRestoreCollections( dcs []data.RestoreCollection, errs *fault.Bus, ) (*details.Details, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Restoring data") + complete := observe.MessageWithCompletion(ctx, "Restoring data") defer func() { complete <- struct{}{} close(complete) - closer() }() deets, err := rc.ConsumeRestoreCollections( diff --git a/src/pkg/backup/details/details.go b/src/pkg/backup/details/details.go index 15a0e3a7d..c24f8fb42 100644 --- a/src/pkg/backup/details/details.go +++ b/src/pkg/backup/details/details.go @@ -388,10 +388,17 @@ func (b *Builder) Details() *Details { b.mu.Lock() defer b.mu.Unlock() - // Write the cached folder entries to details - b.d.Entries = append(b.d.Entries, maps.Values(b.knownFolders)...) + ents := make([]Entry, len(b.d.Entries)) + copy(ents, b.d.Entries) - return &b.d + // Write the cached folder entries to details + details := &Details{ + DetailsModel{ + Entries: append(ents, maps.Values(b.knownFolders)...), + }, + } + + return details } // -------------------------------------------------------------------------------- diff --git a/src/pkg/backup/details/details_test.go b/src/pkg/backup/details/details_test.go index d6aae6bbc..f6c1097ae 100644 --- a/src/pkg/backup/details/details_test.go +++ b/src/pkg/backup/details/details_test.go @@ -1033,6 +1033,84 @@ func (suite *DetailsUnitSuite) TestBuilder_Add_cleansFileIDSuffixes() { } } +func (suite *DetailsUnitSuite) TestBuilder_DetailsNoDuplicate() { + var ( + t = suite.T() + b = Builder{} + svc = path.OneDriveService + cat = path.FilesCategory + info = ItemInfo{ + OneDrive: &OneDriveInfo{ + ItemType: OneDriveItem, + ItemName: "in", + DriveName: "dn", + DriveID: "d", + }, + } + + dataSfx = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i1" + metadata.DataFileSuffix}) + dataSfx2 = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i2" + metadata.DataFileSuffix}) + dirMetaSfx = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i1" + metadata.DirMetaFileSuffix}) + metaSfx = makeItemPath(t, svc, cat, "t", "u", []string{"d", "r:", "f", "i1" + metadata.MetaFileSuffix}) + ) + + // Don't need to generate folders for this entry, we just want the itemRef + loc := &path.Builder{} + + err := b.Add(dataSfx, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + err = b.Add(dataSfx2, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + err = b.Add(dirMetaSfx, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + err = b.Add(metaSfx, loc, false, info) + require.NoError(t, err, clues.ToCore(err)) + + b.knownFolders = map[string]Entry{ + "dummy": { + RepoRef: "xyz", + ShortRef: "abcd", + ParentRef: "1234", + LocationRef: "ab", + ItemRef: "cd", + Updated: false, + ItemInfo: info, + }, + "dummy2": { + RepoRef: "xyz2", + ShortRef: "abcd2", + ParentRef: "12342", + LocationRef: "ab2", + ItemRef: "cd2", + Updated: false, + ItemInfo: info, + }, + "dummy3": { + RepoRef: "xyz3", + ShortRef: "abcd3", + ParentRef: "12343", + LocationRef: "ab3", + ItemRef: "cd3", + Updated: false, + ItemInfo: info, + }, + } + + // mark the capacity prior to calling details. + // if the entries slice gets modified and grows to a + // 5th space, then the capacity would grow as well. 
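The details.go hunk above fixes duplicate entries for API consumers: Details() used to append the cached folder entries onto the builder's own Entries slice, so every call re-appended the folders and mutated state shared with earlier callers. Copying the entries first leaves the builder untouched, which the test below verifies via length and capacity checks. A stripped-down model of the aliasing bug (names illustrative):

package main

import "fmt"

type builder struct {
	entries []string
	folders []string
}

// buggy appends into the builder's own slice; each call grows it, so the
// cached folders show up once per call.
func (b *builder) buggy() []string {
	b.entries = append(b.entries, b.folders...)
	return b.entries
}

// fixed copies the entries first, leaving b.entries (and its capacity)
// unchanged across calls.
func (b *builder) fixed() []string {
	out := make([]string, len(b.entries))
	copy(out, b.entries)

	return append(out, b.folders...)
}

func main() {
	b := &builder{entries: []string{"item"}, folders: []string{"folder"}}
	fmt.Println(len(b.fixed()), len(b.fixed())) // 2 2: stable across calls
	fmt.Println(len(b.buggy()), len(b.buggy())) // 2 3: folder added twice
}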
+ capCheck := cap(b.d.Entries) + + assert.Len(t, b.Details().Entries, 7) // 4 ents + 3 known folders + assert.Len(t, b.Details().Entries, 7) // possible reason for err: knownFolders got added twice + + assert.Len(t, b.d.Entries, 4) // len should not have grown + assert.Equal(t, capCheck, cap(b.d.Entries)) // capacity should not have grown +} + func makeItemPath( t *testing.T, service path.ServiceType, diff --git a/src/pkg/path/path.go b/src/pkg/path/path.go index 189e24449..f35e4a065 100644 --- a/src/pkg/path/path.go +++ b/src/pkg/path/path.go @@ -240,6 +240,15 @@ func (pb Builder) Dir() *Builder { } } +// HeadElem returns the first element in the Builder. +func (pb Builder) HeadElem() string { + if len(pb.elements) == 0 { + return "" + } + + return pb.elements[0] +} + // LastElem returns the last element in the Builder. func (pb Builder) LastElem() string { if len(pb.elements) == 0 { diff --git a/src/pkg/repository/repository.go b/src/pkg/repository/repository.go index 7f37f4eaf..fbe374223 100644 --- a/src/pkg/repository/repository.go +++ b/src/pkg/repository/repository.go @@ -203,8 +203,7 @@ func Connect( // their output getting clobbered (#1720) defer observe.Complete() - complete, closer := observe.MessageWithCompletion(ctx, "Connecting to repository") - defer closer() + complete := observe.MessageWithCompletion(ctx, "Connecting to repository") defer close(complete) kopiaRef := kopia.NewConn(s) @@ -630,11 +629,10 @@ func connectToM365( sel selectors.Selector, acct account.Account, ) (*connector.GraphConnector, error) { - complete, closer := observe.MessageWithCompletion(ctx, "Connecting to M365") + complete := observe.MessageWithCompletion(ctx, "Connecting to M365") defer func() { complete <- struct{}{} close(complete) - closer() }() // retrieve data from the producer diff --git a/src/pkg/services/m365/api/contacts.go b/src/pkg/services/m365/api/contacts.go index 8588711f8..7574223c9 100644 --- a/src/pkg/services/m365/api/contacts.go +++ b/src/pkg/services/m365/api/contacts.go @@ -34,12 +34,13 @@ type Contacts struct { // containers // --------------------------------------------------------------------------- -// CreateContactFolder makes a contact folder with the displayName of folderName. +// CreateContainer makes a contact folder with the displayName of folderName. // If successful, returns the created folder object. 
-func (c Contacts) CreateContactFolder(
+func (c Contacts) CreateContainer(
 	ctx context.Context,
 	userID, containerName string,
-) (models.ContactFolderable, error) {
+	_ string, // parentContainerID needed for iface, doesn't apply to contacts
+) (graph.Container, error) {
 	body := models.NewContactFolder()
 	body.SetDisplayName(ptr.To(containerName))
diff --git a/src/pkg/services/m365/api/events.go b/src/pkg/services/m365/api/events.go
index bd63490f8..73503c0a2 100644
--- a/src/pkg/services/m365/api/events.go
+++ b/src/pkg/services/m365/api/events.go
@@ -38,16 +38,17 @@ type Events struct {
 // containers
 // ---------------------------------------------------------------------------
 
-// CreateCalendar makes an event Calendar with the name in the user's M365 exchange account
+// CreateContainer makes an event Calendar with the given name in the user's M365 exchange account.
 // Reference: https://docs.microsoft.com/en-us/graph/api/user-post-calendars?view=graph-rest-1.0&tabs=go
-func (c Events) CreateCalendar(
+func (c Events) CreateContainer(
 	ctx context.Context,
 	userID, containerName string,
-) (models.Calendarable, error) {
+	_ string, // parentContainerID needed for iface, doesn't apply to events
+) (graph.Container, error) {
 	body := models.NewCalendar()
 	body.SetName(&containerName)
 
-	mdl, err := c.Stable.
+	container, err := c.Stable.
 		Client().
 		Users().
 		ByUserId(userID).
@@ -57,7 +58,7 @@ func (c Events) CreateCalendar(
 		return nil, graph.Wrap(ctx, err, "creating calendar")
 	}
 
-	return mdl, nil
+	return CalendarDisplayable{Calendarable: container}, nil
 }
 
 // DeleteContainer removes a calendar from user's M365 account
@@ -130,7 +131,7 @@ func (c Events) GetContainerByID(
 func (c Events) GetContainerByName(
 	ctx context.Context,
 	userID, containerName string,
-) (models.Calendarable, error) {
+) (graph.Container, error) {
 	filter := fmt.Sprintf("name eq '%s'", containerName)
 	options := &users.ItemCalendarsRequestBuilderGetRequestConfiguration{
 		QueryParameters: &users.ItemCalendarsRequestBuilderGetQueryParameters{
@@ -167,7 +168,7 @@ func (c Events) GetContainerByName(
 		return nil, err
 	}
 
-	return cal, nil
+	return graph.CalendarDisplayable{Calendarable: cal}, nil
 }
 
 func (c Events) PatchCalendar(
diff --git a/src/pkg/services/m365/api/mail.go b/src/pkg/services/m365/api/mail.go
index ba275e762..872547ffe 100644
--- a/src/pkg/services/m365/api/mail.go
+++ b/src/pkg/services/m365/api/mail.go
@@ -64,10 +64,10 @@ func (c Mail) CreateMailFolder(
 	return mdl, nil
 }
 
-func (c Mail) CreateMailFolderWithParent(
+func (c Mail) CreateContainer(
 	ctx context.Context,
 	userID, containerName, parentContainerID string,
-) (models.MailFolderable, error) {
+) (graph.Container, error) {
 	isHidden := false
 	body := models.NewMailFolder()
 	body.SetDisplayName(&containerName)