Speed up Exchange backup by parallelizing url fetch (#1608)
## Description

Rough numbers: backing up an account with ~3000 emails, ~1000 contacts, and ~1000 events drops from ~18m to <3m.

## Type of change

- [ ] 🌻 Feature
- [ ] 🐛 Bugfix
- [ ] 🗺️ Documentation
- [ ] 🤖 Test
- [ ] 💻 CI/Deployment
- [ ] 🐹 Trivial/Minor

## Issue(s)

* https://github.com/alcionai/corso/issues/362
* https://github.com/alcionai/corso/issues/1595
* https://github.com/alcionai/corso/pull/1607

## Test Plan

- [ ] 💪 Manual
- [x] ⚡ Unit test
- [x] 💚 E2E
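In outline, the speedup comes from fanning the per-item URL fetches out over a small, bounded pool of goroutines instead of issuing them one at a time: a buffered channel acts as a counting semaphore capped at the Outlook service limit of 4 concurrent requests, a `sync.WaitGroup` joins the workers, and the shared success/byte counters become atomics. A minimal, runnable sketch of that pattern (not the PR's code; the `fetchItem` helper and the sample job list are illustrative only):

```go
package main

import (
	"fmt"
	"sync"
	"sync/atomic"
)

// Capped at the Outlook limit cited in the PR (max 4 concurrent requests),
// the same value used for urlPrefetchChannelBufferSize in the diff below.
const maxConcurrentFetches = 4

// fetchItem stands in for the per-item Graph query plus serialization;
// it is illustrative only and not part of the actual change.
func fetchItem(id string) (int64, error) {
	return int64(len(id)), nil
}

func main() {
	var (
		wg         sync.WaitGroup
		success    int64
		totalBytes int64

		// Buffered channel used as a counting semaphore: sends block once
		// maxConcurrentFetches workers are already in flight.
		semaphoreCh = make(chan struct{}, maxConcurrentFetches)
	)

	jobs := []string{"item-1", "item-2", "item-3", "item-4", "item-5", "item-6"}

	for _, id := range jobs {
		semaphoreCh <- struct{}{} // acquire a slot
		wg.Add(1)

		go func(id string) {
			defer wg.Done()
			defer func() { <-semaphoreCh }() // release the slot

			n, err := fetchItem(id)
			if err != nil {
				return
			}

			// Counters are shared across goroutines, so update them atomically.
			atomic.AddInt64(&success, 1)
			atomic.AddInt64(&totalBytes, n)
		}(id)
	}

	wg.Wait()
	fmt.Printf("fetched %d items, %d bytes\n", success, totalBytes)
}
```

With four requests in flight at a time, the total fetch time is bounded by the slowest items in each batch rather than the sum of every request's latency, which is the mechanism behind the ~18m to <3m improvement described above.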
parent 58e70166d4, commit 784db9e12e
```diff
@@ -8,6 +8,9 @@ import (
 	"context"
 	"fmt"
 	"io"
+	"sync"
+	"sync/atomic"
+	"time"
 
 	absser "github.com/microsoft/kiota-abstractions-go/serialization"
 	kw "github.com/microsoft/kiota-serialization-json-go"
@@ -33,6 +36,10 @@ var (
 const (
 	collectionChannelBufferSize = 1000
 	numberOfRetries             = 4
+
+	// Outlooks expects max 4 concurrent requests
+	// https://learn.microsoft.com/en-us/graph/throttling-limits#outlook-service-limits
+	urlPrefetchChannelBufferSize = 4
 )
 
 // Collection implements the interface from data.Collection
@@ -115,11 +122,11 @@ func (col *Collection) populateByOptionIdentifier(
 ) {
 	var (
 		errs       error
-		success    int
+		success    int64
 		totalBytes int64
+		wg         sync.WaitGroup
 
 		user = col.user
-		objectWriter = kw.NewJsonSerializationWriter()
 	)
 
 	colProgress, closer := observe.CollectionProgress(user, col.fullPath.Category().String(), col.fullPath.Folder())
@@ -127,7 +134,7 @@ func (col *Collection) populateByOptionIdentifier(
 
 	defer func() {
 		close(colProgress)
-		col.finishPopulation(ctx, success, totalBytes, errs)
+		col.finishPopulation(ctx, int(success), totalBytes, errs)
 	}()
 
 	// get QueryBasedonIdentifier
@@ -139,34 +146,61 @@ func (col *Collection) populateByOptionIdentifier(
 		return
 	}
 
-	for _, identifier := range col.jobs {
-		response, err := query(ctx, col.service, user, identifier)
-		if err != nil {
-			errs = support.WrapAndAppendf(user, err, errs)
-
-			if col.service.ErrPolicy() {
-				break
-			}
-
-			continue
-		}
-
-		byteCount, err := serializeFunc(ctx, col.service.Client(), objectWriter, col.data, response, user)
-		if err != nil {
-			errs = support.WrapAndAppendf(user, err, errs)
-
-			if col.service.ErrPolicy() {
-				break
-			}
-
-			continue
-		}
-
-		success++
-
-		totalBytes += int64(byteCount)
-		colProgress <- struct{}{}
+	// Limit the max number of active requests to GC
+	semaphoreCh := make(chan struct{}, urlPrefetchChannelBufferSize)
+	defer close(semaphoreCh)
+
+	errUpdater := func(user string, err error) {
+		errs = support.WrapAndAppend(user, err, errs)
 	}
+
+	for _, identifier := range col.jobs {
+		if col.service.ErrPolicy() && errs != nil {
+			break
+		}
+		semaphoreCh <- struct{}{}
+
+		wg.Add(1)
+
+		go func(identifier string) {
+			defer wg.Done()
+			defer func() { <-semaphoreCh }()
+
+			var (
+				response absser.Parsable
+				err      error
+			)
+
+			for i := 1; i <= numberOfRetries; i++ {
+				response, err = query(ctx, col.service, user, identifier)
+				if err == nil {
+					break
+				}
+				// TODO: Tweak sleep times
+				if i < numberOfRetries {
+					time.Sleep(time.Duration(3*(i+1)) * time.Second)
+				}
+			}
+
+			if err != nil {
+				errUpdater(user, err)
+				return
+			}
+
+			byteCount, err := serializeFunc(ctx, col.service.Client(), kw.NewJsonSerializationWriter(), col.data, response, user)
+			if err != nil {
+				errUpdater(user, err)
+				return
+			}
+
+			atomic.AddInt64(&success, 1)
+			atomic.AddInt64(&totalBytes, int64(byteCount))
+
+			colProgress <- struct{}{}
+		}(identifier)
+	}
+
+	wg.Wait()
 }
 
 // terminatePopulateSequence is a utility function used to close a Collection's data channel
```
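As a side note, the retry added inside each worker goroutine is a bounded-attempt loop with a linearly growing sleep: with `numberOfRetries = 4`, failed attempts are separated by 6s, 9s, and 12s pauses before the last error is reported. A standalone sketch of the same pattern (the `fetchWithRetry` helper and its `fetch` callback are illustrative, not part of the change):

```go
package main

import (
	"context"
	"errors"
	"fmt"
	"time"
)

const numberOfRetries = 4

// fetchWithRetry mirrors the per-item retry loop: up to numberOfRetries
// attempts, sleeping 3*(i+1) seconds after attempt i fails (6s, 9s, 12s),
// and returning the last error once attempts are exhausted.
func fetchWithRetry(ctx context.Context, fetch func(context.Context) (string, error)) (string, error) {
	var (
		response string
		err      error
	)

	for i := 1; i <= numberOfRetries; i++ {
		response, err = fetch(ctx)
		if err == nil {
			return response, nil
		}

		if i < numberOfRetries {
			time.Sleep(time.Duration(3*(i+1)) * time.Second)
		}
	}

	return "", err
}

func main() {
	attempts := 0

	// Flaky fetch that fails twice before succeeding, for demonstration only.
	fetch := func(ctx context.Context) (string, error) {
		attempts++
		if attempts < 3 {
			return "", errors.New("throttled")
		}
		return "item payload", nil
	}

	out, err := fetchWithRetry(context.Background(), fetch)
	fmt.Println(out, err, attempts)
}
```

The in-tree comment flags the sleep schedule as provisional (`// TODO: Tweak sleep times`), so the backoff values above should be read as the current placeholder rather than a tuned policy.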