Compare commits

...

23 Commits

Author SHA1 Message Date
Abhishek Pandey
06d4f764e7 Add sharepoint fields 2023-08-18 19:26:31 +05:30
Abhishek Pandey
3ebb2c1c04 put back dq 2023-08-18 16:14:29 +05:30
Abhishek Pandey
b483703b47 Rebase fix 2023-08-18 16:14:29 +05:30
Abhishek Pandey
64ff2e60ea Remove log 2023-08-18 16:14:29 +05:30
Abhishek Pandey
416dda3d6d Switch back to full backups 2023-08-18 16:14:29 +05:30
Abhishek Pandey
3f609219a1 Add internal version of drive itemable 2023-08-18 16:14:29 +05:30
Abhishek Pandey
7f33d7beae Move to helper 2023-08-18 16:12:55 +05:30
Abhishek Pandey
5811f8c443 Add ctx 2023-08-18 16:06:04 +05:30
Abhishek Pandey
782bd158e7 Add size pkg 2023-08-18 15:54:40 +05:30
Abhishek Pandey
f785e588ab DQ logs 2023-08-16 19:01:14 +05:30
Abhishek Pandey
838288ea13 Revert sleep 2023-08-16 14:18:44 +05:30
Abhishek Pandey
f8af114c13 retain map, wait for 5 mins 2023-08-16 14:18:44 +05:30
Abhishek Pandey
e67bbac5ed DQ marker 2023-08-16 14:18:44 +05:30
Abhishek Pandey
e38c3ebdff Fix 2023-08-16 14:18:44 +05:30
Abhishek Pandey
16a363c3ef Only delta queries 2023-08-16 14:18:44 +05:30
Abhishek Pandey
11bbc98bdf free mem 2023-08-16 14:08:09 +05:30
Abhishek Pandey
e647659d4b Rebase 2023-08-16 14:08:09 +05:30
Abhishek Pandey
3c4715b8a4 free map 2023-08-16 14:08:09 +05:30
Abhishek Pandey
477dd5e142 Slightly less aggressive profiling 2023-08-16 14:08:09 +05:30
Abhishek Pandey
c62de327bf Add more mem usage counters 2023-08-16 14:08:09 +05:30
Abhishek Pandey
f593d97907 Add heap alloc 2023-08-16 14:08:09 +05:30
Abhishek Pandey
e3f08c1b04 more frequent profiles 2023-08-16 14:08:09 +05:30
Abhishek Pandey
def063733e profiling 2023-08-16 14:08:09 +05:30
12 changed files with 304 additions and 47 deletions

View File

@ -1,9 +1,84 @@
package main
import (
"context"
"log"
"os"
"runtime"
"runtime/pprof"
"strconv"
"time"
"github.com/alcionai/corso/src/cli"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/pkg/profile"
)
// profileTicker triggers a heap-profile dump every 5 minutes.
var profileTicker = time.NewTicker(300 * time.Second)

// printTicker triggers a memory-stats log line every second.
var printTicker = time.NewTicker(1 * time.Second)

// profileCounter numbers the emitted profile files (mem.0.pprof, mem.1.pprof, ...).
// Only ever touched from the single profile goroutine, so no synchronization.
var profileCounter = 0

// main wires up periodic memory profiling and then hands control to the
// Corso CLI. Two background goroutines run for the life of the process:
// one writes a numbered heap profile to disk every 5 minutes, the other
// logs memory statistics once per second.
//
// NOTE(review): the goroutines have no stop signal and the tickers are
// never stopped; tolerable for a process-lifetime debug harness, but not
// for reusable code.
func main() {
	defer profile.Start(profile.MemProfile).Stop()

	// Periodic heap-profile dumps.
	go func() {
		for range profileTicker.C {
			writeHeapProfile()
		}
	}()

	// Periodic memory-stats logging.
	go func() {
		for range printTicker.C {
			PrintMemUsage()
		}
	}()

	cli.Handle()
}

// writeHeapProfile writes one numbered heap profile file. Any failure is
// fatal, matching the original behavior for profile-write errors.
func writeHeapProfile() {
	filename := "mem." + strconv.Itoa(profileCounter) + ".pprof"

	f, err := os.Create(filename)
	if err != nil {
		log.Fatal("could not create memory profile file: ", err)
	}
	defer f.Close()

	if err := pprof.WriteHeapProfile(f); err != nil {
		log.Fatal("could not write memory profile: ", err)
	}

	profileCounter++
}
// PrintMemUsage logs the current, total and OS memory being used, as well
// as the number of garbage collection cycles completed.
//
// Byte-valued stats are converted to MB. HeapObjects, Mallocs, Frees, and
// NumGC are counts rather than byte sizes, so they are logged as raw
// values (the previous " MB" suffix on those lines was misleading).
func PrintMemUsage() {
	ctx := context.Background()

	var m runtime.MemStats

	runtime.ReadMemStats(&m)

	// For info on each field, see: https://golang.org/pkg/runtime/#MemStats
	logger.Ctx(ctx).Info("Alloc = ", bToMb(m.Alloc), " MB")
	logger.Ctx(ctx).Info("TotalAlloc = ", bToMb(m.TotalAlloc), " MB")
	logger.Ctx(ctx).Info("HeapAlloc = ", bToMb(m.HeapAlloc), " MB") // same as Alloc
	logger.Ctx(ctx).Info("HeapReleased = ", bToMb(m.HeapReleased), " MB")
	logger.Ctx(ctx).Info("HeapObjects = ", m.HeapObjects)
	logger.Ctx(ctx).Info("HeapSys = ", bToMb(m.HeapSys), " MB")
	logger.Ctx(ctx).Info("HeapIdle = ", bToMb(m.HeapIdle), " MB")
	logger.Ctx(ctx).Info("HeapInuse = ", bToMb(m.HeapInuse), " MB")
	logger.Ctx(ctx).Info("Mallocs = ", m.Mallocs)
	logger.Ctx(ctx).Info("Frees = ", m.Frees)
	logger.Ctx(ctx).Info("StackInuse = ", bToMb(m.StackInuse), " MB")
	logger.Ctx(ctx).Info("StackSys = ", bToMb(m.StackSys), " MB")
	logger.Ctx(ctx).Info("Sys = ", bToMb(m.Sys), " MB")
	logger.Ctx(ctx).Info("NumGC = ", m.NumGC)
}
// bToMb converts a byte count to whole mebibytes, truncating any
// fractional remainder.
func bToMb(b uint64) uint64 {
	return b >> 20
}

View File

@ -42,9 +42,11 @@ require (
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
github.com/andybalholm/brotli v1.0.5 // indirect
github.com/felixge/fgprof v0.9.3 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gofrs/flock v0.8.1 // indirect
github.com/golang-jwt/jwt/v5 v5.0.0 // indirect
github.com/google/pprof v0.0.0-20230602150820-91b7bce49751 // indirect
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
@ -65,6 +67,7 @@ require (
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.7.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.3.0 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.1.0 // indirect
github.com/DmitriyVTitov/size v1.5.0
github.com/beorn7/perks v1.0.1 // indirect
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
@ -99,6 +102,7 @@ require (
github.com/natefinch/atomic v1.0.1 // indirect
github.com/pierrec/lz4 v2.6.1+incompatible // indirect
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect
github.com/pkg/profile v1.7.0
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/client_golang v1.16.0 // indirect
github.com/prometheus/client_model v0.4.0 // indirect

View File

@ -48,6 +48,8 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/DATA-DOG/go-sqlmock v1.4.1 h1:ThlnYciV1iM/V0OSF/dtkqWb6xo5qITT1TJBG1MRDJM=
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/DmitriyVTitov/size v1.5.0 h1:/PzqxYrOyOUX1BXj6J9OuVRVGe+66VL4D9FlUaW515g=
github.com/DmitriyVTitov/size v1.5.0/go.mod h1:le6rNI4CoLQV1b9gzp1+3d7hMAD/uu2QcJ+aYbNgiU0=
github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 h1:IEjq88XO4PuBDcvmjQJcQGg+w+UaafSy8G5Kcb5tBhI=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
github.com/VividCortex/ewma v1.2.0/go.mod h1:nz4BbCtbLyFDeC9SUHbtcT5644juEuWfUAUnGx7j5l4=
@ -112,6 +114,8 @@ github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1m
github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po=
github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=
github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
github.com/fsnotify/fsnotify v1.6.0/go.mod h1:sl3t1tCWJFWoRz9R8WJCbQihKKwmorjAbSClcnxKAGw=
@ -139,6 +143,7 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
@ -190,6 +195,9 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf
github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
github.com/google/pprof v0.0.0-20230602150820-91b7bce49751 h1:hR7/MlvK23p6+lIw9SN1TigNLn9ZnF3W4SYRKq2gAHs=
github.com/google/pprof v0.0.0-20230602150820-91b7bce49751/go.mod h1:Jh3hGz2jkYak8qXPD19ryItVnUgpgeqzdkY/D0EaeuA=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I=
@ -219,6 +227,7 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
@ -319,6 +328,8 @@ github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA=
github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo=
github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@ -588,6 +599,7 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View File

@ -8,11 +8,14 @@ import (
"sync"
"sync/atomic"
"time"
"unsafe"
"github.com/alcionai/clues"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"github.com/spatialcurrent/go-lazy/pkg/lazy"
i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e "time"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/data"
"github.com/alcionai/corso/src/internal/m365/collection/drive/metadata"
@ -52,7 +55,7 @@ type Collection struct {
// represents
folderPath path.Path
// M365 IDs of file items within this collection
driveItems map[string]models.DriveItemable
driveItems map[string]CorsoDriveItemable
// Primary M365 ID of the drive this collection was created from
driveID string
@ -89,6 +92,120 @@ type Collection struct {
urlCache getItemPropertyer
}
// CorsoDriveItemable is an internal replica of models.DriveItemable,
// exposing only the accessors Corso actually consumes. Presumably this
// exists so the much larger kiota-backed DriveItem can be dropped after
// conversion (see ToCorsoDriveItemable and the size-logging helpers) —
// TODO confirm the memory motivation.
//
// The long i336074805f... identifier is the msgraph-sdk-generated import
// alias for the standard "time" package.
type CorsoDriveItemable interface {
	GetId() *string
	GetName() *string
	GetSize() *int64
	GetFile() models.Fileable
	// NOTE(review): the concrete CorsoDriveItem always returns nil here;
	// folder data is not carried over by ToCorsoDriveItemable.
	GetFolder() *models.Folder
	GetAdditionalData() map[string]interface{}
	GetParentReference() models.ItemReferenceable
	SetParentReference(models.ItemReferenceable)
	GetShared() models.Sharedable
	GetCreatedBy() models.IdentitySetable
	GetCreatedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time
	GetLastModifiedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time
	GetMalware() models.Malwareable
	GetSharepointIds() models.SharepointIdsable
	GetOdataType() *string
}
// CorsoDriveItem is the concrete implementation of CorsoDriveItemable.
// Unlike the kiota-generated models.DriveItem, values are held directly
// in plain exported fields with no backing store. It is populated from a
// models.DriveItemable via ToCorsoDriveItemable.
type CorsoDriveItem struct {
	ID                   *string
	Name                 *string
	Size                 *int64
	File                 models.Fileable
	AdditionalData       map[string]interface{}
	ParentReference      models.ItemReferenceable
	Shared               models.Sharedable
	CreatedBy            models.IdentitySetable
	CreatedDateTime      *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time
	LastModifiedDateTime *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time
	Malware              models.Malwareable
	SharepointIds        models.SharepointIdsable
	OdataType            *string
}
// GetId returns the item's M365 ID.
func (c *CorsoDriveItem) GetId() *string {
	return c.ID
}

// GetName returns the item's display name.
func (c *CorsoDriveItem) GetName() *string {
	return c.Name
}

// GetSize returns the item's size in bytes.
func (c *CorsoDriveItem) GetSize() *int64 {
	return c.Size
}

// GetFile returns the file facet of the item.
func (c *CorsoDriveItem) GetFile() models.Fileable {
	return c.File
}

// GetFolder always returns nil: CorsoDriveItem carries no folder field,
// and ToCorsoDriveItemable does not copy folder data. Present only to
// satisfy the CorsoDriveItemable interface.
func (c *CorsoDriveItem) GetFolder() *models.Folder {
	return nil
}

// GetAdditionalData returns the item's additional-data map (e.g. the
// download URL keys).
func (c *CorsoDriveItem) GetAdditionalData() map[string]interface{} {
	return c.AdditionalData
}

// GetParentReference returns the reference to the item's parent.
func (c *CorsoDriveItem) GetParentReference() models.ItemReferenceable {
	return c.ParentReference
}

// SetParentReference replaces the reference to the item's parent.
func (c *CorsoDriveItem) SetParentReference(parent models.ItemReferenceable) {
	c.ParentReference = parent
}

// GetShared returns the item's sharing facet.
func (c *CorsoDriveItem) GetShared() models.Sharedable {
	return c.Shared
}

// GetCreatedBy returns the identity set that created the item.
func (c *CorsoDriveItem) GetCreatedBy() models.IdentitySetable {
	return c.CreatedBy
}

// GetCreatedDateTime returns the item's creation time.
func (c *CorsoDriveItem) GetCreatedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time {
	return c.CreatedDateTime
}

// GetLastModifiedDateTime returns the item's last-modified time.
func (c *CorsoDriveItem) GetLastModifiedDateTime() *i336074805fc853987abe6f7fe3ad97a6a6f3077a16391fec744f671a015fbd7e.Time {
	return c.LastModifiedDateTime
}

// GetMalware returns the item's malware facet, if flagged.
func (c *CorsoDriveItem) GetMalware() models.Malwareable {
	return c.Malware
}

// GetSharepointIds returns the item's SharePoint identifiers.
func (c *CorsoDriveItem) GetSharepointIds() models.SharepointIdsable {
	return c.SharepointIds
}

// GetOdataType returns the item's OData type string.
func (c *CorsoDriveItem) GetOdataType() *string {
	return c.OdataType
}
// ToCorsoDriveItemable copies the fields Corso consumes out of a
// models.DriveItemable into the lightweight CorsoDriveItem replica.
//
// Returns nil when item is nil; callers already nil-check drive items
// (see getDriveItemContent), and calling getters on a nil interface
// would panic here otherwise.
//
// NOTE(review): folder data is intentionally (?) not copied — the
// resulting value's GetFolder always returns nil. Confirm folders never
// need to round-trip through this type.
func ToCorsoDriveItemable(item models.DriveItemable) CorsoDriveItemable {
	if item == nil {
		return nil
	}

	return &CorsoDriveItem{
		ID:                   item.GetId(),
		Name:                 item.GetName(),
		Size:                 item.GetSize(),
		File:                 item.GetFile(),
		ParentReference:      item.GetParentReference(),
		Shared:               item.GetShared(),
		CreatedBy:            item.GetCreatedBy(),
		CreatedDateTime:      item.GetCreatedDateTime(),
		LastModifiedDateTime: item.GetLastModifiedDateTime(),
		Malware:              item.GetMalware(),
		AdditionalData:       item.GetAdditionalData(),
		SharepointIds:        item.GetSharepointIds(),
		OdataType:            item.GetOdataType(),
	}
}
func pathToLocation(p path.Path) (*path.Builder, error) {
if p == nil {
return nil, nil
@ -160,7 +277,7 @@ func newColl(
handler: handler,
folderPath: currPath,
prevPath: prevPath,
driveItems: map[string]models.DriveItemable{},
driveItems: map[string]CorsoDriveItemable{},
driveID: driveID,
data: make(chan data.Item, graph.Parallelism(path.OneDriveMetadataService).CollectionBufferSize()),
statusUpdater: statusUpdater,
@ -178,8 +295,9 @@ func newColl(
// populated. The return values denotes if the item was previously
// present or is new one.
func (oc *Collection) Add(item models.DriveItemable) bool {
_, found := oc.driveItems[ptr.Val(item.GetId())]
oc.driveItems[ptr.Val(item.GetId())] = item
cdi := ToCorsoDriveItemable(item)
_, found := oc.driveItems[ptr.Val(cdi.GetId())]
oc.driveItems[ptr.Val(cdi.GetId())] = cdi
// if !found, it's a new addition
return !found
@ -265,26 +383,27 @@ func (i *Item) ModTime() time.Time { return i.info.Modified() }
func (oc *Collection) getDriveItemContent(
ctx context.Context,
driveID string,
item models.DriveItemable,
item CorsoDriveItemable,
errs *fault.Bus,
) (io.ReadCloser, error) {
var (
itemID = ptr.Val(item.GetId())
itemName = ptr.Val(item.GetName())
// itemID = ptr.Val(item.GetId())
// itemName = ptr.Val(item.GetName())
//el = errs.Local()
)
itemData, err := downloadContent(ctx, oc.handler, oc.urlCache, item, oc.driveID)
if err != nil {
if clues.HasLabel(err, graph.LabelsMalware) || (item != nil && item.GetMalware() != nil) {
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipMalware).Info("item flagged as malware")
errs.AddSkip(ctx, fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, graph.ItemInfo(item)))
//errs.AddSkip(ctx, fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, graph.ItemInfo(item)))
return nil, clues.Wrap(err, "malware item").Label(graph.LabelsSkippable)
}
if clues.HasLabel(err, graph.LabelStatus(http.StatusNotFound)) || graph.IsErrDeletedInFlight(err) {
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipNotFound).Info("item not found")
errs.AddSkip(ctx, fault.FileSkip(fault.SkipNotFound, driveID, itemID, itemName, graph.ItemInfo(item)))
//errs.AddSkip(ctx, fault.FileSkip(fault.SkipNotFound, driveID, itemID, itemName, graph.ItemInfo(item)))
return nil, clues.Wrap(err, "deleted item").Label(graph.LabelsSkippable)
}
@ -299,7 +418,7 @@ func (oc *Collection) getDriveItemContent(
// restore, or we have to handle it separately by somehow
// deleting the entire collection.
logger.CtxErr(ctx, err).With("skipped_reason", fault.SkipBigOneNote).Info("max OneNote file size exceeded")
errs.AddSkip(ctx, fault.FileSkip(fault.SkipBigOneNote, driveID, itemID, itemName, graph.ItemInfo(item)))
//errs.AddSkip(ctx, fault.FileSkip(fault.SkipBigOneNote, driveID, itemID, itemName, graph.ItemInfo(item)))
return nil, clues.Wrap(err, "max oneNote item").Label(graph.LabelsSkippable)
}
@ -327,7 +446,7 @@ func downloadContent(
ctx context.Context,
iaag itemAndAPIGetter,
uc getItemPropertyer,
item models.DriveItemable,
item CorsoDriveItemable,
driveID string,
) (io.ReadCloser, error) {
itemID := ptr.Val(item.GetId())
@ -360,7 +479,8 @@ func downloadContent(
return nil, clues.Wrap(err, "retrieving expired item")
}
content, err = downloadItem(ctx, iaag, di)
cdi := ToCorsoDriveItemable(di)
content, err = downloadItem(ctx, iaag, cdi)
if err != nil {
return nil, clues.Wrap(err, "content download retry")
}
@ -448,7 +568,7 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
wg.Add(1)
go func(item models.DriveItemable) {
go func(item CorsoDriveItemable) {
defer wg.Done()
defer func() { <-semaphoreCh }()
@ -467,13 +587,19 @@ func (oc *Collection) populateItems(ctx context.Context, errs *fault.Bus) {
wg.Wait()
// print memory for oc.DriveItems
size := unsafe.Sizeof(oc.driveItems)
logger.Ctx(ctx).Infow("driveItems map size", "size", size)
// free up memory
//oc.driveItems = make(map[string]models.DriveItemable)
oc.reportAsCompleted(ctx, int(stats.itemsFound), int(stats.itemsRead), stats.byteCount)
}
func (oc *Collection) populateDriveItem(
ctx context.Context,
parentPath *path.Builder,
item models.DriveItemable,
item CorsoDriveItemable,
stats *driveStats,
itemExtensionFactory []extensions.CreateItemExtensioner,
errs *fault.Bus,

View File

@ -8,9 +8,11 @@ import (
"strings"
"github.com/alcionai/clues"
"github.com/microsoft/kiota-abstractions-go/store"
"github.com/microsoftgraph/msgraph-sdk-go/models"
"golang.org/x/exp/maps"
sizePkg "github.com/DmitriyVTitov/size"
"github.com/alcionai/corso/src/internal/common/prefixmatcher"
"github.com/alcionai/corso/src/internal/common/ptr"
"github.com/alcionai/corso/src/internal/data"
@ -668,6 +670,20 @@ func (c *Collections) getCollectionPath(
return collectionPath, nil
}
// getBackingStoreSize logs the approximate in-memory size of a
// kiota-backed drive item and of its backing store. Diagnostic only; it
// never mutates the item.
//
// Uses comma-ok type assertions: the previous unchecked assertions would
// panic for any models.DriveItemable that is not a *models.DriveItem, or
// any backing store that is not an *store.InMemoryBackingStore.
func getBackingStoreSize(ctx context.Context, item models.DriveItemable) {
	driveItem, ok := item.(*models.DriveItem)
	if !ok {
		// Not the concrete kiota type; nothing comparable to measure.
		return
	}

	// Log the backing store size when the store is the in-memory variety.
	if st, ok := driveItem.GetBackingStore().(*store.InMemoryBackingStore); ok {
		logger.Ctx(ctx).Infow("store size", "in_mem_store_size", sizePkg.Of(st.Store))
	}

	logger.Ctx(ctx).Infow("drive item size", "in_mem_store_size", sizePkg.Of(driveItem))
}
// getBackingStoreSize1 logs the approximate in-memory size of the
// Corso-internal drive item representation, for comparison against the
// kiota-backed size logged by getBackingStoreSize. Diagnostic only.
//
// Uses a comma-ok type assertion: the previous unchecked assertion would
// panic for any CorsoDriveItemable that is not a *CorsoDriveItem.
func getBackingStoreSize1(ctx context.Context, item CorsoDriveItemable) {
	driveItem, ok := item.(*CorsoDriveItem)
	if !ok {
		return
	}

	logger.Ctx(ctx).Infow("corso serialized size", "in_mem_store_size", sizePkg.Of(driveItem))
}
// UpdateCollections initializes and adds the provided drive items to Collections
// A new collection is created for every drive folder (or package).
// oldPaths is the unchanged data that was loaded from the metadata file.
@ -694,10 +710,11 @@ func (c *Collections) UpdateCollections(
var (
itemID = ptr.Val(item.GetId())
itemName = ptr.Val(item.GetName())
ictx = clues.Add(ctx, "item_id", itemID, "item_name", clues.Hide(itemName))
isFolder = item.GetFolder() != nil || item.GetPackageEscaped() != nil
)
ictx := clues.Add(ctx, "item_id", itemID, "item_name", clues.Hide(itemName))
if item.GetMalware() != nil {
addtl := graph.ItemInfo(item)
skip := fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, addtl)
@ -809,7 +826,10 @@ func (c *Collections) UpdateCollections(
// that OneDrive always returns all folders on the path of an item
// before the item. This seems to hold true for now at least.
if col.Add(item) {
getBackingStoreSize(ictx, item)
c.NumItems++
cdi := ToCorsoDriveItemable(item)
getBackingStoreSize1(ictx, cdi)
}
case item.GetFile() != nil:
@ -846,8 +866,11 @@ func (c *Collections) UpdateCollections(
itemCollection[driveID][itemID] = parentID
if collection.Add(item) {
getBackingStoreSize(ictx, item)
c.NumItems++
c.NumFiles++
cdi := ToCorsoDriveItemable(item)
getBackingStoreSize1(ictx, cdi)
}
// Do this after adding the file to the collection so if we fail to add

View File

@ -20,7 +20,7 @@ type ItemInfoAugmenter interface {
// and kiota drops any SetSize update.
AugmentItemInfo(
dii details.ItemInfo,
item models.DriveItemable,
item CorsoDriveItemable,
size int64,
parentPath *path.Builder,
) details.ItemInfo

View File

@ -32,7 +32,7 @@ var downloadURLKeys = []string{
func downloadItem(
ctx context.Context,
ag api.Getter,
item models.DriveItemable,
item CorsoDriveItemable,
) (io.ReadCloser, error) {
if item == nil {
return nil, clues.New("nil item")
@ -135,7 +135,7 @@ func downloadItemMeta(
ctx context.Context,
gip GetItemPermissioner,
driveID string,
item models.DriveItemable,
item CorsoDriveItemable,
) (io.ReadCloser, int, error) {
meta := metadata.Metadata{FileName: ptr.Val(item.GetName())}

View File

@ -75,6 +75,7 @@ func collectItems(
pager.SetNext(prevDelta)
}
logger.Ctx(ctx).Info("delta query iteration begin")
for {
// assume delta urls here, which allows single-token consumption
page, err := pager.GetPage(graph.ConsumeNTokens(ctx, graph.SingleGetOrDeltaLC))
@ -128,6 +129,9 @@ func collectItems(
logger.Ctx(ctx).Debugw("Found nextLink", "link", nextLink)
pager.SetNext(nextLink)
}
logger.Ctx(ctx).Info("delta query iteration end")
logger.Ctx(ctx).Info("delta query iteration end")
return DeltaUpdate{URL: newDeltaURL, Reset: invalidPrevDelta}, newPaths, excluded, nil
}

View File

@ -81,7 +81,7 @@ func (h itemBackupHandler) NewItemPager(
func (h itemBackupHandler) AugmentItemInfo(
dii details.ItemInfo,
item models.DriveItemable,
item CorsoDriveItemable,
size int64,
parentPath *path.Builder,
) details.ItemInfo {
@ -158,7 +158,7 @@ func (h itemRestoreHandler) NewDrivePager(
// and kiota drops any SetSize update.
func (h itemRestoreHandler) AugmentItemInfo(
dii details.ItemInfo,
item models.DriveItemable,
item CorsoDriveItemable,
size int64,
parentPath *path.Builder,
) details.ItemInfo {
@ -243,7 +243,7 @@ func (h itemRestoreHandler) GetRootFolder(
func augmentItemInfo(
dii details.ItemInfo,
item models.DriveItemable,
item CorsoDriveItemable,
size int64,
parentPath *path.Builder,
) details.ItemInfo {

View File

@ -77,7 +77,7 @@ func (h libraryBackupHandler) NewItemPager(
func (h libraryBackupHandler) AugmentItemInfo(
dii details.ItemInfo,
item models.DriveItemable,
item CorsoDriveItemable,
size int64,
parentPath *path.Builder,
) details.ItemInfo {
@ -183,7 +183,7 @@ func (h libraryRestoreHandler) NewDrivePager(
func (h libraryRestoreHandler) AugmentItemInfo(
dii details.ItemInfo,
item models.DriveItemable,
item CorsoDriveItemable,
size int64,
parentPath *path.Builder,
) details.ItemInfo {
@ -268,7 +268,7 @@ func (h libraryRestoreHandler) GetRootFolder(
func augmentLibraryItemInfo(
dii details.ItemInfo,
item models.DriveItemable,
item CorsoDriveItemable,
size int64,
parentPath *path.Builder,
) details.ItemInfo {

View File

@ -842,8 +842,8 @@ func restoreFile(
}
defer closeProgressBar()
dii := ir.AugmentItemInfo(details.ItemInfo{}, newItem, written, nil)
cdi := ToCorsoDriveItemable(newItem)
dii := ir.AugmentItemInfo(details.ItemInfo{}, cdi, written, nil)
if shouldDeleteOriginal {
ctr.Inc(count.CollisionReplace)

View File

@ -369,27 +369,40 @@ func (op *BackupOperation) do(
lastBackupVersion = mans.MinBackupVersion()
}
// TODO(ashmrtn): This should probably just return a collection that deletes
// the entire subtree instead of returning an additional bool. That way base
// selection is controlled completely by flags and merging is controlled
// completely by collections.
cs, ssmb, canUsePreviousBackup, err := produceBackupDataCollections(
ctx,
op.bp,
op.ResourceOwner,
op.Selectors,
mdColls,
lastBackupVersion,
op.Options,
op.Errors)
if err != nil {
return nil, clues.Wrap(err, "producing backup data collections")
}
// Run 3 times and exit
cs := []data.BackupCollection{}
canUsePreviousBackup := false
ctx = clues.Add(
ctx,
"can_use_previous_backup", canUsePreviousBackup,
"collection_count", len(cs))
var maxCount int = 2
for i := 0; i < maxCount; i++ {
logger.Ctx(ctx).Info("delta query iteration")
cs, _, canUsePreviousBackup, err := produceBackupDataCollections(
ctx,
op.bp,
op.ResourceOwner,
op.Selectors,
mdColls,
lastBackupVersion,
op.Options,
op.Errors)
if err != nil {
return nil, clues.Wrap(err, "producing backup data collections")
}
ctx = clues.Add(
ctx,
"can_use_previous_backup", canUsePreviousBackup,
"collection_count", len(cs))
// sleep for 5 mins
//time.Sleep(5 * time.Minute)
if i == maxCount-1 {
return nil, clues.New("unable to produce backup collections").WithClues(ctx)
}
}
writeStats, deets, toMerge, err := consumeBackupCollections(
ctx,
@ -398,7 +411,7 @@ func (op *BackupOperation) do(
reasons,
mans,
cs,
ssmb,
nil,
backupID,
op.incremental && canUseMetadata && canUsePreviousBackup,
op.Errors)