Compare commits

...

11 Commits

Author SHA1 Message Date
Abhishek Pandey  e93bc47660  remove file, folder etc logic       2023-11-20 09:28:33 -08:00
Abhishek Pandey  d8d4947bbf  only have Id                        2023-11-19 17:44:03 -08:00
Abhishek Pandey  b1cf23bee3  remove sizeof pkg                   2023-11-17 20:30:38 -08:00
Abhishek Pandey  ba8e5828b8  4min pause before gc                2023-11-17 19:59:03 -08:00
Abhishek Pandey  b148be72dc  1GB gomemlim                        2023-11-17 19:19:16 -08:00
Abhishek Pandey  dbaaac1a6e  profile on 3GB                      2023-11-17 18:08:13 -08:00
Abhishek Pandey  aa2522d03c  Only do 1 iteration                 2023-11-17 17:51:19 -08:00
Abhishek Pandey  6aac9b521f  Take a profile if > 5GB mem usage   2023-11-17 17:49:07 -08:00
Abhishek Pandey  022ffd56b6  Add gomemlimit                      2023-11-17 17:22:02 -08:00
Abhishek Pandey  a30b8badba  remove local pkg                    2023-11-17 15:59:53 -08:00
Abhishek Pandey  52b020c416  profile                             2023-11-17 15:57:43 -08:00
8 changed files with 508 additions and 317 deletions

View File

@@ -1,9 +1,102 @@
package main
import (
"context"
"log"
"os"
"runtime"
"runtime/debug"
"runtime/pprof"
"strconv"
"time"
"github.com/alcionai/corso/src/cli"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/pkg/profile"
)
var profileTicker = time.NewTicker(1 * time.Second)
var perMinuteMap = make(map[time.Time]int)
var timeSinceRefresh = time.Now()
//var profileTicker = time.NewTicker(120 * time.Second)
var printTicker = time.NewTicker(1 * time.Second)
var profileCounter = 0
func main() {
defer profile.Start(profile.MemProfile).Stop()
debug.SetMemoryLimit(1 * 1024 * 1024 * 1024)
go func() {
for {
select {
case <-profileTicker.C:
var m runtime.MemStats
runtime.ReadMemStats(&m)
// if mem > 3GB and we haven't captured a profile this minute, capture it,
// or if it's been 2 mins since the last profile, capture it
t := time.Now().Truncate(time.Minute)
//if (m.HeapAlloc > uint64(3*1024*1024*1024) && perMinuteMap[t] == 0) || time.Since(timeSinceRefresh) > 2*time.Minute {
if time.Since(timeSinceRefresh) > 2*time.Minute {
filename := "mem." + strconv.Itoa(profileCounter) + ".pprof"
f, _ := os.Create(filename)
if err := pprof.WriteHeapProfile(f); err != nil {
log.Fatal("could not write memory profile: ", err)
}
f.Close()
profileCounter++
perMinuteMap[t] = 1
timeSinceRefresh = time.Now()
}
}
}
}()
go func() {
for {
select {
case <-printTicker.C:
PrintMemUsage()
}
}
}()
cli.Handle()
}
// PrintMemUsage outputs the current, total, and OS memory being used, as well as the number
// of garbage collection cycles completed.
func PrintMemUsage() {
ctx := context.Background()
var m runtime.MemStats
runtime.ReadMemStats(&m)
// For info on each, see: https://golang.org/pkg/runtime/#MemStats
// logger.Ctx(ctx).Info("Alloc = ", bToMb(m.Alloc), " MB")
// logger.Ctx(ctx).Info("TotalAlloc = ", bToMb(m.TotalAlloc), " MB")
logger.Ctx(ctx).Info("HeapAlloc = ", bToMb(m.HeapAlloc), " MB") // same as Alloc
logger.Ctx(ctx).Info("HeapReleased = ", bToMb(m.HeapReleased), " MB")
logger.Ctx(ctx).Info("HeapObjects = ", bToMb(m.HeapObjects), " MB")
logger.Ctx(ctx).Info("HeapSys = ", bToMb(m.HeapSys), " MB")
logger.Ctx(ctx).Info("HeapIdle = ", bToMb(m.HeapIdle), " MB")
logger.Ctx(ctx).Info("HeapInuse = ", bToMb(m.HeapInuse), " MB")
// logger.Ctx(ctx).Info("Mallocs = ", bToMb(m.Mallocs), " MB")
// logger.Ctx(ctx).Info("Frees = ", bToMb(m.Frees), " MB")
// logger.Ctx(ctx).Info("StackInuse = ", bToMb(m.StackInuse), " MB")
// logger.Ctx(ctx).Info("StackSys = ", bToMb(m.StackSys), " MB")
// logger.Ctx(ctx).Info("Sys = ", bToMb(m.Sys), " MB")
logger.Ctx(ctx).Info("NumGC = ", m.NumGC)
}
func bToMb(b uint64) uint64 {
return b / 1024 / 1024
}
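A note on the profiling scaffolding above: debug.SetMemoryLimit(1 * 1024 * 1024 * 1024) is the programmatic equivalent of running with GOMEMLIMIT=1GiB, and MemStats.HeapObjects is an object count rather than a byte value, so the "MB" label on that log line is a mislabel. The capture goroutine also discards the error from os.Create. A minimal standalone sketch of the same heap-profile capture with that error handled (names here are illustrative, not part of the commit):

package main

import (
	"fmt"
	"log"
	"os"
	"runtime/pprof"
)

// writeHeapProfile mirrors the capture goroutine above: it writes the current
// heap profile to mem.<n>.pprof, but surfaces the os.Create error instead of
// dropping it.
func writeHeapProfile(n int) error {
	f, err := os.Create(fmt.Sprintf("mem.%d.pprof", n))
	if err != nil {
		return err
	}
	defer f.Close()
	return pprof.WriteHeapProfile(f)
}

func main() {
	if err := writeHeapProfile(0); err != nil {
		log.Fatal("could not write memory profile: ", err)
	}
	// Inspect the output with: go tool pprof mem.0.pprof
}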

View File

@@ -3,6 +3,8 @@ module github.com/alcionai/corso/src
go 1.21
replace github.com/kopia/kopia => github.com/alcionai/kopia v0.12.2-0.20230822191057-17d4deff94a3
//replace github.com/DmitriyVTitov/size => /Users/pandeyab/temp/size
require (
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.3.1
@@ -46,15 +48,18 @@ require (
)
require (
github.com/DmitriyVTitov/size v1.5.0 // indirect
github.com/VividCortex/ewma v1.2.0 // indirect
github.com/acarl005/stripansi v0.0.0-20180116102854-5a71ef0e047d // indirect
github.com/andybalholm/brotli v1.0.6 // indirect
github.com/aws/aws-sdk-go v1.47.9 // indirect
github.com/cention-sany/utf7 v0.0.0-20170124080048-26cad61bd60a // indirect
github.com/felixge/fgprof v0.9.3 // indirect
github.com/fsnotify/fsnotify v1.6.0 // indirect
github.com/gofrs/flock v0.8.1 // indirect
github.com/gogs/chardet v0.0.0-20211120154057-b7413eaefb8f // indirect
github.com/google/go-cmp v0.5.9 // indirect
github.com/google/pprof v0.0.0-20231101202521-4ca4178f5c7a // indirect
github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect
github.com/hashicorp/cronexpr v1.1.2 // indirect
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
@@ -64,6 +69,7 @@ require (
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/olekukonko/tablewriter v0.0.5 // indirect
github.com/pelletier/go-toml/v2 v2.1.0 // indirect
github.com/pkg/profile v1.7.0 // indirect
github.com/sagikazarmark/locafero v0.3.0 // indirect
github.com/sagikazarmark/slog-shim v0.1.0 // indirect
github.com/sourcegraph/conc v0.3.0 // indirect

View File

@@ -49,6 +49,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym
github.com/DATA-DOG/go-sqlmock v1.4.1 h1:ThlnYciV1iM/V0OSF/dtkqWb6xo5qITT1TJBG1MRDJM=
github.com/DATA-DOG/go-sqlmock v1.4.1/go.mod h1:f/Ixk793poVmq4qj/V1dPUg2JEAKC73Q5eFN3EC/SaM=
github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ=
github.com/DmitriyVTitov/size v1.5.0 h1:/PzqxYrOyOUX1BXj6J9OuVRVGe+66VL4D9FlUaW515g=
github.com/DmitriyVTitov/size v1.5.0/go.mod h1:le6rNI4CoLQV1b9gzp1+3d7hMAD/uu2QcJ+aYbNgiU0=
github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5 h1:IEjq88XO4PuBDcvmjQJcQGg+w+UaafSy8G5Kcb5tBhI=
github.com/GehirnInc/crypt v0.0.0-20230320061759-8cc1b52080c5/go.mod h1:exZ0C/1emQJAw5tHOaUDyY1ycttqBAPcxuzf7QbY6ec=
github.com/VividCortex/ewma v1.2.0 h1:f58SaIzcDXrSy3kWaHNvuJgJ3Nmz59Zji6XoJR/q1ow=
@@ -121,6 +123,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.m
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM=
github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE=
github.com/felixge/fgprof v0.9.3 h1:VvyZxILNuCiUCSXtPtYmmtGvb65nqXh2QFWc0Wpf2/g=
github.com/felixge/fgprof v0.9.3/go.mod h1:RdbpDgzqYVh/T9fPELJyV7EYJuHB55UTEULNun8eiPw=
github.com/frankban/quicktest v1.14.4 h1:g2rn0vABPOOXmZUj+vbmUp0lPoXEMuhTpIluN0XL9UY=
github.com/frankban/quicktest v1.14.4/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.6.0 h1:n+5WquG0fcWoWp6xPWfHdbskMCQaFnG6PfBrh1Ky4HY=
@@ -155,6 +159,7 @@ github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfU
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
@@ -207,6 +212,9 @@ github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hf
github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20211214055906-6f57359322fd/go.mod h1:KgnwoLYCZ8IQu3XUZ8Nc/bM9CCZFOyjUNOSygVozoDg=
github.com/google/pprof v0.0.0-20231101202521-4ca4178f5c7a h1:fEBsGL/sjAuJrgah5XqmmYsTLzJp/TO9Lhy39gkverk=
github.com/google/pprof v0.0.0-20231101202521-4ca4178f5c7a/go.mod h1:czg5+yv1E0ZGTi6S6vVK1mke0fV+FaUhNGcd6VRS9Ik=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/google/uuid v1.4.0 h1:MtMxsa51/r9yyhkyLsVeVt0B+BGQZzpQiTQ4eHZ8bc4=
@@ -241,6 +249,7 @@ github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4=
github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20210905161508-09a460cdf81d/go.mod h1:aYm2/VgdVmcIU8iMfdMvDMsRAQjcfZSKFby6HOFvi/w=
github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8=
github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw=
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
@@ -353,6 +362,8 @@ github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINE
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/profile v1.7.0 h1:hnbDkaNWPCLMO9wGLdBFTIZvzDrDfBM2072E1S9gJkA=
github.com/pkg/profile v1.7.0/go.mod h1:8Uer0jas47ZQMJ7VD+OHknK4YDY07LPUC6dEvqDjvNo=
github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U=
@@ -631,6 +642,7 @@ golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20210616045830-e2b7044e8c71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211007075335-d3039528d8ac/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View File

@@ -92,6 +92,10 @@ type Collection struct {
counter *count.Bus
}
func (c *Collection) GetDriveItemsMap() map[string]models.DriveItemable {
return c.driveItems
}
func pathToLocation(p path.Path) (*path.Builder, error) {
if p == nil {
return nil, nil

View File

@@ -21,7 +21,6 @@ import (
"github.com/alcionai/corso/src/pkg/control"
"github.com/alcionai/corso/src/pkg/count"
"github.com/alcionai/corso/src/pkg/fault"
"github.com/alcionai/corso/src/pkg/filters"
"github.com/alcionai/corso/src/pkg/logger"
"github.com/alcionai/corso/src/pkg/path"
"github.com/alcionai/corso/src/pkg/services/m365/api"
@@ -699,45 +698,45 @@ func (c *Collections) getCollectionPath(
item models.DriveItemable,
) (path.Path, error) {
var (
pb = odConsts.DriveFolderPrefixBuilder(driveID)
isRoot = item.GetRoot() != nil
isFile = item.GetFile() != nil
pb = odConsts.DriveFolderPrefixBuilder(driveID)
// isRoot = item.GetRoot() != nil
// isFile = item.GetFile() != nil
)
if !isRoot {
if item.GetParentReference() == nil ||
item.GetParentReference().GetPath() == nil {
err := clues.New("no parent reference").
With("item_name", clues.Hide(ptr.Val(item.GetName())))
// if !isRoot {
// if item.GetParentReference() == nil ||
// item.GetParentReference().GetPath() == nil {
// err := clues.New("no parent reference").
// With("item_name", clues.Hide(ptr.Val(item.GetName())))
return nil, err
}
// return nil, err
// }
pb = path.Builder{}.Append(path.Split(ptr.Val(item.GetParentReference().GetPath()))...)
}
// pb = path.Builder{}.Append(path.Split(ptr.Val(item.GetParentReference().GetPath()))...)
// }
collectionPath, err := c.handler.CanonicalPath(pb, c.tenantID)
if err != nil {
return nil, clues.Wrap(err, "making item path")
}
if isRoot || isFile {
return collectionPath, nil
}
// if isRoot || isFile {
// return collectionPath, nil
// }
// Append folder name to path since we want the path for the collection, not
// the path for the parent of the collection. The root and files don't need
// to append an extra element because the root already refers to itself and
// the collection containing the item is the parent path.
name := ptr.Val(item.GetName())
if len(name) == 0 {
return nil, clues.New("folder with empty name")
}
// // Append folder name to path since we want the path for the collection, not
// // the path for the parent of the collection. The root and files don't need
// // to append an extra element because the root already refers to itself and
// // the collection containing the item is the parent path.
// name := ptr.Val(item.GetName())
// if len(name) == 0 {
// return nil, clues.New("folder with empty name")
// }
collectionPath, err = collectionPath.Append(false, name)
if err != nil {
return nil, clues.Wrap(err, "making non-root folder path")
}
// collectionPath, err = collectionPath.Append(false, name)
// if err != nil {
// return nil, clues.Wrap(err, "making non-root folder path")
// }
return collectionPath, nil
}
@@ -780,6 +779,24 @@ func (c *Collections) PopulateDriveCollections(
maps.Copy(newPrevPaths, oldPrevPaths)
}
col, err := NewCollection(
c.handler,
c.protectedResource,
nil,
nil,
driveID,
c.statusUpdater,
c.ctrl,
false,
false,
nil,
counter.Local())
if err != nil {
return pagers.DeltaUpdate{}, newPrevPaths, el.Failure()
}
c.CollectionMap[driveID][driveID] = col
pager := c.handler.EnumerateDriveItemsDelta(
ctx,
driveID,
@@ -855,233 +872,246 @@ func (c *Collections) processItem(
counter *count.Bus,
skipper fault.AddSkipper,
) error {
var (
itemID = ptr.Val(item.GetId())
itemName = ptr.Val(item.GetName())
isFolder = item.GetFolder() != nil || item.GetPackageEscaped() != nil
)
ctx = clues.Add(
ctx,
"item_id", itemID,
"item_name", clues.Hide(itemName),
"item_is_folder", isFolder)
if item.GetMalware() != nil {
addtl := graph.ItemInfo(item)
skip := fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, addtl)
if isFolder {
skip = fault.ContainerSkip(fault.SkipMalware, driveID, itemID, itemName, addtl)
}
skipper.AddSkip(ctx, skip)
logger.Ctx(ctx).Infow("malware detected", "item_details", addtl)
counter.Inc(count.Malware)
return nil
collection, ok := c.CollectionMap[driveID][driveID]
if !ok {
return clues.New("no collection for drive").With("drive_id", driveID)
}
// Deleted file or folder.
if item.GetDeleted() != nil {
err := c.handleDelete(
ctx,
itemID,
driveID,
oldPrevPaths,
currPrevPaths,
newPrevPaths,
isFolder,
excludedItemIDs,
invalidPrevDelta,
counter)
return clues.StackWC(ctx, err).OrNil()
}
collectionPath, err := c.getCollectionPath(driveID, item)
if err != nil {
return clues.StackWC(ctx, err).Label(fault.LabelForceNoBackupCreation, count.BadCollPath)
}
// Skip items that don't match the folder selectors we were given.
if shouldSkip(ctx, collectionPath, c.handler, driveName) {
counter.Inc(count.SkippedContainers)
logger.Ctx(ctx).Debugw("path not selected", "skipped_path", collectionPath.String())
return nil
}
switch {
case isFolder:
// Deletions are handled above so this is just moves/renames.
var prevPath path.Path
prevPathStr, ok := oldPrevPaths[itemID]
if ok {
prevPath, err = path.FromDataLayerPath(prevPathStr, false)
if err != nil {
return clues.WrapWC(ctx, err, "invalid previous path").
With("prev_path_string", path.LoggableDir(prevPathStr)).
Label(count.BadPrevPath)
}
} else if item.GetRoot() != nil {
// Root doesn't move or get renamed.
prevPath = collectionPath
}
// Moved folders don't cause delta results for any subfolders nested in
// them. We need to go through and update paths to handle that. We only
// update newPaths so we don't accidentally clobber previous deletes.
updatePath(newPrevPaths, itemID, collectionPath.String())
found, err := updateCollectionPaths(
driveID,
itemID,
c.CollectionMap,
collectionPath)
if err != nil {
return clues.StackWC(ctx, err)
}
if found {
return nil
}
isPackage := item.GetPackageEscaped() != nil
if isPackage {
counter.Inc(count.Packages)
// mark this path as a package type for all other collections.
// any subfolder should get marked as a childOfPackage below.
topLevelPackages[collectionPath.String()] = struct{}{}
} else {
counter.Inc(count.Folders)
}
childOfPackage := filters.
PathPrefix(maps.Keys(topLevelPackages)).
Compare(collectionPath.String())
// This check is to ensure that if a folder was deleted and
// recreated multiple times between a backup, we only use the
// final one.
alreadyHandledFolderID, collPathAlreadyExists := seenFolders[collectionPath.String()]
collPathAlreadyExists = collPathAlreadyExists && alreadyHandledFolderID != itemID
if collPathAlreadyExists {
// we don't have a good way of juggling multiple previous paths
// at this time. If a path was declared twice, it's a bit ambiguous
// which prior data the current folder now contains. Safest thing to
// do is to call it a new folder and ingest items fresh.
prevPath = nil
c.NumContainers--
c.NumItems--
delete(c.CollectionMap[driveID], alreadyHandledFolderID)
delete(newPrevPaths, alreadyHandledFolderID)
}
if invalidPrevDelta {
prevPath = nil
}
seenFolders[collectionPath.String()] = itemID
col, err := NewCollection(
c.handler,
c.protectedResource,
collectionPath,
prevPath,
driveID,
c.statusUpdater,
c.ctrl,
isPackage || childOfPackage,
invalidPrevDelta || collPathAlreadyExists,
nil,
counter.Local())
if err != nil {
return clues.StackWC(ctx, err)
}
col.driveName = driveName
c.CollectionMap[driveID][itemID] = col
c.NumContainers++
if item.GetRoot() != nil {
return nil
}
// Add an entry to fetch permissions into this collection. This assumes
// that OneDrive always returns all folders on the path of an item
// before the item. This seems to hold true for now at least.
if col.Add(item) {
c.NumItems++
}
case item.GetFile() != nil:
counter.Inc(count.Files)
// Deletions are handled above so this is just moves/renames.
if len(ptr.Val(item.GetParentReference().GetId())) == 0 {
return clues.NewWC(ctx, "file without parent ID").Label(count.MissingParent)
}
// Get the collection for this item.
parentID := ptr.Val(item.GetParentReference().GetId())
ctx = clues.Add(ctx, "parent_id", parentID)
collection, ok := c.CollectionMap[driveID][parentID]
if !ok {
return clues.NewWC(ctx, "item seen before parent folder").Label(count.ItemBeforeParent)
}
// This will only kick in if the file was moved multiple times
// within a single delta query. We delete the file from the previous
// collection so that it doesn't appear in two places.
prevParentContainerID, alreadyAdded := currPrevPaths[itemID]
if alreadyAdded {
prevColl, found := c.CollectionMap[driveID][prevParentContainerID]
if !found {
return clues.NewWC(ctx, "previous collection not found").
With("prev_parent_container_id", prevParentContainerID)
}
if ok := prevColl.Remove(itemID); !ok {
return clues.NewWC(ctx, "removing item from prev collection").
With("prev_parent_container_id", prevParentContainerID)
}
}
currPrevPaths[itemID] = parentID
// Only increment counters if the file didn't already get counted (i.e. it's
// not an item that was either updated or moved during the delta query).
if collection.Add(item) && !alreadyAdded {
c.NumItems++
c.NumFiles++
}
// Do this after adding the file to the collection so if we fail to add
// the item to the collection for some reason and we're using best effort
// we don't just end up deleting the item in the resulting backup. The
// resulting backup will be slightly incorrect, but it will have the most
// data that we were able to preserve.
if !invalidPrevDelta {
// Always add a file to the excluded list. The file may have been
// renamed/moved/modified, so we still have to drop the
// original one and download a fresh copy.
excludedItemIDs[itemID+metadata.DataFileSuffix] = struct{}{}
excludedItemIDs[itemID+metadata.MetaFileSuffix] = struct{}{}
}
default:
return clues.NewWC(ctx, "item is neither folder nor file").
Label(fault.LabelForceNoBackupCreation, count.UnknownItemType)
if collection.Add(item) {
c.NumItems++
c.NumFiles++
}
return nil
// var (
// itemID = ptr.Val(item.GetId())
// itemName = ptr.Val(item.GetName())
// isFolder = item.GetFolder() != nil || item.GetPackageEscaped() != nil
// )
// ctx = clues.Add(
// ctx,
// "item_id", itemID,
// "item_name", clues.Hide(itemName),
// "item_is_folder", isFolder)
// if item.GetMalware() != nil {
// addtl := graph.ItemInfo(item)
// skip := fault.FileSkip(fault.SkipMalware, driveID, itemID, itemName, addtl)
// if isFolder {
// skip = fault.ContainerSkip(fault.SkipMalware, driveID, itemID, itemName, addtl)
// }
// skipper.AddSkip(ctx, skip)
// logger.Ctx(ctx).Infow("malware detected", "item_details", addtl)
// counter.Inc(count.Malware)
// return nil
// }
// // Deleted file or folder.
// if item.GetDeleted() != nil {
// err := c.handleDelete(
// ctx,
// itemID,
// driveID,
// oldPrevPaths,
// currPrevPaths,
// newPrevPaths,
// isFolder,
// excludedItemIDs,
// invalidPrevDelta,
// counter)
// return clues.StackWC(ctx, err).OrNil()
// }
// collectionPath, err := c.getCollectionPath(driveID, item)
// if err != nil {
// return clues.StackWC(ctx, err).Label(fault.LabelForceNoBackupCreation, count.BadCollPath)
// }
// // Skip items that don't match the folder selectors we were given.
// if shouldSkip(ctx, collectionPath, c.handler, driveName) {
// counter.Inc(count.SkippedContainers)
// logger.Ctx(ctx).Debugw("path not selected", "skipped_path", collectionPath.String())
// return nil
// }
// switch {
// // case isFolder:
// // // Deletions are handled above so this is just moves/renames.
// // var prevPath path.Path
// // prevPathStr, ok := oldPrevPaths[itemID]
// // if ok {
// // prevPath, err = path.FromDataLayerPath(prevPathStr, false)
// // if err != nil {
// // return clues.WrapWC(ctx, err, "invalid previous path").
// // With("prev_path_string", path.LoggableDir(prevPathStr)).
// // Label(count.BadPrevPath)
// // }
// // } else if item.GetRoot() != nil {
// // // Root doesn't move or get renamed.
// // prevPath = collectionPath
// // }
// // // Moved folders don't cause delta results for any subfolders nested in
// // // them. We need to go through and update paths to handle that. We only
// // // update newPaths so we don't accidentally clobber previous deletes.
// // updatePath(newPrevPaths, itemID, collectionPath.String())
// // found, err := updateCollectionPaths(
// // driveID,
// // itemID,
// // c.CollectionMap,
// // collectionPath)
// // if err != nil {
// // return clues.StackWC(ctx, err)
// // }
// // if found {
// // return nil
// // }
// // isPackage := item.GetPackageEscaped() != nil
// // if isPackage {
// // counter.Inc(count.Packages)
// // // mark this path as a package type for all other collections.
// // // any subfolder should get marked as a childOfPackage below.
// // topLevelPackages[collectionPath.String()] = struct{}{}
// // } else {
// // counter.Inc(count.Folders)
// // }
// // childOfPackage := filters.
// // PathPrefix(maps.Keys(topLevelPackages)).
// // Compare(collectionPath.String())
// // // This check is to ensure that if a folder was deleted and
// // // recreated multiple times between a backup, we only use the
// // // final one.
// // alreadyHandledFolderID, collPathAlreadyExists := seenFolders[collectionPath.String()]
// // collPathAlreadyExists = collPathAlreadyExists && alreadyHandledFolderID != itemID
// // if collPathAlreadyExists {
// // // we don't have a good way of juggling multiple previous paths
// // // at this time. If a path was declared twice, it's a bit ambiguous
// // // which prior data the current folder now contains. Safest thing to
// // // do is to call it a new folder and ingest items fresh.
// // prevPath = nil
// // c.NumContainers--
// // c.NumItems--
// // delete(c.CollectionMap[driveID], alreadyHandledFolderID)
// // delete(newPrevPaths, alreadyHandledFolderID)
// // }
// // if invalidPrevDelta {
// // prevPath = nil
// // }
// // seenFolders[collectionPath.String()] = itemID
// // col, err := NewCollection(
// // c.handler,
// // c.protectedResource,
// // collectionPath,
// // prevPath,
// // driveID,
// // c.statusUpdater,
// // c.ctrl,
// // isPackage || childOfPackage,
// // invalidPrevDelta || collPathAlreadyExists,
// // nil,
// // counter.Local())
// // if err != nil {
// // return clues.StackWC(ctx, err)
// // }
// // col.driveName = driveName
// // c.CollectionMap[driveID][itemID] = col
// // c.NumContainers++
// // if item.GetRoot() != nil {
// // return nil
// // }
// // // Add an entry to fetch permissions into this collection. This assumes
// // // that OneDrive always returns all folders on the path of an item
// // // before the item. This seems to hold true for now at least.
// // if col.Add(item) {
// // c.NumItems++
// // }
// case item.GetFile() != nil:
// counter.Inc(count.Files)
// // Deletions are handled above so this is just moves/renames.
// if len(ptr.Val(item.GetParentReference().GetId())) == 0 {
// return clues.NewWC(ctx, "file without parent ID").Label(count.MissingParent)
// }
// // Get the collection for this item.
// parentID := ptr.Val(item.GetParentReference().GetId())
// ctx = clues.Add(ctx, "parent_id", parentID)
// collection, ok := c.CollectionMap[driveID][parentID]
// if !ok {
// return clues.NewWC(ctx, "item seen before parent folder").Label(count.ItemBeforeParent)
// }
// // This will only kick in if the file was moved multiple times
// // within a single delta query. We delete the file from the previous
// // collection so that it doesn't appear in two places.
// prevParentContainerID, alreadyAdded := currPrevPaths[itemID]
// if alreadyAdded {
// prevColl, found := c.CollectionMap[driveID][prevParentContainerID]
// if !found {
// return clues.NewWC(ctx, "previous collection not found").
// With("prev_parent_container_id", prevParentContainerID)
// }
// if ok := prevColl.Remove(itemID); !ok {
// return clues.NewWC(ctx, "removing item from prev collection").
// With("prev_parent_container_id", prevParentContainerID)
// }
// }
// currPrevPaths[itemID] = parentID
// // Only increment counters if the file didn't already get counted (i.e. it's
// // not an item that was either updated or moved during the delta query).
// if collection.Add(item) && !alreadyAdded {
// c.NumItems++
// c.NumFiles++
// }
// // Do this after adding the file to the collection so if we fail to add
// // the item to the collection for some reason and we're using best effort
// // we don't just end up deleting the item in the resulting backup. The
// // resulting backup will be slightly incorrect, but it will have the most
// // data that we were able to preserve.
// if !invalidPrevDelta {
// // Always add a file to the excluded list. The file may have been
// // renamed/moved/modified, so we still have to drop the
// // original one and download a fresh copy.
// excludedItemIDs[itemID+metadata.DataFileSuffix] = struct{}{}
// excludedItemIDs[itemID+metadata.MetaFileSuffix] = struct{}{}
// }
// default:
// return clues.NewWC(ctx, "item is neither folder nor file").
// Label(fault.LabelForceNoBackupCreation, count.UnknownItemType)
// }
// return nil
}
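Net effect of the edits above: processItem no longer builds one collection per folder; every item for a drive is added to the single collection that PopulateDriveCollections now registers under the drive's own ID (c.CollectionMap[driveID][driveID]). A toy, self-contained sketch of that shape, with plain maps and strings standing in for the real Collection and DriveItemable types:

package main

import "fmt"

func main() {
	// One bucket per drive, keyed by the drive's own ID, created up front.
	collectionMap := map[string]map[string][]string{
		"drive-1": {"drive-1": {}},
	}

	// Every item from the delta enumeration lands in that single bucket,
	// regardless of which folder it lives in.
	for _, itemID := range []string{"item-a", "item-b", "item-c"} {
		collectionMap["drive-1"]["drive-1"] = append(collectionMap["drive-1"]["drive-1"], itemID)
	}

	fmt.Println(len(collectionMap["drive-1"]["drive-1"])) // 3
}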
type dirScopeChecker interface {

View File

@ -16,6 +16,7 @@ import (
"github.com/alcionai/corso/src/internal/events"
"github.com/alcionai/corso/src/internal/kopia"
kinject "github.com/alcionai/corso/src/internal/kopia/inject"
"github.com/alcionai/corso/src/internal/m365/collection/drive"
"github.com/alcionai/corso/src/internal/model"
"github.com/alcionai/corso/src/internal/observe"
"github.com/alcionai/corso/src/internal/operations/inject"
@@ -439,67 +440,111 @@ func (op *BackupOperation) do(
lastBackupVersion = mans.MinBackupVersion()
}
// TODO(ashmrtn): This should probably just return a collection that deletes
// the entire subtree instead of returning an additional bool. That way base
// selection is controlled completely by flags and merging is controlled
// completely by collections.
cs, ssmb, canUsePreviousBackup, err := produceBackupDataCollections(
ctx,
op.bp,
op.ResourceOwner,
op.Selectors,
mdColls,
lastBackupVersion,
op.Options,
op.Counter,
op.Errors)
if err != nil {
return nil, clues.Wrap(err, "producing backup data collections")
iterations := 1
for i := 0; i < iterations; i++ {
// TODO(ashmrtn): This should probably just return a collection that deletes
// the entire subtree instead of returning an additional bool. That way base
// selection is controlled completely by flags and merging is controlled
// completely by collections.
cs, _, _, err := produceBackupDataCollections(
ctx,
op.bp,
op.ResourceOwner,
op.Selectors,
mdColls,
lastBackupVersion,
op.Options,
op.Counter,
op.Errors)
if err != nil {
return nil, clues.Wrap(err, "producing backup data collections")
}
// Sleep for 4 mins to let the memory usage settle down so that we have a better
// picture. Also allows pprof to run twice during this time.
// Do some meaningless work afterwards to make sure the collections don't get garbage collected.
time.Sleep(4 * time.Minute)
// sum := 0
// numItems := 0
// mapSum := 0
for _, c := range cs {
v, ok := c.(*drive.Collection)
if !ok {
continue
}
m := v.GetDriveItemsMap()
for key := range m {
logger.Ctx(ctx).Debug(key)
}
// Get sizeof recursively using reflect
// m := v.GetDriveItemsMap()
// for _, val := range m {
// s := size.Of(val)
// sum += s
// numItems++
// }
// ms := size.Of(m)
// mapSum += ms
// logger.Ctx(ctx).Debugf("coll drive map size %d, num drive items %d\n", ms, len(m))
}
// print total sum
// logger.Ctx(ctx).Debugf("itemSum %d, map sum %d, total items %d, mem used per item %f mem per item in map %f \n", sum, mapSum, numItems, float64(sum)/float64(numItems), float64(mapSum)/float64(numItems))
}
ctx = clues.Add(
ctx,
"can_use_previous_backup", canUsePreviousBackup,
"collection_count", len(cs))
return nil, clues.New("failed")
writeStats, deets, toMerge, err := consumeBackupCollections(
ctx,
op.kopia,
op.account.ID(),
reasons,
mans,
cs,
ssmb,
backupID,
op.incremental && canUseMetadata && canUsePreviousBackup,
op.Counter,
op.Errors)
if err != nil {
return nil, clues.Wrap(err, "persisting collection backups")
}
// ctx = clues.Add(
// ctx,
// "can_use_previous_backup", canUsePreviousBackup,
// "collection_count", len(cs))
opStats.hasNewDetailEntries = (deets != nil && !deets.Empty()) ||
(toMerge != nil && toMerge.ItemsToMerge() > 0)
opStats.k = writeStats
// writeStats, deets, toMerge, err := consumeBackupCollections(
// ctx,
// op.kopia,
// op.account.ID(),
// reasons,
// mans,
// cs,
// ssmb,
// backupID,
// op.incremental && canUseMetadata && canUsePreviousBackup,
// op.Counter,
// op.Errors)
// if err != nil {
// return nil, clues.Wrap(err, "persisting collection backups")
// }
err = mergeDetails(
ctx,
detailsStore,
mans,
toMerge,
deets,
writeStats,
op.Selectors.PathService(),
op.Errors)
if err != nil {
return nil, clues.Wrap(err, "merging details")
}
// opStats.hasNewDetailEntries = (deets != nil && !deets.Empty()) ||
// (toMerge != nil && toMerge.ItemsToMerge() > 0)
// opStats.k = writeStats
opStats.ctrl = op.bp.Wait()
// err = mergeDetails(
// ctx,
// detailsStore,
// mans,
// toMerge,
// deets,
// writeStats,
// op.Selectors.PathService(),
// op.Errors)
// if err != nil {
// return nil, clues.Wrap(err, "merging details")
// }
logger.Ctx(ctx).Debug(opStats.ctrl)
// opStats.ctrl = op.bp.Wait()
// logger.Ctx(ctx).Debug(opStats.ctrl)
// return deets, nil
return deets, nil
}
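The commented-out measurement above used github.com/DmitriyVTitov/size (the new indirect dependency in go.mod) to take a recursive, reflection-based size of each entry returned by GetDriveItemsMap. A minimal standalone sketch of that technique, with a toy struct standing in for models.DriveItemable:

package main

import (
	"fmt"

	"github.com/DmitriyVTitov/size"
)

type fakeDriveItem struct {
	ID   string
	Name string
}

func main() {
	items := map[string]*fakeDriveItem{
		"1": {ID: "1", Name: "a.txt"},
		"2": {ID: "2", Name: "b.txt"},
	}

	sum := 0
	for _, v := range items {
		sum += size.Of(v) // recursive size via reflection, as in the commented-out loop above
	}

	fmt.Printf("items %d, total bytes %d, bytes per item %.1f\n",
		len(items), sum, float64(sum)/float64(len(items)))
}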
func makeFallbackReasons(tenant string, sel selectors.Selector) ([]identity.Reasoner, error) {

0
src/mem.0.pprof Normal file
View File

View File

@@ -107,21 +107,22 @@ func idAnd(ss ...string) []string {
func DefaultDriveItemProps() []string {
return idAnd(
"content.downloadUrl",
"createdBy",
"createdDateTime",
"file",
"folder",
"lastModifiedDateTime",
"name",
"package",
"parentReference",
"root",
"sharepointIds",
"size",
"deleted",
"malware",
"shared")
// "content.downloadUrl",
// "createdBy",
// "createdDateTime",
// "file",
// "folder",
// "lastModifiedDateTime",
// "name",
// "package",
// "parentReference",
// "root",
// "sharepointIds",
// "size",
// "deleted",
// "malware",
// "shared")
)
}
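With every optional property commented out, DefaultDriveItemProps now asks Graph for only the item ID, which is how each DriveItem payload in the delta response shrinks. A small illustrative sketch of how the trimmed list translates into the $select query parameter; idAnd is restated here under the assumption that it simply prepends "id" to its arguments:

package main

import (
	"fmt"
	"strings"
)

// idAnd mirrors the helper above (assumed behavior): "id" plus any extra properties.
func idAnd(ss ...string) []string {
	return append([]string{"id"}, ss...)
}

func main() {
	props := idAnd() // the trimmed DefaultDriveItemProps is equivalent to this
	fmt.Println("$select=" + strings.Join(props, ",")) // $select=id
}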
// URL cache only needs to fetch a small subset of item properties