From bc7792744e70a83d73187e89cfeb694ea68727b0 Mon Sep 17 00:00:00 2001 From: Abhishek Pandey Date: Mon, 12 Jun 2023 11:15:19 -0700 Subject: [PATCH] Don't dump response body while checking for malware (#3592) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Corso detects malware in a few places for OD/SP. 1. During item enumeration, using [malware property](https://learn.microsoft.com/en-us/graph/api/resources/driveitem?view=graph-rest-1.0#properties) which is set by graph - [code](https://github.com/alcionai/corso/blob/ed47c134b09a19dd6b4869f605f3a384f5763c06/src/internal/connector/onedrive/collections.go#L658) 2. During item fetch using download URL - [code](https://github.com/alcionai/corso/blob/ed47c134b09a19dd6b4869f605f3a384f5763c06/src/internal/connector/onedrive/item.go#LL53C18-L53C18) - Graph seems to return 403 with `{"error":{"code":"malwareDetected","message":"Malware detected"}}`. Tested with eicar. 3. By analyzing http response headers in `IsMalwareResp`  Don't see these headers documented by graph. This is for [sharepoint protocol compatibility](https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/ms-wdvmoduu/6fa6d4a9-ac18-4cd7-b696-8a3b14a98291) it seems. 4. By analyzing response body in `IsMalwareResp` . We check for malwareDetected string inside http response body. We are accidentally dumping entire file ( http response dump) while checking for malware in 4. This is leading to high mem utilization/OOMs, especially while processing very large files. I don't think we need 4 at all. If graph is the entity which detects malware, I think we will never get past 2. This PR removes item 4. --- #### Does this PR need a docs update or release note? - [ ] :white_check_mark: Yes, it's included - [ ] :clock1: Yes, but in a later PR - [x] :no_entry: No #### Type of change - [ ] :sunflower: Feature - [x] :bug: Bugfix - [ ] :world_map: Documentation - [ ] :robot: Supportability/Tests - [ ] :computer: CI/Deployment - [ ] :broom: Tech Debt/Cleanup #### Issue(s) * # #### Test Plan - [x] :muscle: Manual - [ ] :zap: Unit test - [ ] :green_heart: E2E --- src/internal/connector/graph/errors.go | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/src/internal/connector/graph/errors.go b/src/internal/connector/graph/errors.go index e72c6dd29..2f2427b6c 100644 --- a/src/internal/connector/graph/errors.go +++ b/src/internal/connector/graph/errors.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "net/http" - "net/http/httputil" "net/url" "os" "strings" @@ -18,7 +17,6 @@ import ( "github.com/alcionai/corso/src/internal/common/ptr" "github.com/alcionai/corso/src/pkg/fault" "github.com/alcionai/corso/src/pkg/filters" - "github.com/alcionai/corso/src/pkg/logger" ) // --------------------------------------------------------------------------- @@ -169,27 +167,11 @@ func IsMalware(err error) bool { } func IsMalwareResp(ctx context.Context, resp *http.Response) bool { - if resp == nil { - return false - } - // https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/ms-wsshp/ba4ee7a8-704c-4e9c-ab14-fa44c574bdf4 // https://learn.microsoft.com/en-us/openspecs/sharepoint_protocols/ms-wdvmoduu/6fa6d4a9-ac18-4cd7-b696-8a3b14a98291 - if len(resp.Header) > 0 && resp.Header.Get("X-Virus-Infected") == "true" { - return true - } - - respDump, err := httputil.DumpResponse(resp, true) - if err != nil { - logger.Ctx(ctx).Errorw("dumping http response", "error", err) - return false - } - - if strings.Contains(string(respDump), string(malwareDetected)) { - return true - } - - return false + return resp != nil && + len(resp.Header) > 0 && + resp.Header.Get("X-Virus-Infected") == "true" } func IsErrFolderExists(err error) bool {