corso/src/pkg/path/path.go
Keepers cdf26b7988
refactor exchange restore to use interfaces (#3456)
Refactors exchange restore, replacing near-duplicate per-category functions and switch-based process trees with interfaces.

At the top of restoring all collections, each category creates a categoryRestoreHandler to supply the necessary restore behavior.  The appropriate handler gets passed in to the collection restore, and all restore code after that takes a single path using a common restore interface to switch between categorical behavior.

---

#### Does this PR need a docs update or release note?

- [x]  No

#### Type of change

- [x] 🧹 Tech Debt/Cleanup

#### Issue(s)

* #1996

#### Test Plan

- [x]  Unit test
- [x] 💚 E2E
2023-06-02 00:58:09 +00:00

784 lines
20 KiB
Go

// Package path provides a set of functions for wrangling paths from the outside
// world into paths that corso can understand. Paths use the standard Unix path
// separator character '/'. If for some reason an individual element in a raw
// path contains the '/' character, it should be escaped with '\'. If the path
// contains '\' it should be escaped by turning it into '\\'.
//
// Paths can be split into elements by splitting on '/' if the '/' is not
// escaped. Additionally, corso may operate on segments in a path. Segments are
// made up of one or more path elements.
//
// Examples of paths splitting by elements and canonicalization with escaping:
// 1.
//
// input path: `this/is/a/path`
// elements of path: `this`, `is`, `a`, `path`
//
// 2.
//
// input path: `this/is\/a/path`
// elements of path: `this`, `is/a`, `path`
//
// 3.
//
// input path: `this/is\\/a/path`
// elements of path: `this`, `is\`, `a`, `path`
//
// 4.
//
// input path: `this/is\\\/a/path`
// elements of path: `this`, `is\/a`, `path`
//
// 5.
//
// input path: `this/is//a/path`
// elements of path: `this`, `is`, `a`, `path`
//
// 6.
//
// input path: `this/is\//a/path`
// elements of path: `this`, `is/`, `a`, `path`
//
// 7.
//
// input path: `this/is/a/path/`
// elements of path: `this`, `is`, `a`, `path`
//
// 8.
//
// input path: `this/is/a/path\/`
// elements of path: `this`, `is`, `a`, `path/`
package path
import (
"bytes"
"crypto/sha256"
"fmt"
"strings"
"github.com/alcionai/clues"
)
// Characters with special meaning in escaped path strings, plus the sizing
// used by ShortRef.
const (
// escapeCharacter precedes any character that must be read literally.
escapeCharacter = '\\'
// PathSeparator divides the elements of an escaped path string.
PathSeparator = '/'
// shortRefCharacters is the number of hex characters ShortRef produces.
shortRefCharacters = 12
)
// charactersToEscape is the set of runes that must be preceded by
// escapeCharacter when they appear inside a single path element.
var charactersToEscape = map[rune]struct{}{
PathSeparator: {},
escapeCharacter: {},
}
// Sentinel errors stacked onto more specific failures by the path
// constructors and parsers in this file.
var (
// errMissingSegment is stacked with the name of the empty value when a
// required prefix value (tenant, resource owner) is missing.
errMissingSegment = clues.New("missing required path element")
// errParsingPath is stacked onto failures hit while parsing an escaped
// path string.
errParsingPath = clues.New("parsing resource path")
)
// Path is the resource-specific view of a path. The constructors in this file
// produce Paths whose leading elements are the tenant, service, resource
// owner, and category, followed by any folder and item elements.
//
// For now, adding generic functions to pull information from segments.
// Resources that don't have the requested information should return an empty
// string.
type Path interface {
// Accessors for individual segments of the path. The implementations live
// outside this file.
// NOTE(review): confirm exact semantics (e.g. the meaning of Folder's
// escaped flag) against the implementing type.
String() string
Service() ServiceType
Category() CategoryType
Tenant() string
ResourceOwner() string
Folder(escaped bool) string
Folders() Elements
Item() string
// UpdateParent updates the parent from prev to cur if the item/folder was
// parented by the prev path. It reports whether an update took place.
UpdateParent(prev, cur Path) bool
// PopFront returns a Builder object with the first element (left-side)
// removed. As the resulting set of elements is no longer a valid resource
// path a Builder is returned instead.
PopFront() *Builder
// Dir returns a Path object with the right-most element removed if possible.
// If removing the right-most element would discard one of the required prefix
// elements then an error is returned.
Dir() (Path, error)
// Elements returns all the elements in the path. This is a temporary function
// and will likely be updated to handle encoded elements instead of clear-text
// elements in the future.
Elements() Elements
// Append returns a new Path object with the given element added to the end of
// the old Path if possible. If the old Path is an item Path then Append
// returns an error.
Append(isItem bool, elems ...string) (Path, error)
// AppendItem is a shorthand for Append(true, someItem)
AppendItem(item string) (Path, error)
// ShortRef returns a short reference representing this path. The short
// reference is intended to be unique. No guarantees are made about whether
// a short reference can be converted back into the Path that generated it.
// NOTE(review): Builder.ShortRef is a truncated SHA-256 over the
// concatenated elements, so uniqueness is probabilistic rather than
// absolute — confirm callers tolerate that.
ShortRef() string
// ToBuilder returns a Builder instance that represents the current Path.
ToBuilder() *Builder
// Every path needs to comply with these funcs to ensure that PII
// is appropriately hidden from logging, errors, and other outputs.
clues.Concealer
fmt.Stringer
// In the rare case that the path needs to get printed as a plain string,
// without obscuring values for PII.
clues.PlainStringer
}
// Compile-time assertions that *Builder satisfies the interfaces required for
// handling PII in logs, errors, and other output.
var (
_ clues.Concealer = &Builder{}
_ fmt.Stringer = &Builder{}
)
// RestorePaths denotes the location to find an item in kopia and the path of
// the collection to place the item in for restore.
type RestorePaths struct {
// StoragePath is where the item is found in kopia.
StoragePath Path
// RestorePath is the collection the item is placed in on restore.
RestorePath Path
}
// Builder is a simple path representation that only tracks path elements. It
// can join, escape, and unescape elements. Higher-level packages are expected
// to wrap this struct to build resource-specific contexts (e.g. an
// ExchangeMailPath).
// Resource-specific paths allow access to more information like segments in the
// path. Builders that are turned into resource paths later on do not need to
// manually add prefixes for items that normally appear in the data layer (e.g.
// tenant ID, service, user ID, etc).
type Builder struct {
// Unescaped version of elements.
elements Elements
}
// Append returns a new Builder holding this Builder's elements followed by the
// given elements, in the order they were passed. The receiver is not modified.
// The new elements are treated as unescaped; empty elements are skipped.
func (pb Builder) Append(elements ...string) *Builder {
	cloned := make([]string, len(pb.elements))
	copy(cloned, pb.elements)

	next := &Builder{elements: cloned}

	// Validation only runs for escaped input, so this call cannot fail.
	//nolint:errcheck
	next.appendElements(false, elements)

	return next
}
// appendElements adds each non-empty element to the end of pb in place.
//
// When escaped is false the elements are stored verbatim. When escaped is true
// each element has unescaped trailing separators trimmed, is skipped if that
// leaves it empty, is validated, and is stored in its unescaped form. The
// first validation failure is returned immediately; elements appended before
// the failure remain in pb.
func (pb *Builder) appendElements(escaped bool, elements []string) error {
	for _, elem := range elements {
		if elem == "" {
			continue
		}

		if !escaped {
			pb.elements = append(pb.elements, elem)
			continue
		}

		trimmed := TrimTrailingSlash(elem)
		// An element that was only path separators is empty once trimmed.
		if trimmed == "" {
			continue
		}

		if err := validateEscapedElement(trimmed); err != nil {
			return err
		}

		pb.elements = append(pb.elements, unescape(trimmed))
	}

	return nil
}
// UnescapeAndAppend creates a copy of this Builder and adds one or more already
// escaped path elements to the end of the new Builder. Elements are added in
// the order they are passed. The receiver is not modified.
//
// Returns an error, and a nil Builder, if any element fails escape validation.
func (pb Builder) UnescapeAndAppend(elements ...string) (*Builder, error) {
	// copy only fills len(dst) entries, so the destination needs the full
	// length up front; allocating it with length 0 would silently drop every
	// existing element. The extra capacity leaves room for the new elements.
	existing := make([]string, len(pb.elements), len(pb.elements)+len(elements))
	copy(existing, pb.elements)

	res := &Builder{elements: existing}

	if err := res.appendElements(true, elements); err != nil {
		return nil, err
	}

	return res, nil
}
// SplitUnescapeAppend takes an escaped string representing a directory path,
// trims any unescaped trailing separators, splits it into escaped elements,
// and hands those elements to UnescapeAndAppend, returning its result.
func (pb Builder) SplitUnescapeAppend(s string) (*Builder, error) {
	trimmed := TrimTrailingSlash(s)

	return pb.UnescapeAndAppend(Split(trimmed)...)
}
// PopFront returns a new Builder holding a copy of every element except the
// first (left-most) one. A Builder with zero or one elements yields an empty
// Builder.
func (pb Builder) PopFront() *Builder {
	if len(pb.elements) > 1 {
		rest := make([]string, len(pb.elements)-1)
		copy(rest, pb.elements[1:])

		return &Builder{elements: rest}
	}

	return &Builder{}
}
// Dir returns a new Builder without the last (right-most) element. A Builder
// with zero or one elements yields an empty Builder.
func (pb Builder) Dir() *Builder {
	last := len(pb.elements) - 1
	if last < 1 {
		return &Builder{}
	}

	// The result shares its backing array with pb; this relies on Builders
	// being treated as immutable.
	return &Builder{elements: pb.elements[:last]}
}
// HeadElem returns the first element in the Builder, or the empty string when
// the Builder has no elements.
func (pb Builder) HeadElem() string {
	if len(pb.elements) > 0 {
		return pb.elements[0]
	}

	return ""
}
// LastElem returns the final element in the Builder, or the empty string when
// the Builder has no elements.
func (pb Builder) LastElem() string {
	n := len(pb.elements)
	if n == 0 {
		return ""
	}

	return pb.elements[n-1]
}
// UpdateParent replaces the leading elements matching prev with the elements
// of cur and returns true if pb was updated.
//
// Nothing is changed and false is returned when prev is not a prefix of this
// Builder, when either prev or cur is nil, or when prev and cur are the same
// pointer.
func (pb *Builder) UpdateParent(prev, cur *Builder) bool {
	if prev == cur || prev == nil || cur == nil {
		return false
	}

	// Read the element slices directly: Elements() allocates a defensive copy
	// on every call, which is wasted work for length checks and comparisons.
	prefix := prev.elements
	if len(prefix) > len(pb.elements) {
		return false
	}

	for i, e := range prefix {
		if pb.elements[i] != e {
			return false
		}
	}

	// cur.Elements() is a fresh copy, so appending to it can never write into
	// cur's own backing array.
	pb.elements = append(cur.Elements(), pb.elements[len(prefix):]...)

	return true
}
// ShortRef produces a truncated SHA-256 hash of the builder's elements,
// rendered as shortRefCharacters hex characters, for use as a short
// identifier. An empty builder yields the empty string.
//
// The elements are hashed back to back with no delimiter between them.
func (pb Builder) ShortRef() string {
	if len(pb.elements) == 0 {
		return ""
	}

	var joined bytes.Buffer

	for i := range pb.elements {
		joined.WriteString(pb.elements[i])
	}

	digest := sha256.Sum256(joined.Bytes())

	// Each byte prints as two hex characters, so only half as many bytes as
	// target characters are formatted.
	return fmt.Sprintf("%x", digest[:shortRefCharacters/2])
}
// Elements returns a copy of all the elements in the path, so callers may
// modify the result without affecting the Builder. This is a temporary
// function and will likely be updated to handle encoded elements instead of
// clear-text elements in the future.
func (pb Builder) Elements() Elements {
	out := make(Elements, len(pb.elements))
	copy(out, pb.elements)

	return out
}
// ServicePrefix builds a non-item Path made up of only the prefix elements:
// tenant, service, resource owner, and category.
//
// Returns an error if the service and category fail
// ValidateServiceAndCategory, or if tenant or resourceOwner is empty.
func ServicePrefix(
	tenant, resourceOwner string,
	s ServiceType,
	c CategoryType,
) (Path, error) {
	if err := ValidateServiceAndCategory(s, c); err != nil {
		return nil, err
	}

	if err := verifyInputValues(tenant, resourceOwner); err != nil {
		return nil, err
	}

	prefix := Builder{}.withPrefix(tenant, s.String(), resourceOwner, c.String())

	return &dataLayerResourcePath{
		Builder:  *prefix,
		service:  s,
		category: c,
		hasItem:  false,
	}, nil
}
// withPrefix returns a new Builder that starts with the given prefix values
// and continues with the current builder's elements. The prefix goes through
// Append; the receiver's elements are appended as-is.
func (pb Builder) withPrefix(elements ...string) *Builder {
	prefixed := Builder{}.Append(elements...)
	prefixed.elements = append(prefixed.elements, pb.elements...)

	return prefixed
}
// ---------------------------------------------------------------------------
// Data Layer Path Transformers
// ---------------------------------------------------------------------------
// ToStreamStorePath returns a Path for this builder prefixed with the tenant,
// the metadata counterpart of service, the purpose, and the details category.
//
// Exchange, OneDrive, and SharePoint map to their metadata services; any
// other service value maps to UnknownService. Returns an error if tenant or
// purpose is empty, or if isItem is set on a builder with no elements.
func (pb Builder) ToStreamStorePath(
	tenant, purpose string,
	service ServiceType,
	isItem bool,
) (Path, error) {
	if err := verifyInputValues(tenant, purpose); err != nil {
		return nil, err
	}

	if len(pb.elements) == 0 && isItem {
		return nil, clues.New("missing path beyond prefix")
	}

	metaSvc := UnknownService

	switch service {
	case ExchangeService:
		metaSvc = ExchangeMetadataService
	case OneDriveService:
		metaSvc = OneDriveMetadataService
	case SharePointService:
		metaSvc = SharePointMetadataService
	}

	full := pb.withPrefix(
		tenant,
		metaSvc.String(),
		purpose,
		DetailsCategory.String())

	return &dataLayerResourcePath{
		Builder:  *full,
		service:  metaSvc,
		category: DetailsCategory,
		hasItem:  isItem,
	}, nil
}
// ToServiceCategoryMetadataPath returns a Path for this builder prefixed with
// the tenant, the metadata counterpart of service, the user, and the category.
//
// Exchange, OneDrive, and SharePoint map to their metadata services; any
// other service value maps to UnknownService. Returns an error if the service
// and category fail ValidateServiceAndCategory, if tenant or user is empty, or
// if isItem is set on a builder with no elements.
func (pb Builder) ToServiceCategoryMetadataPath(
	tenant, user string,
	service ServiceType,
	category CategoryType,
	isItem bool,
) (Path, error) {
	if err := ValidateServiceAndCategory(service, category); err != nil {
		return nil, err
	}

	if err := verifyInputValues(tenant, user); err != nil {
		return nil, err
	}

	if len(pb.elements) == 0 && isItem {
		return nil, clues.New("missing path beyond prefix")
	}

	metaSvc := UnknownService

	switch service {
	case ExchangeService:
		metaSvc = ExchangeMetadataService
	case OneDriveService:
		metaSvc = OneDriveMetadataService
	case SharePointService:
		metaSvc = SharePointMetadataService
	}

	full := pb.withPrefix(
		tenant,
		metaSvc.String(),
		user,
		category.String())

	return &dataLayerResourcePath{
		Builder:  *full,
		service:  metaSvc,
		category: category,
		hasItem:  isItem,
	}, nil
}
// ToDataLayerPath returns a Path for this builder prefixed with the tenant,
// service, user, and category.
//
// Returns an error if the service and category fail
// ValidateServiceAndCategory, if tenant or user is empty, or if the builder
// has no elements of its own.
func (pb Builder) ToDataLayerPath(
	tenant, user string,
	service ServiceType,
	category CategoryType,
	isItem bool,
) (Path, error) {
	if err := ValidateServiceAndCategory(service, category); err != nil {
		return nil, err
	}

	if err := pb.verifyPrefix(tenant, user); err != nil {
		return nil, err
	}

	full := pb.withPrefix(
		tenant,
		service.String(),
		user,
		category.String())

	return &dataLayerResourcePath{
		Builder:  *full,
		service:  service,
		category: category,
		hasItem:  isItem,
	}, nil
}
// ToDataLayerExchangePathForCategory is shorthand for ToDataLayerPath with the
// service fixed to ExchangeService.
func (pb Builder) ToDataLayerExchangePathForCategory(
	tenant, user string,
	category CategoryType,
	isItem bool,
) (Path, error) {
	return pb.ToDataLayerPath(
		tenant, user,
		ExchangeService, category,
		isItem)
}
// ToDataLayerOneDrivePath is shorthand for ToDataLayerPath with the service
// fixed to OneDriveService and the category fixed to FilesCategory.
func (pb Builder) ToDataLayerOneDrivePath(
	tenant, user string,
	isItem bool,
) (Path, error) {
	return pb.ToDataLayerPath(
		tenant, user,
		OneDriveService, FilesCategory,
		isItem)
}
// ToDataLayerSharePointPath is shorthand for ToDataLayerPath with the service
// fixed to SharePointService; site is passed as the resource owner.
func (pb Builder) ToDataLayerSharePointPath(
	tenant, site string,
	category CategoryType,
	isItem bool,
) (Path, error) {
	return pb.ToDataLayerPath(
		tenant, site,
		SharePointService, category,
		isItem)
}
// ---------------------------------------------------------------------------
// Stringers and PII Concealer Compliance
// ---------------------------------------------------------------------------
// Conceal returns the concealed representation of the builder's elements,
// suitable for logging, storing in errors, and other output.
func (pb Builder) Conceal() string {
	concealed := pb.elements.Conceal()

	return concealed
}
// Format writes the concealed representation of the builder to fs. The verb
// is ignored, so the builder stays concealed under every Printf-style verb,
// keeping it suitable for logging, storing in errors, and other output.
func (pb Builder) Format(fs fmt.State, _ rune) {
	concealed := pb.Conceal()

	fmt.Fprint(fs, concealed)
}
// String returns all path elements joined together into one string, as
// produced by Elements.String. Elements of the path that need escaping are
// escaped.
// The result is not concealed, and is not suitable for logging or structured
// errors.
func (pb Builder) String() string {
	joined := pb.elements.String()

	return joined
}
// PlainString returns the unescaped, unmodified string form of the builder,
// as produced by Elements.PlainString.
// The result is not concealed, and is not suitable for logging or structured
// errors.
func (pb Builder) PlainString() string {
	plain := pb.elements.PlainString()

	return plain
}
// ---------------------------------------------------------------------------
// Exported Helpers
// ---------------------------------------------------------------------------
// Build assembles a data layer Path in one call: the given unescaped elements
// are collected into a Builder, which is then converted with ToDataLayerPath
// using the supplied tenant, resource owner, service, and category. Any error
// from ToDataLayerPath is returned unchanged.
func Build(
	tenant, resourceOwner string,
	service ServiceType,
	category CategoryType,
	hasItem bool,
	elements ...string,
) (Path, error) {
	return Builder{}.
		Append(elements...).
		ToDataLayerPath(tenant, resourceOwner, service, category, hasItem)
}
// FromDataLayerPath parses the escaped path p, validates that its elements
// match a resource-specific path format, and returns a Path for that
// resource-specific type. isItem is recorded on the returned Path.
//
// An error is returned if p is logically empty, contains a malformed escape
// sequence, has fewer than five elements, or names a service/category pair
// that fails validation.
func FromDataLayerPath(p string, isItem bool) (Path, error) {
	trimmed := TrimTrailingSlash(p)
	// A path made only of separators is empty once trimmed.
	if trimmed == "" {
		return nil, clues.New("logically empty path given").With("path_string", trimmed)
	}

	// Going through a Builder reuses the logic that drops empty elements.
	parsed, err := Builder{}.UnescapeAndAppend(Split(trimmed)...)
	if err != nil {
		return nil, clues.Stack(errParsingPath, err).With("path_string", trimmed)
	}

	if len(parsed.elements) < 5 {
		return nil, clues.New("path has too few segments").With("path_string", trimmed)
	}

	// Element 1 holds the service and element 3 holds the category.
	svc, cat, err := validateServiceAndCategoryStrings(
		parsed.elements[1],
		parsed.elements[3],
	)
	if err != nil {
		return nil, clues.Stack(errParsingPath, err).With("path_string", trimmed)
	}

	return &dataLayerResourcePath{
		Builder:  *parsed,
		service:  svc,
		category: cat,
		hasItem:  isItem,
	}, nil
}
// TrimTrailingSlash takes an escaped path element and strips every trailing
// path separator that is not itself escaped. A separator counts as escaped
// when it is preceded by an odd number of escape characters. If the element
// has no trailing separator, or the trailing separator is escaped, the input
// is returned unchanged.
func TrimTrailingSlash(element string) string {
	for {
		last := len(element) - 1
		if last < 0 || element[last] != PathSeparator {
			return element
		}

		// Count the run of escape characters directly before the separator.
		escapes := 0
		for j := last - 1; j >= 0 && element[j] == escapeCharacter; j-- {
			escapes++
		}

		// An odd run means the separator itself is escaped and must stay.
		if escapes%2 == 1 {
			return element
		}

		element = element[:last]
	}
}
// Split takes an escaped string and returns a slice of escaped path elements.
// The string is split on every path separator that is not escaped (i.e. not
// preceded by an odd number of escape characters). Leading separators and
// runs of consecutive separators do not produce empty elements. The provided
// string must not contain an unescaped trailing path separator; whatever
// follows the last split point is always appended, even when it is empty.
func Split(segment string) []string {
	var (
		parts          = make([]string, 0)
		begin          int
		escapeRun      int
		afterSeparator = true // start true so leading separators are ignored
	)

	for idx, r := range segment {
		switch {
		case r == escapeCharacter:
			escapeRun++
			afterSeparator = false

		case r != PathSeparator, escapeRun%2 != 0:
			// Either an ordinary character or an escaped separator.
			escapeRun = 0
			afterSeparator = false

		default:
			// Unescaped separator: close the current element unless it would
			// be empty. The separator itself is left out of the result.
			if !afterSeparator {
				parts = append(parts, segment[begin:idx])
			}

			begin = idx + 1
			escapeRun = 0
			afterSeparator = true
		}
	}

	// The loop never emits the final element, so add it here.
	return append(parts, segment[begin:])
}
// ---------------------------------------------------------------------------
// Unexported Helpers
// ---------------------------------------------------------------------------
// verifyInputValues ensures that neither the tenant nor the resourceOwner is
// empty. The tenant is checked first; the returned error stacks
// errMissingSegment with the name of the missing value.
func verifyInputValues(tenant, resourceOwner string) error {
	switch {
	case tenant == "":
		return clues.Stack(errMissingSegment, clues.New("tenant"))
	case resourceOwner == "":
		return clues.Stack(errMissingSegment, clues.New("resourceOwner"))
	}

	return nil
}
// escapeElement takes a single path element and places an escape character in
// front of every character listed in charactersToEscape. If there are no
// characters that need escaping, the input is returned unchanged.
func escapeElement(element string) string {
	// First pass: count the escapes so the output can be sized exactly.
	needed := 0

	for _, r := range element {
		if _, found := charactersToEscape[r]; found {
			needed++
		}
	}

	if needed == 0 {
		return element
	}

	var out strings.Builder

	out.Grow(len(element) + needed)

	// Second pass: copy the input through in chunks, inserting an escape
	// character immediately before each character that requires one.
	from := 0

	for i, r := range element {
		if _, found := charactersToEscape[r]; !found {
			continue
		}

		out.WriteString(element[from:i])
		out.WriteRune(escapeCharacter)
		from = i
	}

	out.WriteString(element[from:])

	return out.String()
}
// unescape converts the given escaped element into a "raw" element by
// removing each escape character and keeping the character that follows it
// verbatim. Using this function on segments that contain escaped path
// separators will result in an ambiguous or incorrect segment.
func unescape(element string) string {
	var out strings.Builder

	keepFrom := 0

	// Scanning bytes is safe here: the escape character is ASCII and so can
	// never appear inside a multi-byte UTF-8 sequence.
	for i := 0; i < len(element); i++ {
		if element[i] != escapeCharacter {
			continue
		}

		// Emit everything before the escape character and drop the escape.
		out.WriteString(element[keepFrom:i])
		keepFrom = i + 1

		// The character after an escape is always kept, even when it is
		// another escape character, so step over it.
		i++
	}

	out.WriteString(element[keepFrom:])

	return out.String()
}
// validateEscapedElement takes an escaped element that has had trailing
// separators trimmed and checks its escaping. It returns an error when an
// escape character is followed by a character that does not need escaping,
// when a character that needs escaping appears without an escape character
// before it, or when the element ends on an escape character.
func validateEscapedElement(element string) error {
	afterEscape := false

	for _, r := range element {
		_, special := charactersToEscape[r]

		if afterEscape {
			afterEscape = false

			if !special {
				return clues.New("bad escape sequence in path").
					With("escape_sequence", fmt.Sprintf("'%c%c'", escapeCharacter, r))
			}

			continue
		}

		// The escape character is checked first because it is itself one of
		// the characters that needs escaping.
		if r == escapeCharacter {
			afterEscape = true
			continue
		}

		if special {
			return clues.New("unescaped character in path").With("character", r)
		}
	}

	if afterEscape {
		return clues.New("trailing escape character")
	}

	return nil
}
// join returns a single string made of the given elements with the path
// separator '/' placed between them.
func join(elements []string) string {
	// The strings package is used because the standard path package does not
	// handle escaped '/' and '\' according to the escaping rules.
	separator := string(PathSeparator)

	return strings.Join(elements, separator)
}
// verifyPrefix ensures that the tenant and resourceOwner are non-empty and
// that the builder holds at least one element beyond the prefix. The input
// values are checked before the builder's elements.
func (pb Builder) verifyPrefix(tenant, resourceOwner string) error {
	err := verifyInputValues(tenant, resourceOwner)
	if err != nil {
		return err
	}

	if len(pb.elements) > 0 {
		return nil
	}

	return clues.New("missing path beyond prefix")
}