// Package path provides a set of functions for wrangling paths from the outside
// world into paths that corso can understand. Paths use the standard Unix path
// separator character '/'. If for some reason an individual element in a raw
// path contains the '/' character, it should be escaped with '\'. If the path
// contains '\' it should be escaped by turning it into '\\'.
//
// Paths can be split into elements by splitting on '/' if the '/' is not
// escaped. Additionally, corso may operate on segments in a path. Segments are
// made up of one or more path elements.
//
// Examples of paths splitting by elements and canonicalization with escaping:
// 1.
//
//	input path: `this/is/a/path`
//	elements of path: `this`, `is`, `a`, `path`
//
// 2.
//
//	input path: `this/is\/a/path`
//	elements of path: `this`, `is/a`, `path`
//
// 3.
//
//	input path: `this/is\\/a/path`
//	elements of path: `this`, `is\`, `a`, `path`
//
// 4.
//
//	input path: `this/is\\\/a/path`
//	elements of path: `this`, `is\/a`, `path`
//
// 5.
//
//	input path: `this/is//a/path`
//	elements of path: `this`, `is`, `a`, `path`
//
// 6.
//
//	input path: `this/is\//a/path`
//	elements of path: `this`, `is/`, `a`, `path`
//
// 7.
//
//	input path: `this/is/a/path/`
//	elements of path: `this`, `is`, `a`, `path`
//
// 8.
//
//	input path: `this/is/a/path\/`
//	elements of path: `this`, `is`, `a`, `path/`
package path

import (
	"bytes"
	"crypto/sha256"
	"fmt"
	"strings"

	"github.com/alcionai/clues"
)

const (
	// escapeCharacter precedes any character in an escaped path element that
	// would otherwise carry special meaning (see charactersToEscape).
	escapeCharacter = '\\'
	// PathSeparator divides the elements of an escaped path string.
	PathSeparator = '/'

	// shortRefCharacters is the number of hex characters produced by
	// Builder.ShortRef.
	shortRefCharacters = 12
)

// charactersToEscape is the set of characters that must be preceded by
// escapeCharacter when they appear inside an escaped path element.
var charactersToEscape = map[rune]struct{}{
	PathSeparator:   {},
	escapeCharacter: {},
}

var (
	// errMissingSegment is stacked with the name of the required value (for
	// example the tenant) that was empty.
	errMissingSegment = clues.New("missing required path element")
	// errParsingPath is stacked onto failures hit while parsing an escaped
	// path string into a resource path.
	errParsingPath = clues.New("parsing resource path")
)

// Path is the interface for resource-specific paths. For now, it exposes
// generic functions to pull information from segments. Resources that don't
// have the requested information should return an empty string.
type Path interface { String() string Service() ServiceType Category() CategoryType Tenant() string ResourceOwner() string Folder(escaped bool) string Folders() Elements Item() string // UpdateParent updates parent from old to new if the item/folder was // parented by old path UpdateParent(prev, cur Path) bool // PopFront returns a Builder object with the first element (left-side) // removed. As the resulting set of elements is no longer a valid resource // path a Builder is returned instead. PopFront() *Builder // Dir returns a Path object with the right-most element removed if possible. // If removing the right-most element would discard one of the required prefix // elements then an error is returned. Dir() (Path, error) // Elements returns all the elements in the path. This is a temporary function // and will likely be updated to handle encoded elements instead of clear-text // elements in the future. Elements() Elements // Append returns a new Path object with the given element added to the end of // the old Path if possible. If the old Path is an item Path then Append // returns an error. Append(isItem bool, elems ...string) (Path, error) // AppendItem is a shorthand for Append(true, someItem) AppendItem(item string) (Path, error) // ShortRef returns a short reference representing this path. The short // reference is guaranteed to be unique. No guarantees are made about whether // a short reference can be converted back into the Path that generated it. ShortRef() string // ToBuilder returns a Builder instance that represents the current Path. ToBuilder() *Builder // Every path needs to comply with these funcs to ensure that PII // is appropriately hidden from logging, errors, and other outputs. clues.Concealer fmt.Stringer // In the rare case that the path needs to get printed as a plain string, // without obscuring values for PII. 
clues.PlainStringer } // interface compliance required for handling PII var ( _ clues.Concealer = &Builder{} _ fmt.Stringer = &Builder{} ) // RestorePaths denotes the location to find an item in kopia and the path of // the collection to place the item in for restore. type RestorePaths struct { StoragePath Path RestorePath Path } // Builder is a simple path representation that only tracks path elements. It // can join, escape, and unescape elements. Higher-level packages are expected // to wrap this struct to build resource-specific contexts (e.x. an // ExchangeMailPath). // Resource-specific paths allow access to more information like segments in the // path. Builders that are turned into resource paths later on do not need to // manually add prefixes for items that normally appear in the data layer (ex. // tenant ID, service, user ID, etc). type Builder struct { // Unescaped version of elements. elements Elements } // Append creates a copy of this Builder and adds the given elements them to the // end of the new Builder. Elements are added in the order they are passed. func (pb Builder) Append(elements ...string) *Builder { res := &Builder{elements: make([]string, len(pb.elements))} copy(res.elements, pb.elements) // Unescaped elements can't fail validation. //nolint:errcheck res.appendElements(false, elements) return res } func (pb *Builder) appendElements(escaped bool, elements []string) error { for _, e := range elements { if len(e) == 0 { continue } tmp := e if escaped { tmp = TrimTrailingSlash(tmp) // If tmp was just the path separator then it will be empty now. if len(tmp) == 0 { continue } if err := validateEscapedElement(tmp); err != nil { return err } tmp = unescape(tmp) } pb.elements = append(pb.elements, tmp) } return nil } // UnescapeAndAppend creates a copy of this Builder and adds one or more already // escaped path elements to the end of the new Builder. Elements are added in // the order they are passed. 
func (pb Builder) UnescapeAndAppend(elements ...string) (*Builder, error) { res := &Builder{elements: make([]string, 0, len(pb.elements))} copy(res.elements, pb.elements) if err := res.appendElements(true, elements); err != nil { return nil, err } return res, nil } // SplitUnescapeAppend takes in an escaped string representing a directory // path, splits the string, and appends it to the current builder. func (pb Builder) SplitUnescapeAppend(s string) (*Builder, error) { elems := Split(TrimTrailingSlash(s)) return pb.UnescapeAndAppend(elems...) } func (pb Builder) PopFront() *Builder { if len(pb.elements) <= 1 { return &Builder{} } elements := make([]string, len(pb.elements)-1) copy(elements, pb.elements[1:]) return &Builder{ elements: elements, } } // Dir removes the last element from the builder. func (pb Builder) Dir() *Builder { if len(pb.elements) <= 1 { return &Builder{} } return &Builder{ // Safe to use the same elements because Builders are immutable. elements: pb.elements[:len(pb.elements)-1], } } // HeadElem returns the first element in the Builder. func (pb Builder) HeadElem() string { if len(pb.elements) == 0 { return "" } return pb.elements[0] } // LastElem returns the last element in the Builder. func (pb Builder) LastElem() string { if len(pb.elements) == 0 { return "" } return pb.elements[len(pb.elements)-1] } // UpdateParent updates leading elements matching prev to be cur and returns // true if it was updated. If prev is not a prefix of this Builder changes // nothing and returns false. If either prev or cur is nil does nothing and // returns false. func (pb *Builder) UpdateParent(prev, cur *Builder) bool { if prev == cur || prev == nil || cur == nil || len(prev.Elements()) > len(pb.Elements()) { return false } parent := true for i, e := range prev.Elements() { if pb.elements[i] != e { parent = false break } } if !parent { return false } pb.elements = append(cur.Elements(), pb.elements[len(prev.Elements()):]...) 
return true } // ShortRef produces a truncated hash of the builder that // acts as a unique identifier. func (pb Builder) ShortRef() string { if len(pb.elements) == 0 { return "" } data := bytes.Buffer{} for _, element := range pb.elements { data.WriteString(element) } sum := sha256.Sum256(data.Bytes()) // Some conversions to get the right number of characters in the output. This // outputs hex, so we need to take the target number of characters and do the // equivalent of (shortRefCharacters * 4) / 8. This is // / which gets us how many bytes // to give to our format command. numBytes := shortRefCharacters / 2 return fmt.Sprintf("%x", sum[:numBytes]) } // Elements returns all the elements in the path. This is a temporary function // and will likely be updated to handle encoded elements instead of clear-text // elements in the future. func (pb Builder) Elements() Elements { return append(Elements{}, pb.elements...) } func ServicePrefix( tenant, resourceOwner string, s ServiceType, c CategoryType, ) (Path, error) { pb := Builder{} if err := ValidateServiceAndCategory(s, c); err != nil { return nil, err } if err := verifyInputValues(tenant, resourceOwner); err != nil { return nil, err } return &dataLayerResourcePath{ Builder: *pb.withPrefix(tenant, s.String(), resourceOwner, c.String()), service: s, category: c, hasItem: false, }, nil } // withPrefix creates a Builder prefixed with the parameter values, and // concatenated with the current builder elements. func (pb Builder) withPrefix(elements ...string) *Builder { res := Builder{}.Append(elements...) res.elements = append(res.elements, pb.elements...) 
return res } // --------------------------------------------------------------------------- // Data Layer Path Transformers // --------------------------------------------------------------------------- func (pb Builder) ToStreamStorePath( tenant, purpose string, service ServiceType, isItem bool, ) (Path, error) { if err := verifyInputValues(tenant, purpose); err != nil { return nil, err } if isItem && len(pb.elements) == 0 { return nil, clues.New("missing path beyond prefix") } metadataService := UnknownService switch service { case ExchangeService: metadataService = ExchangeMetadataService case OneDriveService: metadataService = OneDriveMetadataService case SharePointService: metadataService = SharePointMetadataService } return &dataLayerResourcePath{ Builder: *pb.withPrefix( tenant, metadataService.String(), purpose, DetailsCategory.String()), service: metadataService, category: DetailsCategory, hasItem: isItem, }, nil } func (pb Builder) ToServiceCategoryMetadataPath( tenant, user string, service ServiceType, category CategoryType, isItem bool, ) (Path, error) { if err := ValidateServiceAndCategory(service, category); err != nil { return nil, err } if err := verifyInputValues(tenant, user); err != nil { return nil, err } if isItem && len(pb.elements) == 0 { return nil, clues.New("missing path beyond prefix") } metadataService := UnknownService switch service { case ExchangeService: metadataService = ExchangeMetadataService case OneDriveService: metadataService = OneDriveMetadataService case SharePointService: metadataService = SharePointMetadataService } return &dataLayerResourcePath{ Builder: *pb.withPrefix( tenant, metadataService.String(), user, category.String(), ), service: metadataService, category: category, hasItem: isItem, }, nil } func (pb Builder) ToDataLayerPath( tenant, user string, service ServiceType, category CategoryType, isItem bool, ) (Path, error) { if err := ValidateServiceAndCategory(service, category); err != nil { return nil, err } if 
err := pb.verifyPrefix(tenant, user); err != nil { return nil, err } return &dataLayerResourcePath{ Builder: *pb.withPrefix( tenant, service.String(), user, category.String(), ), service: service, category: category, hasItem: isItem, }, nil } func (pb Builder) ToDataLayerExchangePathForCategory( tenant, user string, category CategoryType, isItem bool, ) (Path, error) { return pb.ToDataLayerPath(tenant, user, ExchangeService, category, isItem) } func (pb Builder) ToDataLayerOneDrivePath( tenant, user string, isItem bool, ) (Path, error) { return pb.ToDataLayerPath(tenant, user, OneDriveService, FilesCategory, isItem) } func (pb Builder) ToDataLayerSharePointPath( tenant, site string, category CategoryType, isItem bool, ) (Path, error) { return pb.ToDataLayerPath(tenant, site, SharePointService, category, isItem) } // --------------------------------------------------------------------------- // Stringers and PII Concealer Compliance // --------------------------------------------------------------------------- // Conceal produces a concealed representation of the builder, suitable for // logging, storing in errors, and other output. func (pb Builder) Conceal() string { return pb.elements.Conceal() } // Format produces a concealed representation of the builder, even when // used within a PrintF, suitable for logging, storing in errors, // and other output. func (pb Builder) Format(fs fmt.State, _ rune) { fmt.Fprint(fs, pb.Conceal()) } // String returns a string that contains all path elements joined together. // Elements of the path that need escaping are escaped. // The result is not concealed, and is not suitable for logging or structured // errors. func (pb Builder) String() string { return pb.elements.String() } // PlainString returns an unescaped, unmodified string of the builder. // The result is not concealed, and is not suitable for logging or structured // errors. 
func (pb Builder) PlainString() string {
	return pb.elements.PlainString()
}

// ---------------------------------------------------------------------------
// Exported Helpers
// ---------------------------------------------------------------------------

// Build assembles a resource Path from the prefix values (tenant, resource
// owner, service, category) followed by the given unescaped elements. It is a
// shorthand for appending elements to an empty Builder and calling
// ToDataLayerPath on the result.
func Build(
	tenant, resourceOwner string,
	service ServiceType,
	category CategoryType,
	hasItem bool,
	elements ...string,
) (Path, error) {
	return Builder{}.
		Append(elements...).
		ToDataLayerPath(tenant, resourceOwner, service, category, hasItem)
}

// FromDataLayerPath parses the escaped path p, checks that its elements match
// a resource-specific path format, and returns a Path for that resource type.
// An error is returned when p is logically empty, is malformed, has fewer than
// five elements, or names a service/category pair that fails validation.
func FromDataLayerPath(p string, isItem bool) (Path, error) {
	// Minimum count: the four prefix elements plus at least one more.
	const minElements = 5

	p = TrimTrailingSlash(p)
	if p == "" {
		// Only path separators were supplied.
		return nil, clues.New("logically empty path given").With("path_string", p)
	}

	// Going through a Builder reuses the logic that drops empty elements.
	builder, err := Builder{}.UnescapeAndAppend(Split(p)...)
	if err != nil {
		return nil, clues.Stack(errParsingPath, err).With("path_string", p)
	}

	if len(builder.elements) < minElements {
		return nil, clues.New("path has too few segments").With("path_string", p)
	}

	// The service sits at index 1 and the category at index 3.
	svc, cat, err := validateServiceAndCategoryStrings(
		builder.elements[1],
		builder.elements[3],
	)
	if err != nil {
		return nil, clues.Stack(errParsingPath, err).With("path_string", p)
	}

	return &dataLayerResourcePath{
		Builder:  *builder,
		service:  svc,
		category: cat,
		hasItem:  isItem,
	}, nil
}

// TrimTrailingSlash takes an escaped path element and returns an escaped path
// element with the trailing path separator character(s) removed if they were not
// escaped. If there were no trailing path separator character(s) or the separator(s)
// were escaped the input is returned unchanged.
func TrimTrailingSlash(element string) string { for len(element) > 0 && element[len(element)-1] == PathSeparator { lastIdx := len(element) - 1 numSlashes := 0 for i := lastIdx - 1; i >= 0; i-- { if element[i] != escapeCharacter { break } numSlashes++ } if numSlashes%2 != 0 { break } element = element[:lastIdx] } return element } // split takes an escaped string and returns a slice of path elements. The // string is split on the path separator according to the escaping rules. The // provided string must not contain an unescaped trailing path separator. func Split(segment string) []string { res := make([]string, 0) numEscapes := 0 startIdx := 0 // Start with true to ignore leading separator. prevWasSeparator := true for i, c := range segment { if c == escapeCharacter { prevWasSeparator = false numEscapes++ continue } if c != PathSeparator { prevWasSeparator = false numEscapes = 0 continue } // Remaining is just path separator handling. if numEscapes%2 != 0 { // This is an escaped separator. prevWasSeparator = false numEscapes = 0 continue } // Ignore leading separator characters and don't add elements that would // be empty. if !prevWasSeparator { res = append(res, segment[startIdx:i]) } // We don't want to include the path separator in the result. startIdx = i + 1 prevWasSeparator = true numEscapes = 0 } // Add the final segment because the loop above won't catch it. There should // be no trailing separator character. 
res = append(res, segment[startIdx:]) return res } // --------------------------------------------------------------------------- // Unexported Helpers // --------------------------------------------------------------------------- func verifyInputValues(tenant, resourceOwner string) error { if len(tenant) == 0 { return clues.Stack(errMissingSegment, clues.New("tenant")) } if len(resourceOwner) == 0 { return clues.Stack(errMissingSegment, clues.New("resourceOwner")) } return nil } // escapeElement takes a single path element and escapes all characters that // require an escape sequence. If there are no characters that need escaping, // the input is returned unchanged. func escapeElement(element string) string { escapeIdx := make([]int, 0) for i, c := range element { if _, ok := charactersToEscape[c]; ok { escapeIdx = append(escapeIdx, i) } } if len(escapeIdx) == 0 { return element } startIdx := 0 b := strings.Builder{} b.Grow(len(element) + len(escapeIdx)) for _, idx := range escapeIdx { b.WriteString(element[startIdx:idx]) b.WriteRune(escapeCharacter) startIdx = idx } // Add the end of the element after the last escape character. b.WriteString(element[startIdx:]) return b.String() } // unescape returns the given element and converts it into a "raw" // element that does not have escape characters before characters that need // escaping. Using this function on segments that contain escaped path // separators will result in an ambiguous or incorrect segment. func unescape(element string) string { b := strings.Builder{} startIdx := 0 prevWasEscape := false for i, c := range element { if c != escapeCharacter || prevWasEscape { prevWasEscape = false continue } // This is an escape character, remove it from the output. 
b.WriteString(element[startIdx:i]) startIdx = i + 1 prevWasEscape = true } b.WriteString(element[startIdx:]) return b.String() } // validateEscapedElement takes an escaped element that has had trailing // separators trimmed and ensures that no characters requiring escaping are // unescaped and that no escape characters are combined with characters that // don't need escaping. func validateEscapedElement(element string) error { prevWasEscape := false for _, c := range element { switch prevWasEscape { case true: prevWasEscape = false if _, ok := charactersToEscape[c]; !ok { return clues.New("bad escape sequence in path"). With("escape_sequence", fmt.Sprintf("'%c%c'", escapeCharacter, c)) } case false: if c == escapeCharacter { prevWasEscape = true continue } if _, ok := charactersToEscape[c]; ok { return clues.New("unescaped character in path").With("character", c) } } } if prevWasEscape { return clues.New("trailing escape character") } return nil } // join returns a string containing the given elements joined by the path // separator '/'. func join(elements []string) string { // Have to use strings because path package does not handle escaped '/' and // '\' according to the escaping rules. return strings.Join(elements, string(PathSeparator)) } // verifyPrefix ensures that the tenant and resourceOwner are valid // values, and that the builder has some directory structure. func (pb Builder) verifyPrefix(tenant, resourceOwner string) error { if err := verifyInputValues(tenant, resourceOwner); err != nil { return err } if len(pb.elements) == 0 { return clues.New("missing path beyond prefix") } return nil }