## Description Adds a new reference to the details ent: location-ref. The location holds the human-readable version of the item's location in whatever m365 service sourced the item. Hookup is incomplete; following PRs will fill out functionality. Also adds a LocationPather interface to data_collections to pass this data back and forth between producers and consumers. Should be safe to merge into main. ## Does this PR need a docs update or release note? - [x] 🕐 Yes, but in a later PR ## Type of change - [x] 🌻 Feature - [x] 🐛 Bugfix ## Issue(s) * #2423 ## Test Plan - [x] ⚡ Unit test
645 lines
16 KiB
Go
645 lines
16 KiB
Go
// Package path provides a set of functions for wrangling paths from the outside
|
|
// world into paths that corso can understand. Paths use the standard Unix path
|
|
// separator character '/'. If for some reason an individual element in a raw
|
|
// path contains the '/' character, it should be escaped with '\'. If the path
|
|
// contains '\' it should be escaped by turning it into '\\'.
|
|
//
|
|
// Paths can be split into elements by splitting on '/' if the '/' is not
|
|
// escaped. Additionally, corso may operate on segments in a path. Segments are
|
|
// made up of one or more path elements.
|
|
//
|
|
// Examples of paths splitting by elements and canonicalization with escaping:
|
|
// 1.
|
|
//
|
|
// input path: `this/is/a/path`
|
|
// elements of path: `this`, `is`, `a`, `path`
|
|
//
|
|
// 2.
|
|
//
|
|
// input path: `this/is\/a/path`
|
|
// elements of path: `this`, `is/a`, `path`
|
|
//
|
|
// 3.
|
|
//
|
|
// input path: `this/is\\/a/path`
|
|
// elements of path: `this`, `is\`, `a`, `path`
|
|
//
|
|
// 4.
|
|
//
|
|
// input path: `this/is\\\/a/path`
|
|
// elements of path: `this`, `is\/a`, `path`
|
|
//
|
|
// 5.
|
|
//
|
|
// input path: `this/is//a/path`
|
|
// elements of path: `this`, `is`, `a`, `path`
|
|
//
|
|
// 6.
|
|
//
|
|
// input path: `this/is\//a/path`
|
|
// elements of path: `this`, `is/`, `a`, `path`
|
|
//
|
|
// 7.
|
|
//
|
|
// input path: `this/is/a/path/`
|
|
// elements of path: `this`, `is`, `a`, `path`
|
|
//
|
|
// 8.
|
|
//
|
|
// input path: `this/is/a/path\/`
|
|
// elements of path: `this`, `is`, `a`, `path/`
|
|
package path
|
|
|
|
import (
|
|
"bytes"
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"strings"
|
|
|
|
"github.com/alcionai/clues"
|
|
"github.com/pkg/errors"
|
|
)
|
|
|
|
// Characters that carry special meaning inside an escaped path string.
const (
	// PathSeparator divides an escaped path string into elements.
	PathSeparator = '/'
	// escapeCharacter is placed in front of a character that must be taken
	// literally in an escaped path string.
	escapeCharacter = '\\'
)

// shortRefCharacters is the number of hex characters produced by ShortRef.
const shortRefCharacters = 12
|
|
|
|
var charactersToEscape = map[rune]struct{}{
|
|
PathSeparator: {},
|
|
escapeCharacter: {},
|
|
}
|
|
|
|
// Sentinel errors used when building or parsing paths.
var (
	// errMissingSegment marks a required path element that was empty.
	errMissingSegment = errors.New("missing required path element")
	// errParsingPath marks a failure while parsing an escaped path string.
	errParsingPath = errors.New("parsing resource path")
)
|
|
|
|
// For now, adding generic functions to pull information from segments.
|
|
// Resources that don't have the requested information should return an empty
|
|
// string.
|
|
type Path interface {
|
|
String() string
|
|
Service() ServiceType
|
|
Category() CategoryType
|
|
Tenant() string
|
|
ResourceOwner() string
|
|
Folder(bool) string
|
|
Folders() []string
|
|
Item() string
|
|
// PopFront returns a Builder object with the first element (left-side)
|
|
// removed. As the resulting set of elements is no longer a valid resource
|
|
// path a Builder is returned instead.
|
|
PopFront() *Builder
|
|
// Dir returns a Path object with the right-most element removed if possible.
|
|
// If removing the right-most element would discard one of the required prefix
|
|
// elements then an error is returned.
|
|
Dir() (Path, error)
|
|
// Elements returns all the elements in the path. This is a temporary function
|
|
// and will likely be updated to handle encoded elements instead of clear-text
|
|
// elements in the future.
|
|
Elements() []string
|
|
// Append returns a new Path object with the given element added to the end of
|
|
// the old Path if possible. If the old Path is an item Path then Append
|
|
// returns an error.
|
|
Append(element string, isItem bool) (Path, error)
|
|
// ShortRef returns a short reference representing this path. The short
|
|
// reference is guaranteed to be unique. No guarantees are made about whether
|
|
// a short reference can be converted back into the Path that generated it.
|
|
ShortRef() string
|
|
// ToBuilder returns a Builder instance that represents the current Path.
|
|
ToBuilder() *Builder
|
|
}
|
|
|
|
// Builder is a minimal path representation that tracks nothing but path
// elements. It can join, escape, and unescape those elements. Higher-level
// packages are expected to wrap this struct to build resource-specific
// contexts (e.g. an ExchangeMailPath).
//
// Resource-specific paths give access to more information, such as the
// segments in the path. A Builder that is later turned into a resource path
// does not need the data-layer prefix items (e.g. tenant ID, service, user
// ID, etc.) added manually.
type Builder struct {
	// elements holds the path elements in their unescaped form.
	elements []string
}
|
|
|
|
// UnescapeAndAppend creates a copy of this Builder and adds one or more already
|
|
// escaped path elements to the end of the new Builder. Elements are added in
|
|
// the order they are passed.
|
|
func (pb Builder) UnescapeAndAppend(elements ...string) (*Builder, error) {
|
|
res := &Builder{elements: make([]string, 0, len(pb.elements))}
|
|
copy(res.elements, pb.elements)
|
|
|
|
if err := res.appendElements(true, elements); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return res, nil
|
|
}
|
|
|
|
// SplitUnescapeAppend takes in an escaped string representing a directory
|
|
// path, splits the string, and appends it to the current builder.
|
|
func (pb Builder) SplitUnescapeAppend(s string) (*Builder, error) {
|
|
elems := Split(TrimTrailingSlash(s))
|
|
|
|
return pb.UnescapeAndAppend(elems...)
|
|
}
|
|
|
|
// Append creates a copy of this Builder and adds the given elements them to the
|
|
// end of the new Builder. Elements are added in the order they are passed.
|
|
func (pb Builder) Append(elements ...string) *Builder {
|
|
res := &Builder{elements: make([]string, len(pb.elements))}
|
|
copy(res.elements, pb.elements)
|
|
|
|
// Unescaped elements can't fail validation.
|
|
//nolint:errcheck
|
|
res.appendElements(false, elements)
|
|
|
|
return res
|
|
}
|
|
|
|
func (pb *Builder) appendElements(escaped bool, elements []string) error {
|
|
for _, e := range elements {
|
|
if len(e) == 0 {
|
|
continue
|
|
}
|
|
|
|
tmp := e
|
|
|
|
if escaped {
|
|
tmp = TrimTrailingSlash(tmp)
|
|
// If tmp was just the path separator then it will be empty now.
|
|
if len(tmp) == 0 {
|
|
continue
|
|
}
|
|
|
|
if err := validateEscapedElement(tmp); err != nil {
|
|
return err
|
|
}
|
|
|
|
tmp = unescape(tmp)
|
|
}
|
|
|
|
pb.elements = append(pb.elements, tmp)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (pb Builder) PopFront() *Builder {
|
|
if len(pb.elements) <= 1 {
|
|
return &Builder{}
|
|
}
|
|
|
|
elements := make([]string, len(pb.elements)-1)
|
|
copy(elements, pb.elements[1:])
|
|
|
|
return &Builder{
|
|
elements: elements,
|
|
}
|
|
}
|
|
|
|
func (pb Builder) Dir() *Builder {
|
|
if len(pb.elements) <= 1 {
|
|
return &Builder{}
|
|
}
|
|
|
|
return &Builder{
|
|
// Safe to use the same elements because Builders are immutable.
|
|
elements: pb.elements[:len(pb.elements)-1],
|
|
}
|
|
}
|
|
|
|
func (pb Builder) LastElem() string {
|
|
if len(pb.elements) == 0 {
|
|
return ""
|
|
}
|
|
|
|
return pb.elements[len(pb.elements)-1]
|
|
}
|
|
|
|
// String returns a string that contains all path elements joined together.
|
|
// Elements of the path that need escaping are escaped.
|
|
func (pb Builder) String() string {
|
|
escaped := make([]string, 0, len(pb.elements))
|
|
|
|
for _, e := range pb.elements {
|
|
escaped = append(escaped, escapeElement(e))
|
|
}
|
|
|
|
return join(escaped)
|
|
}
|
|
|
|
func (pb Builder) ShortRef() string {
|
|
if len(pb.elements) == 0 {
|
|
return ""
|
|
}
|
|
|
|
data := bytes.Buffer{}
|
|
|
|
for _, element := range pb.elements {
|
|
data.WriteString(element)
|
|
}
|
|
|
|
sum := sha256.Sum256(data.Bytes())
|
|
|
|
// Some conversions to get the right number of characters in the output. This
|
|
// outputs hex, so we need to take the target number of characters and do the
|
|
// equivalent of (shortRefCharacters * 4) / 8. This is
|
|
// <number of bits represented> / <bits per byte> which gets us how many bytes
|
|
// to give to our format command.
|
|
numBytes := shortRefCharacters / 2
|
|
|
|
return fmt.Sprintf("%x", sum[:numBytes])
|
|
}
|
|
|
|
// Elements returns all the elements in the path. This is a temporary function
|
|
// and will likely be updated to handle encoded elements instead of clear-text
|
|
// elements in the future.
|
|
func (pb Builder) Elements() []string {
|
|
return append([]string{}, pb.elements...)
|
|
}
|
|
|
|
func verifyInputValues(tenant, resourceOwner string) error {
|
|
if len(tenant) == 0 {
|
|
return clues.Stack(errMissingSegment, errors.New("tenant"))
|
|
}
|
|
|
|
if len(resourceOwner) == 0 {
|
|
return clues.Stack(errMissingSegment, errors.New("resourceOwner"))
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (pb Builder) verifyPrefix(tenant, resourceOwner string) error {
|
|
if err := verifyInputValues(tenant, resourceOwner); err != nil {
|
|
return err
|
|
}
|
|
|
|
if len(pb.elements) == 0 {
|
|
return errors.New("missing path beyond prefix")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (pb Builder) withPrefix(elements ...string) *Builder {
|
|
res := Builder{}.Append(elements...)
|
|
res.elements = append(res.elements, pb.elements...)
|
|
|
|
return res
|
|
}
|
|
|
|
func (pb Builder) ToStreamStorePath(
|
|
tenant, purpose string,
|
|
service ServiceType,
|
|
isItem bool,
|
|
) (Path, error) {
|
|
if err := verifyInputValues(tenant, purpose); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if isItem && len(pb.elements) == 0 {
|
|
return nil, errors.New("missing path beyond prefix")
|
|
}
|
|
|
|
metadataService := UnknownService
|
|
|
|
switch service {
|
|
case ExchangeService:
|
|
metadataService = ExchangeMetadataService
|
|
case OneDriveService:
|
|
metadataService = OneDriveMetadataService
|
|
case SharePointService:
|
|
metadataService = SharePointMetadataService
|
|
}
|
|
|
|
return &dataLayerResourcePath{
|
|
Builder: *pb.withPrefix(
|
|
tenant,
|
|
metadataService.String(),
|
|
purpose,
|
|
DetailsCategory.String(),
|
|
),
|
|
service: metadataService,
|
|
category: DetailsCategory,
|
|
hasItem: isItem,
|
|
}, nil
|
|
}
|
|
|
|
func (pb Builder) ToServiceCategoryMetadataPath(
|
|
tenant, user string,
|
|
service ServiceType,
|
|
category CategoryType,
|
|
isItem bool,
|
|
) (Path, error) {
|
|
if err := validateServiceAndCategory(service, category); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := verifyInputValues(tenant, user); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if isItem && len(pb.elements) == 0 {
|
|
return nil, errors.New("missing path beyond prefix")
|
|
}
|
|
|
|
metadataService := UnknownService
|
|
|
|
switch service {
|
|
case ExchangeService:
|
|
metadataService = ExchangeMetadataService
|
|
case OneDriveService:
|
|
metadataService = OneDriveMetadataService
|
|
case SharePointService:
|
|
metadataService = SharePointMetadataService
|
|
}
|
|
|
|
return &dataLayerResourcePath{
|
|
Builder: *pb.withPrefix(
|
|
tenant,
|
|
metadataService.String(),
|
|
user,
|
|
category.String(),
|
|
),
|
|
service: metadataService,
|
|
category: category,
|
|
hasItem: isItem,
|
|
}, nil
|
|
}
|
|
|
|
func (pb Builder) ToDataLayerPath(
|
|
tenant, user string,
|
|
service ServiceType,
|
|
category CategoryType,
|
|
isItem bool,
|
|
) (Path, error) {
|
|
if err := validateServiceAndCategory(service, category); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if err := pb.verifyPrefix(tenant, user); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return &dataLayerResourcePath{
|
|
Builder: *pb.withPrefix(
|
|
tenant,
|
|
service.String(),
|
|
user,
|
|
category.String(),
|
|
),
|
|
service: service,
|
|
category: category,
|
|
hasItem: isItem,
|
|
}, nil
|
|
}
|
|
|
|
func (pb Builder) ToDataLayerExchangePathForCategory(
|
|
tenant, user string,
|
|
category CategoryType,
|
|
isItem bool,
|
|
) (Path, error) {
|
|
return pb.ToDataLayerPath(tenant, user, ExchangeService, category, isItem)
|
|
}
|
|
|
|
func (pb Builder) ToDataLayerOneDrivePath(
|
|
tenant, user string,
|
|
isItem bool,
|
|
) (Path, error) {
|
|
return pb.ToDataLayerPath(tenant, user, OneDriveService, FilesCategory, isItem)
|
|
}
|
|
|
|
func (pb Builder) ToDataLayerSharePointPath(
|
|
tenant, site string,
|
|
category CategoryType,
|
|
isItem bool,
|
|
) (Path, error) {
|
|
return pb.ToDataLayerPath(tenant, site, SharePointService, category, isItem)
|
|
}
|
|
|
|
// FromDataLayerPath parses the escaped path p, validates the elements in p
|
|
// match a resource-specific path format, and returns a Path struct for that
|
|
// resource-specific type. If p does not match any resource-specific paths or
|
|
// is malformed returns an error.
|
|
func FromDataLayerPath(p string, isItem bool) (Path, error) {
|
|
p = TrimTrailingSlash(p)
|
|
// If p was just the path separator then it will be empty now.
|
|
if len(p) == 0 {
|
|
return nil, clues.New("logically empty path given").With("path_string", p)
|
|
}
|
|
|
|
// Turn into a Builder to reuse code that ignores empty elements.
|
|
pb, err := Builder{}.UnescapeAndAppend(Split(p)...)
|
|
if err != nil {
|
|
return nil, clues.Stack(errParsingPath, err).With("path_string", p)
|
|
}
|
|
|
|
if len(pb.elements) < 5 {
|
|
return nil, clues.New("path has too few segments").With("path_string", p)
|
|
}
|
|
|
|
service, category, err := validateServiceAndCategoryStrings(
|
|
pb.elements[1],
|
|
pb.elements[3],
|
|
)
|
|
if err != nil {
|
|
return nil, clues.Stack(errParsingPath, err).With("path_string", p)
|
|
}
|
|
|
|
return &dataLayerResourcePath{
|
|
Builder: *pb,
|
|
service: service,
|
|
category: category,
|
|
hasItem: isItem,
|
|
}, nil
|
|
}
|
|
|
|
// escapeElement takes a single path element and escapes all characters that
|
|
// require an escape sequence. If there are no characters that need escaping,
|
|
// the input is returned unchanged.
|
|
func escapeElement(element string) string {
|
|
escapeIdx := make([]int, 0)
|
|
|
|
for i, c := range element {
|
|
if _, ok := charactersToEscape[c]; ok {
|
|
escapeIdx = append(escapeIdx, i)
|
|
}
|
|
}
|
|
|
|
if len(escapeIdx) == 0 {
|
|
return element
|
|
}
|
|
|
|
startIdx := 0
|
|
b := strings.Builder{}
|
|
b.Grow(len(element) + len(escapeIdx))
|
|
|
|
for _, idx := range escapeIdx {
|
|
b.WriteString(element[startIdx:idx])
|
|
b.WriteRune(escapeCharacter)
|
|
|
|
startIdx = idx
|
|
}
|
|
|
|
// Add the end of the element after the last escape character.
|
|
b.WriteString(element[startIdx:])
|
|
|
|
return b.String()
|
|
}
|
|
|
|
// unescape returns the given element and converts it into a "raw"
|
|
// element that does not have escape characters before characters that need
|
|
// escaping. Using this function on segments that contain escaped path
|
|
// separators will result in an ambiguous or incorrect segment.
|
|
func unescape(element string) string {
|
|
b := strings.Builder{}
|
|
startIdx := 0
|
|
prevWasEscape := false
|
|
|
|
for i, c := range element {
|
|
if c != escapeCharacter || prevWasEscape {
|
|
prevWasEscape = false
|
|
continue
|
|
}
|
|
|
|
// This is an escape character, remove it from the output.
|
|
b.WriteString(element[startIdx:i])
|
|
startIdx = i + 1
|
|
prevWasEscape = true
|
|
}
|
|
|
|
b.WriteString(element[startIdx:])
|
|
|
|
return b.String()
|
|
}
|
|
|
|
// validateEscapedElement takes an escaped element that has had trailing
|
|
// separators trimmed and ensures that no characters requiring escaping are
|
|
// unescaped and that no escape characters are combined with characters that
|
|
// don't need escaping.
|
|
func validateEscapedElement(element string) error {
|
|
prevWasEscape := false
|
|
|
|
for _, c := range element {
|
|
switch prevWasEscape {
|
|
case true:
|
|
prevWasEscape = false
|
|
|
|
if _, ok := charactersToEscape[c]; !ok {
|
|
return clues.New("bad escape sequence in path").
|
|
With("escape_sequence", fmt.Sprintf("'%c%c'", escapeCharacter, c))
|
|
}
|
|
|
|
case false:
|
|
if c == escapeCharacter {
|
|
prevWasEscape = true
|
|
continue
|
|
}
|
|
|
|
if _, ok := charactersToEscape[c]; ok {
|
|
return clues.New("unescaped character in path").With("character", c)
|
|
}
|
|
}
|
|
}
|
|
|
|
if prevWasEscape {
|
|
return errors.New("trailing escape character")
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// TrimTrailingSlash takes an escaped path element and returns an escaped path
|
|
// element with the trailing path separator character(s) removed if they were not
|
|
// escaped. If there were no trailing path separator character(s) or the separator(s)
|
|
// were escaped the input is returned unchanged.
|
|
func TrimTrailingSlash(element string) string {
|
|
for len(element) > 0 && element[len(element)-1] == PathSeparator {
|
|
lastIdx := len(element) - 1
|
|
numSlashes := 0
|
|
|
|
for i := lastIdx - 1; i >= 0; i-- {
|
|
if element[i] != escapeCharacter {
|
|
break
|
|
}
|
|
|
|
numSlashes++
|
|
}
|
|
|
|
if numSlashes%2 != 0 {
|
|
break
|
|
}
|
|
|
|
element = element[:lastIdx]
|
|
}
|
|
|
|
return element
|
|
}
|
|
|
|
// join returns a string containing the given elements joined by the path
|
|
// separator '/'.
|
|
func join(elements []string) string {
|
|
// Have to use strings because path package does not handle escaped '/' and
|
|
// '\' according to the escaping rules.
|
|
return strings.Join(elements, string(PathSeparator))
|
|
}
|
|
|
|
// split takes an escaped string and returns a slice of path elements. The
|
|
// string is split on the path separator according to the escaping rules. The
|
|
// provided string must not contain an unescaped trailing path separator.
|
|
func Split(segment string) []string {
|
|
res := make([]string, 0)
|
|
numEscapes := 0
|
|
startIdx := 0
|
|
// Start with true to ignore leading separator.
|
|
prevWasSeparator := true
|
|
|
|
for i, c := range segment {
|
|
if c == escapeCharacter {
|
|
prevWasSeparator = false
|
|
numEscapes++
|
|
|
|
continue
|
|
}
|
|
|
|
if c != PathSeparator {
|
|
prevWasSeparator = false
|
|
numEscapes = 0
|
|
|
|
continue
|
|
}
|
|
|
|
// Remaining is just path separator handling.
|
|
if numEscapes%2 != 0 {
|
|
// This is an escaped separator.
|
|
prevWasSeparator = false
|
|
numEscapes = 0
|
|
|
|
continue
|
|
}
|
|
|
|
// Ignore leading separator characters and don't add elements that would
|
|
// be empty.
|
|
if !prevWasSeparator {
|
|
res = append(res, segment[startIdx:i])
|
|
}
|
|
|
|
// We don't want to include the path separator in the result.
|
|
startIdx = i + 1
|
|
prevWasSeparator = true
|
|
numEscapes = 0
|
|
}
|
|
|
|
// Add the final segment because the loop above won't catch it. There should
|
|
// be no trailing separator character.
|
|
res = append(res, segment[startIdx:])
|
|
|
|
return res
|
|
}
|