Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
ffd5112
Implement find builtin command
matt-dz Mar 10, 2026
fdfcda6
Address code review findings for find builtin
matt-dz Mar 10, 2026
45331b5
Fix misplaced skip_assert_against_bash in nonexistent.yaml
matt-dz Mar 10, 2026
b3d04ee
Document ReadDir sorted-order design choice and its bash divergence
matt-dz Mar 10, 2026
6188545
Address remaining PR review comments
matt-dz Mar 11, 2026
10e4148
Use fmt.Errorf directly instead of errors.New(fmt.Sprintf())
matt-dz Mar 11, 2026
682a62b
Add 52 comprehensive test scenarios for find builtin
matt-dz Mar 11, 2026
2691ffb
Fix -newer cache bug and address PR review comments
matt-dz Mar 11, 2026
7335e90
Integrate -maxdepth/-mindepth into the expression parser
matt-dz Mar 11, 2026
27b3d5e
Detect symlink loops by file identity (dev+inode) instead of path str…
matt-dz Mar 11, 2026
139d228
Address PR #36 review comments (round 2)
matt-dz Mar 11, 2026
27c52f1
Merge branch 'main' of github.com:DataDog/rshell into matt-dz/impleme…
matt-dz Mar 11, 2026
1698078
Address PR #36 review comments (round 3)
matt-dz Mar 11, 2026
e823302
Address PR #36 review comments (round 4)
matt-dz Mar 11, 2026
c95fb6c
Address PR #36 review comments (round 5)
matt-dz Mar 11, 2026
cd0786f
Address PR #36 review comments (round 6)
matt-dz Mar 11, 2026
4fc005f
Address PR #36 review comments (round 7)
matt-dz Mar 11, 2026
5d1151a
Address PR #36 review comments (round 8)
matt-dz Mar 11, 2026
4afc9fb
Address PR #36 review comments (round 9)
matt-dz Mar 11, 2026
b187f82
Address PR #36 review comments (round 10)
matt-dz Mar 11, 2026
f87d171
Address PR #36 review comments (round 11)
matt-dz Mar 11, 2026
793f6da
Address PR #36 review comments (round 12)
matt-dz Mar 11, 2026
a081c95
Address PR #36 review comments (round 13)
matt-dz Mar 11, 2026
92d809d
Add comprehensive unit tests for find builtin regression prevention
matt-dz Mar 11, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions SHELL_FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Blocked features are rejected before execution with exit code 2.
- ✅ `echo [-n] [-e] [ARG]...` — write arguments to stdout
- ✅ `exit [N]` — exit the shell with status N (default 0)
- ✅ `false` — return exit code 1
- ✅ `find [-L] [PATH...] [EXPRESSION]` — search for files in a directory hierarchy; supports `-name`, `-iname`, `-path`, `-ipath`, `-type`, `-size`, `-empty`, `-newer`, `-mtime`, `-mmin`, `-maxdepth`, `-mindepth`, `-print`, `-print0`, `-prune`, logical operators (`!`, `-a`, `-o`, `()`); blocks `-exec`, `-delete`, `-regex` for sandbox safety
- ✅ `grep [-EFGivclLnHhoqsxw] [-e PATTERN] [-m NUM] [-A NUM] [-B NUM] [-C NUM] PATTERN [FILE]...` — print lines that match patterns; uses RE2 regex engine (linear-time, no backtracking)
- ✅ `head [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the first part of files (default: first 10 lines)
- ✅ `ls [-1aAdFhlpRrSt] [FILE]...` — list directory contents
Expand Down
4 changes: 2 additions & 2 deletions interp/allowed_paths_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,8 @@ func TestAllowedPathsExecNonexistent(t *testing.T) {

func TestAllowedPathsExecViaPathLookup(t *testing.T) {
dir := t.TempDir()
// "find" is resolved via PATH (not absolute), but /bin and /usr are not allowed
_, stderr, exitCode := runScriptInternal(t, `find`, dir,
// "sed" is resolved via PATH (not absolute), but /bin and /usr are not allowed
_, stderr, exitCode := runScriptInternal(t, `sed`, dir,
AllowedPaths([]string{dir}),
)
assert.Equal(t, 127, exitCode)
Expand Down
19 changes: 18 additions & 1 deletion interp/builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,10 @@ type CallContext struct {
OpenFile func(ctx context.Context, path string, flags int, mode os.FileMode) (io.ReadWriteCloser, error)

// ReadDir reads a directory within the shell's path restrictions.
// Entries are returned sorted by name.
// Entries are returned sorted by name. This is an intentional design
// choice for deterministic output, but means builtins that walk
// directories (ls -R, find) produce sorted output rather than the
// filesystem-dependent order used by GNU coreutils/findutils.
ReadDir func(ctx context.Context, path string) ([]fs.DirEntry, error)

// StatFile returns file info within the shell's path restrictions (follows symlinks).
Expand All @@ -107,6 +110,12 @@ type CallContext struct {
// calling time.Now() directly, so the time source is consistent and
// testable.
Now func() time.Time

// FileIdentity extracts canonical file identity from FileInfo.
// On Unix: dev+inode from Stat_t. On Windows: volume serial + file index
// via GetFileInformationByHandle. The path parameter is needed on Windows
// where FileInfo.Sys() lacks identity fields; Unix ignores it.
FileIdentity func(path string, info fs.FileInfo) (FileID, bool)
}

// Out writes a string to stdout.
Expand All @@ -124,6 +133,14 @@ func (c *CallContext) Errf(format string, a ...any) {
fmt.Fprintf(c.Stderr, format, a...)
}

// FileID is a comparable file identity for cycle detection.
// Both fields are plain integers, so the struct can be compared with ==
// and used directly as a map key for visited-set tracking.
// On Unix the pair is device + inode; on Windows it is volume serial +
// file index.
type FileID struct {
// Dev identifies the containing device (Unix) or volume serial (Windows).
Dev uint64
// Ino identifies the file on that device: inode (Unix) or file index (Windows).
Ino uint64
}

// Result captures the outcome of executing a builtin command.
type Result struct {
// Code is the exit status code.
Expand Down
179 changes: 179 additions & 0 deletions interp/builtins/find/eval.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026-present Datadog, Inc.

package find

import (
"context"
iofs "io/fs"
"math"
"time"

"github.com/DataDog/rshell/interp/builtins"
)

// evalResult captures the outcome of evaluating an expression on a file.
// The zero value means "did not match, do not prune".
type evalResult struct {
matched bool // true when the expression evaluated to true for the file
prune bool // skip descending into this directory
}

// evalContext holds state needed during expression evaluation.
// It bundles the per-file data (paths, file info, depth) with the handles
// and caches the predicates rely on while evaluating an expression.
type evalContext struct {
callCtx *builtins.CallContext // builtin I/O and filesystem hooks (Outf, Errf, ReadDir, StatFile, LstatFile)
ctx context.Context // passed through to every filesystem call made via callCtx
now time.Time // reference time used by -mtime / -mmin to compute file age
relPath string // path relative to starting point; its base name is matched by -name / -iname
info iofs.FileInfo // file info (lstat or stat depending on -L)
depth int // current depth
printPath string // path to print (includes starting point prefix); also matched by -path / -ipath
newerCache map[string]time.Time // cached -newer reference file modtimes
newerErrors map[string]bool // tracks which -newer reference files failed to stat
followLinks bool // true when -L is active
}

// evaluate walks the expression tree rooted at e for the file described by
// ec and reports whether the file matched and whether pruning was requested.
// A nil expression matches every file.
//
// The binary operators short-circuit: the right operand is only evaluated
// when the left one does not already decide the result. Prune requests from
// any evaluated operand are propagated upwards. The -print and -print0
// actions write ec.printPath to stdout as a side effect and always match.
func evaluate(ec *evalContext, e *expr) evalResult {
	if e == nil {
		return evalResult{matched: true}
	}

	// Operators first: these recurse into sub-expressions.
	switch e.kind {
	case exprAnd, exprOr:
		lhs := evaluate(ec, e.left)
		// AND is decided by a false left operand, OR by a true one. In both
		// cases the right operand is skipped and the left outcome stands.
		if lhs.matched == (e.kind == exprOr) {
			return evalResult{matched: lhs.matched, prune: lhs.prune}
		}
		rhs := evaluate(ec, e.right)
		return evalResult{matched: rhs.matched, prune: lhs.prune || rhs.prune}

	case exprNot:
		// Negation flips the match but keeps any prune request intact.
		inner := evaluate(ec, e.operand)
		inner.matched = !inner.matched
		return inner
	}

	// Leaf predicates and actions: start from "no match, no prune" and fill
	// in whatever the node decides.
	var res evalResult
	switch e.kind {
	case exprName:
		res.matched = matchGlob(e.strVal, baseName(ec.relPath))

	case exprIName:
		res.matched = matchGlobFold(e.strVal, baseName(ec.relPath))

	case exprPath:
		res.matched = matchPathGlob(e.strVal, ec.printPath)

	case exprIPath:
		res.matched = matchPathGlobFold(e.strVal, ec.printPath)

	case exprType:
		res.matched = matchType(ec.info, e.strVal)

	case exprSize:
		res.matched = compareSize(ec.info.Size(), e.sizeVal)

	case exprEmpty:
		res.matched = evalEmpty(ec)

	case exprNewer:
		res.matched = evalNewer(ec, e.strVal)

	case exprMtime:
		res.matched = evalMtime(ec, e.numVal, e.numCmp)

	case exprMmin:
		res.matched = evalMmin(ec, e.numVal, e.numCmp)

	case exprPrint:
		ec.callCtx.Outf("%s\n", ec.printPath)
		res.matched = true

	case exprPrint0:
		ec.callCtx.Outf("%s\x00", ec.printPath)
		res.matched = true

	case exprPrune:
		res.matched = true
		res.prune = true

	case exprTrue:
		res.matched = true
	}
	// exprFalse and any unrecognised kind leave res at its zero value.
	return res
}

// evalEmpty implements -empty: it reports true for a directory with no
// entries or a regular file of size zero, and false for every other file
// type. A directory that cannot be read is treated as not empty; the read
// error is not reported.
func evalEmpty(ec *evalContext) bool {
	switch {
	case ec.info.IsDir():
		children, err := ec.callCtx.ReadDir(ec.ctx, ec.printPath)
		return err == nil && len(children) == 0
	case ec.info.Mode().IsRegular():
		return ec.info.Size() == 0
	default:
		return false
	}
}

// evalNewer implements -newer: it reports whether the current file's
// modification time is strictly after that of the reference file refPath.
//
// The reference modtime is looked up at most once per path and stored in
// ec.newerCache. A reference that cannot be stat'ed is reported on stderr
// once, recorded in ec.newerErrors, and makes this and every later call for
// the same path return false instead of comparing against a zero time.
// With -L active the reference is resolved through symlinks (StatFile),
// otherwise the link itself is examined (LstatFile).
func evalNewer(ec *evalContext, refPath string) bool {
	// A reference that already failed stays failed; do not stat it again.
	if ec.newerErrors[refPath] {
		return false
	}

	// Fast path: reference modtime already known.
	if cached, hit := ec.newerCache[refPath]; hit {
		return ec.info.ModTime().After(cached)
	}

	lookup := ec.callCtx.LstatFile
	if ec.followLinks {
		lookup = ec.callCtx.StatFile
	}

	ref, err := lookup(ec.ctx, refPath)
	if err != nil {
		ec.callCtx.Errf("find: '%s': %s\n", refPath, ec.callCtx.PortableErr(err))
		ec.newerErrors[refPath] = true
		return false
	}

	refModTime := ref.ModTime()
	ec.newerCache[refPath] = refModTime
	return ec.info.ModTime().After(refModTime)
}

// evalMtime implements -mtime. The file's age (ec.now minus its modtime) is
// converted to whole 24-hour periods, rounding down, and that day count is
// compared against n using the comparison selected by cmp.
func evalMtime(ec *evalContext, n int64, cmp int) bool {
	age := ec.now.Sub(ec.info.ModTime())
	wholeDays := math.Floor(age.Hours() / 24)
	return compareNumeric(int64(wholeDays), n, cmp)
}

// evalMmin implements -mmin. The file's age (ec.now minus its modtime) is
// expressed in minutes and rounded up, so a file that is 5 seconds old
// lands in minute bucket 1 rather than 0; the bucket is then compared
// against n using the comparison selected by cmp.
//
// NOTE(review): assuming compareNumeric applies a strict less-than for the
// "-N" form, comparing the rounded-up bucket means "-mmin -N" only matches
// files aged at most N-1 minutes. GNU find appears to compare the raw
// timestamp (age < N minutes) for that form — confirm against findutils.
func evalMmin(ec *evalContext, n int64, cmp int) bool {
	age := ec.now.Sub(ec.info.ModTime())
	minuteBucket := math.Ceil(age.Minutes())
	return compareNumeric(int64(minuteBucket), n, cmp)
}
Loading
Loading