DataDog · matt-dz · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026 · Mar 10, 2026
@@ -12,6 +12,7 @@ Blocked features are rejected before execution with exit code 2.
 - ✅ `echo [-n] [-e] [ARG]...` — write arguments to stdout
 - ✅ `exit [N]` — exit the shell with status N (default 0)
 - ✅ `false` — return exit code 1
+- ✅ `find [-L] [PATH...] [EXPRESSION]` — search for files in a directory hierarchy; supports `-name`, `-iname`, `-path`, `-ipath`, `-type`, `-size`, `-empty`, `-newer`, `-mtime`, `-mmin`, `-maxdepth`, `-mindepth`, `-print`, `-print0`, `-prune`, logical operators (`!`, `-a`, `-o`, `()`); blocks `-exec`, `-delete`, `-regex` for sandbox safety
 - ✅ `grep [-EFGivclLnHhoqsxw] [-e PATTERN] [-m NUM] [-A NUM] [-B NUM] [-C NUM] PATTERN [FILE]...` — print lines that match patterns; uses RE2 regex engine (linear-time, no backtracking)
 - ✅ `head [-n N|-c N] [-q|-v] [-z] [FILE]...` — output the first part of files (default: first 10 lines)
 - ✅ `ls [-1aAdFhlpRrSt] [FILE]...` — list directory contents

@@ -96,8 +96,8 @@ func TestAllowedPathsExecNonexistent(t *testing.T) {
 
 func TestAllowedPathsExecViaPathLookup(t *testing.T) {
 	dir := t.TempDir()
-	// "find" is resolved via PATH (not absolute), but /bin and /usr are not allowed
-	_, stderr, exitCode := runScriptInternal(t, `find`, dir,
+	// "sed" is resolved via PATH (not absolute), but /bin and /usr are not allowed
+	_, stderr, exitCode := runScriptInternal(t, `sed`, dir,
 		AllowedPaths([]string{dir}),
 	)
 	assert.Equal(t, 127, exitCode)

@@ -87,7 +87,10 @@ type CallContext struct {
 	OpenFile func(ctx context.Context, path string, flags int, mode os.FileMode) (io.ReadWriteCloser, error)
 
 	// ReadDir reads a directory within the shell's path restrictions.
-	// Entries are returned sorted by name.
+	// Entries are returned sorted by name. This is an intentional design
+	// choice for deterministic output, but means builtins that walk
+	// directories (ls -R, find) produce sorted output rather than the
+	// filesystem-dependent order used by GNU coreutils/findutils.
 	ReadDir func(ctx context.Context, path string) ([]fs.DirEntry, error)
 
 	// StatFile returns file info within the shell's path restrictions (follows symlinks).
@@ -107,6 +110,12 @@ type CallContext struct {
 	// calling time.Now() directly, so the time source is consistent and
 	// testable.
 	Now func() time.Time
+
+	// FileIdentity extracts canonical file identity from FileInfo.
+	// On Unix: dev+inode from Stat_t. On Windows: volume serial + file index
+	// via GetFileInformationByHandle. The path parameter is needed on Windows
+	// where FileInfo.Sys() lacks identity fields; Unix ignores it.
+	FileIdentity func(path string, info fs.FileInfo) (FileID, bool)
 }
 
 // Out writes a string to stdout.
@@ -124,6 +133,14 @@ func (c *CallContext) Errf(format string, a ...any) {
 	fmt.Fprintf(c.Stderr, format, a...)
 }
 
+// FileID is a comparable file identity for cycle detection.
+// On Unix: device + inode. On Windows: volume serial + file index.
+// Used as map key for visited-set tracking.
+//
+// Both fields are plain integers, so two FileID values compare equal with ==
+// exactly when both fields match.
+type FileID struct {
+	Dev uint64 // device number on Unix; presumably the volume serial on Windows
+	Ino uint64 // inode number on Unix; presumably the file index on Windows
+}
+
 // Result captures the outcome of executing a builtin command.
 type Result struct {
 	// Code is the exit status code.

@@ -0,0 +1,179 @@
+// Unless explicitly stated otherwise all files in this repository are licensed
+// under the Apache License Version 2.0.
+// This product includes software developed at Datadog (https://www.datadoghq.com/).
+// Copyright 2026-present Datadog, Inc.
+
+package find
+
+import (
+	"context"
+	iofs "io/fs"
+	"math"
+	"time"
+
+	"github.com/DataDog/rshell/interp/builtins"
+)
+
+// evalResult captures the outcome of evaluating an expression on a file.
+// The two flags are independent: evaluate can report prune for a
+// sub-expression even when the overall expression did not match.
+type evalResult struct {
+	matched bool // the expression evaluated to true for this file
+	prune   bool // skip descending into this directory
+}
+
+// evalContext holds state needed during expression evaluation.
+//
+// Fields without an inline comment:
+//   - callCtx is the builtin call context; this file uses it for stdout
+//     output (Outf), diagnostics (Errf, PortableErr) and sandboxed filesystem
+//     access (ReadDir, StatFile, LstatFile).
+//   - ctx is the context passed to those filesystem calls.
+//   - now is the reference instant from which -mtime and -mmin ages are
+//     measured.
+//
+// newerCache and newerErrors are assigned to by evalNewer, so both maps must
+// be non-nil before a -newer predicate is evaluated.
+type evalContext struct {
+	callCtx     *builtins.CallContext
+	ctx         context.Context
+	now         time.Time
+	relPath     string               // path relative to starting point
+	info        iofs.FileInfo        // file info (lstat or stat depending on -L)
+	depth       int                  // current depth
+	printPath   string               // path to print (includes starting point prefix)
+	newerCache  map[string]time.Time // cached -newer reference file modtimes
+	newerErrors map[string]bool      // tracks which -newer reference files failed to stat
+	followLinks bool                 // true when -L is active
+}
+
+// evaluate evaluates an expression tree against a file. If e is nil, returns
+// matched=true (match everything).
+func evaluate(ec *evalContext, e *expr) evalResult {
+	if e == nil {
+		return evalResult{matched: true}
+	}
+	switch e.kind {
+	case exprAnd:
+		left := evaluate(ec, e.left)
+		if !left.matched {
+			return evalResult{prune: left.prune}
+		}
+		right := evaluate(ec, e.right)
+		return evalResult{matched: right.matched, prune: left.prune || right.prune}
+
+	case exprOr:
+		left := evaluate(ec, e.left)
+		if left.matched {
+			return evalResult{matched: true, prune: left.prune}
+		}
+		right := evaluate(ec, e.right)
+		return evalResult{matched: right.matched, prune: left.prune || right.prune}
+
+	case exprNot:
+		r := evaluate(ec, e.operand)
+		return evalResult{matched: !r.matched, prune: r.prune}
+
+	case exprName:
+		name := baseName(ec.relPath)
+		return evalResult{matched: matchGlob(e.strVal, name)}
+
+	case exprIName:
+		name := baseName(ec.relPath)
+		return evalResult{matched: matchGlobFold(e.strVal, name)}
+
+	case exprPath:
+		return evalResult{matched: matchPathGlob(e.strVal, ec.printPath)}
+
+	case exprIPath:
+		return evalResult{matched: matchPathGlobFold(e.strVal, ec.printPath)}
+
+	case exprType:
+		return evalResult{matched: matchType(ec.info, e.strVal)}
+
+	case exprSize:
+		return evalResult{matched: compareSize(ec.info.Size(), e.sizeVal)}
+
+	case exprEmpty:
+		return evalResult{matched: evalEmpty(ec)}
+
+	case exprNewer:
+		return evalResult{matched: evalNewer(ec, e.strVal)}
+
+	case exprMtime:
+		return evalResult{matched: evalMtime(ec, e.numVal, e.numCmp)}
+
+	case exprMmin:
+		return evalResult{matched: evalMmin(ec, e.numVal, e.numCmp)}
+
+	case exprPrint:
+		ec.callCtx.Outf("%s\n", ec.printPath)
+		return evalResult{matched: true}
+
+	case exprPrint0:
+		ec.callCtx.Outf("%s\x00", ec.printPath)
+		return evalResult{matched: true}
+
+	case exprPrune:
+		return evalResult{matched: true, prune: true}
+
+	case exprTrue:
+		return evalResult{matched: true}
+
+	case exprFalse:
+		return evalResult{matched: false}
+
+	default:
+		return evalResult{matched: false}
+	}
+}
+
+// evalEmpty implements -empty: it reports true for a directory with no
+// entries or a regular file of size zero, and false for every other file
+// type. A directory that cannot be read is reported as not empty.
+func evalEmpty(ec *evalContext) bool {
+	switch {
+	case ec.info.IsDir():
+		children, err := ec.callCtx.ReadDir(ec.ctx, ec.printPath)
+		return err == nil && len(children) == 0
+	case ec.info.Mode().IsRegular():
+		return ec.info.Size() == 0
+	default:
+		return false
+	}
+}
+
+// evalNewer implements -newer: it reports whether the current file's
+// modification time is strictly after that of the reference file refPath.
+//
+// The reference is stat'ed at most once per path. A successful lookup is
+// remembered in ec.newerCache; a failed one is reported on stderr once,
+// recorded in ec.newerErrors, and makes this and every later evaluation for
+// that path return false instead of comparing against a zero time.
+// With -L the reference is resolved through symlinks (StatFile), otherwise
+// the link itself is examined (LstatFile).
+func evalNewer(ec *evalContext, refPath string) bool {
+	if failedBefore := ec.newerErrors[refPath]; failedBefore {
+		return false
+	}
+
+	// Fast path: the reference modtime is already known.
+	if cached, found := ec.newerCache[refPath]; found {
+		return ec.info.ModTime().After(cached)
+	}
+
+	lookup := ec.callCtx.LstatFile
+	if ec.followLinks {
+		lookup = ec.callCtx.StatFile
+	}
+
+	ref, err := lookup(ec.ctx, refPath)
+	if err != nil {
+		ec.callCtx.Errf("find: '%s': %s\n", refPath, ec.callCtx.PortableErr(err))
+		ec.newerErrors[refPath] = true
+		return false
+	}
+
+	refModTime := ref.ModTime()
+	ec.newerCache[refPath] = refModTime
+	return ec.info.ModTime().After(refModTime)
+}
+
+// evalMtime implements -mtime. The file's age (ec.now minus its modification
+// time) is converted to whole 24-hour periods, rounding down, and that count
+// is compared against n using the comparison mode cmp.
+func evalMtime(ec *evalContext, n int64, cmp int) bool {
+	age := ec.now.Sub(ec.info.ModTime())
+	wholeDays := math.Floor(age.Hours() / 24)
+	return compareNumeric(int64(wholeDays), n, cmp)
+}
+
+// evalMmin implements -mmin. The file's age (ec.now minus its modification
+// time) is expressed in minutes and rounded up, so that, matching GNU find,
+// a file 5 seconds old falls in minute bucket 1 rather than 0. The resulting
+// count is compared against n using the comparison mode cmp.
+func evalMmin(ec *evalContext, n int64, cmp int) bool {
+	age := ec.now.Sub(ec.info.ModTime())
+	roundedUp := math.Ceil(age.Minutes())
+	return compareNumeric(int64(roundedUp), n, cmp)
+}