Skip to content
Merged
105 changes: 86 additions & 19 deletions src/dda/cli/info/owners/code/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,56 @@
from dda.utils.fs import Path

if TYPE_CHECKING:
import codeowners

from dda.cli.application import Application


def _find_existing_ancestor(path: Path) -> Path:
"""Walk up from path to find the nearest existing ancestor."""
check = path
while not check.exists():
check = check.parent
return check


def _display_result(app: Application, res: dict[str, list[str | None]], *, json: bool) -> None:
if json:
from json import dumps

app.output(dumps(res))
else:
# Note: paths here are in POSIX format, so they will use / even on Windows
display_res = {path: ", ".join(str(x) for x in owners) for path, owners in res.items()}
app.display_table(display_res, stderr=False)


def _load_owners(path: Path, owners_cache: dict[Path, codeowners.CodeOwners]) -> codeowners.CodeOwners:
if path in owners_cache:
return owners_cache[path]

import codeowners

# Can raise FileNotFoundError
owners = codeowners.CodeOwners(path.read_text(encoding="utf-8"))
owners_cache[path] = owners
return owners


@dynamic_command(short_help="Find code owners of files and directories", features=["codeowners"])
@click.argument(
"paths",
type=click.Path(exists=True, path_type=Path),
type=click.Path(path_type=Path),
required=True,
nargs=-1,
)
Comment thread
Ishirui marked this conversation as resolved.
@click.option(
"--owners",
"-f",
"owners_filepath",
type=click.Path(exists=True, dir_okay=False, path_type=Path),
"owners_path_override",
type=click.Path(dir_okay=False, exists=True, path_type=Path),
help="Path to CODEOWNERS file",
default=".github/CODEOWNERS",
default=None,
)
# TODO: Make this respect any --non-interactive flag or other way to detect CI environment
@click.option(
Expand All @@ -36,26 +69,60 @@
help="Format the output as JSON",
)
@pass_app
def cmd(app: Application, paths: tuple[Path, ...], *, owners_filepath: Path, json: bool) -> None:
def cmd(app: Application, paths: tuple[Path, ...], *, owners_path_override: Path | None, json: bool) -> None:
"""
Gets the code owners for the specified paths.
"""
import codeowners
import os

from dda.cli.info.owners.format import format_path_for_codeowners

owners = codeowners.CodeOwners(owners_filepath.read_text(encoding="utf-8"))
cwd = Path.cwd()

# The codeowners library expects paths to be in a specific format
res = {
(formatted_path := format_path_for_codeowners(path)): [owner[1] for owner in owners.of(formatted_path)]
for path in paths
}
if json:
from json import dumps
# Resolve explicit --owners path from CWD before any CWD changes
if owners_path_override is not None:
owners_path_override = owners_path_override.resolve()

app.output(dumps(res))
else:
# Note: paths here are in POSIX format, so they will use / even on Windows
display_res = {path: ", ".join(owners) for path, owners in res.items()}
app.display_table(display_res, stderr=False)
# Process each path with dynamic repo root detection
res: dict[str, list[str | None]] = {}
owners_cache: dict[Path, codeowners.CodeOwners] = {}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't really get the point of this cache: do we expect several CODEOWNERS files on a single git repo? Otherwise if it's just to limit opening twice the same file because the repo_root is only retrieved within the paths loops from any path, the implementation is probably quite complex. Do you agree we could make is simpler, or do I misunderstand something?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fundamentally yes it's about avoiding opening the same file and instantiating the CodeOwners object multiple times.

I didn't find a simpler way because nothing prevents you from querying multiple files in different repos, i.e. dda info owners code repo1/file repo2/file, so the logic for opening the codeowners file has to go in the loop.

I guess we could prevent that but I wanted the CLI to be as flexible and intuitive as possible

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok if you want to support repo1 and repo2, even if I'm unsure of the use case criticality, you need a dict. This is quite a bit complex for what we need but let's go like that


errors: list[str] = []
for path in paths:
# Avoid resolving symlinks as they might point outside the repo
abs_path = Path(os.path.normpath(cwd / path))

# Determine repo root from the file's location
try:
repo_root = app.tools.git.get_repo_root(_find_existing_ancestor(abs_path))
except ValueError:
msg = f"Could not determine repo root for path: {abs_path}"
Comment thread
Ishirui marked this conversation as resolved.
errors.append(msg)
continue

repo_relative = Path(abs_path.relative_to(repo_root))

# Load CODEOWNERS file from repo root
try:
owners_path = owners_path_override or repo_root / ".github" / "CODEOWNERS"
owners = _load_owners(owners_path, owners_cache)
except FileNotFoundError:
msg = f"CODEOWNERS file not found for {abs_path}: {owners_path} does not exist"
errors.append(msg)
continue

with repo_root.as_cwd():
try:
formatted_path = format_path_for_codeowners(repo_relative)
except ValueError as e:
errors.append(str(e))
continue
resolved_owners = owners.of(formatted_path)
res[formatted_path] = [owner[1] for owner in resolved_owners] if resolved_owners else [None]

Comment on lines +114 to +122
Copy link

Copilot AI Apr 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

owners.of() returning no matches is currently encoded as [None], which changes the command’s previous behavior (empty list) and will display the literal string None in human output. Prefer keeping [] for “no owners” and let the table display an empty cell (or handle the fallback explicitly in display code) to avoid breaking JSON consumers and CLI output expectations.

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is better and less confusing - happy to discuss though :)

Maybe we should leave the JSON output as [] and only show None for human output.

if res:
_display_result(app, res, json=json)

if errors:
app.display_error("\n".join(errors), stderr=True)
app.abort(code=1)
35 changes: 35 additions & 0 deletions src/dda/tools/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,41 @@ def get_changes(

return ChangeSet.from_patches(patches)

def get_repo_root(self, path: Path | None = None) -> Path:
"""
Get the root of the Git repository the given path is in.
If no path is provided, the current working directory is used.

If the path is not in a Git repository or does not exist, a ValueError is raised.

Returns:
The root of the Git repository as a dda.utils.fs.Path object.
"""
from dda.utils.fs import Path

if path is None:
path = Path.cwd()

if not path.exists():
msg = f"Path {path} does not exist"
raise ValueError(msg)

if not path.is_dir():
path = path.parent

res = self.capture(
["rev-parse", "--show-toplevel"],
cwd=path,
check=False,
cross_streams=True,
).strip()

if not res or res.startswith("fatal:"):
msg = f"Path {path} is not in a Git repository: {res}"
raise ValueError(msg)

return Path(res)

def _get_working_tree_patch(self) -> str:
from dda.utils.fs import temp_directory

Expand Down
89 changes: 88 additions & 1 deletion tests/cli/info/owners/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@
@pytest.fixture(scope="module", autouse=True)
def install_deps_once(dda):
dda("self", "dep", "sync", "-f", "codeowners")
with patch("dda.cli.base.ensure_features_installed", return_value=None):
with (
patch("dda.cli.base.ensure_features_installed", return_value=None),
patch("dda.tools.git.Git.get_repo_root", side_effect=lambda _path=None: Path.cwd()),
):
yield


Expand Down Expand Up @@ -166,3 +169,87 @@ def test_human_output(
"""
),
)


# --- Subdirectory path resolution tests ---
# These tests verify that paths given relative to CWD are resolved to repo-root-relative paths.

FIXTURES_DIR = Path(__file__).parent / "fixtures"


def _resolved_fixture(name: str) -> Path:
return Path((FIXTURES_DIR / name).resolve())


@pytest.mark.parametrize(
("subdir", "input_paths", "expected_result"),
[
pytest.param(
"subdir1",
["testfile1.txt"],
{"subdir1/testfile1.txt": ["@owner1"]},
id="file",
),
pytest.param(
"subdir1",
["."],
{"subdir1/": ["@owner1"]},
id="current_directory",
),
pytest.param(
"subdir1",
["../subdir2/testfile2.txt"],
{"subdir2/testfile2.txt": ["@owner3"]},
id="parent_traversal",
),
pytest.param(
"subdir1",
["testfile1.txt", "../subdir2/testfile2.txt", "../subdir2"],
{
"subdir1/testfile1.txt": ["@owner1"],
"subdir2/testfile2.txt": ["@owner3"],
"subdir2/": ["@owner2"],
},
id="multiple_paths",
),
],
)
def test_subdirectory_path_resolution(
dda: CliRunner,
subdir: str,
input_paths: list[str],
expected_result: dict[str, list[str]],
) -> None:
fixture_root = _resolved_fixture("test3")
with (fixture_root / subdir).as_cwd(), patch("dda.tools.git.Git.get_repo_root", return_value=fixture_root):
result = dda("info", "owners", "code", "--json", *input_paths)

result.check(
exit_code=0,
stdout_json=expected_result,
)


def test_nonexistent_path_from_subdirectory(
dda: CliRunner,
) -> None:
fixture_root = _resolved_fixture("test3")
with (fixture_root / "subdir1").as_cwd(), patch("dda.tools.git.Git.get_repo_root", return_value=fixture_root):
result = dda("info", "owners", "code", "--json", "nonexistent.go", catch_exceptions=True)

result.check_exit_code(1)


def test_explicit_codeowners_from_subdirectory(
dda: CliRunner,
) -> None:
fixture_root = _resolved_fixture("test3")
with (fixture_root / "subdir1").as_cwd(), patch("dda.tools.git.Git.get_repo_root", return_value=fixture_root):
# custom_CODEOWNERS is at fixtures/custom_CODEOWNERS, two levels up from subdir1
result = dda("info", "owners", "code", "--json", "--owners", "../../custom_CODEOWNERS", "testfile1.txt")

# custom_CODEOWNERS has "* @DataDog/team-everything", which matches subdir1/testfile1.txt
result.check(
exit_code=0,
stdout_json={"subdir1/testfile1.txt": ["@DataDog/team-everything"]},
)
48 changes: 48 additions & 0 deletions tests/tools/git/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import re
from typing import TYPE_CHECKING

import pytest

from dda.utils.fs import Path
from dda.utils.git.changeset import ChangedFile, ChangeSet, ChangeType
from dda.utils.git.commit import GitPersonDetails
Expand Down Expand Up @@ -108,6 +110,52 @@ def test_get_patch(app: Application, temp_repo: Path) -> None:
assert line == pattern


@pytest.mark.parametrize(
"location",
[
"cwd",
"repo_root",
"subdirectory",
"file",
],
)
def test_get_repo_root_from_locations(app: Application, temp_repo: Path, location: str) -> None:
"""get_repo_root() returns repo root when called from cwd, repo root, subdir, or file path."""
expected_root = temp_repo.resolve()

if location == "cwd":
with temp_repo.as_cwd():
root = app.tools.git.get_repo_root()
elif location == "repo_root":
root = app.tools.git.get_repo_root(temp_repo)
elif location == "subdirectory":
subdir = temp_repo / "sub" / "nested"
subdir.mkdir(parents=True)
root = app.tools.git.get_repo_root(subdir)
else: # file
with temp_repo.as_cwd():
app.tools.git.commit_file(Path("somefile.txt"), content="x", commit_message="Add file")
root = app.tools.git.get_repo_root(temp_repo / "somefile.txt")

assert root == expected_root
assert root.is_dir()


def test_get_repo_root_path_does_not_exist(app: Application, temp_dir: Path) -> None:
"""get_repo_root(path) raises ValueError when path does not exist."""
nonexistent = temp_dir / "does-not-exist"
with pytest.raises(ValueError, match=r"Path .* does not exist"):
app.tools.git.get_repo_root(nonexistent)


def test_get_repo_root_not_in_repo(app: Application, temp_dir: Path) -> None:
"""get_repo_root(path) raises ValueError when path is not in a Git repository."""
not_repo = temp_dir / "not-a-repo"
not_repo.mkdir()
with pytest.raises(ValueError, match=r"is not in a Git repository"):
app.tools.git.get_repo_root(not_repo)


def test_get_changes(app: Application, repo_setup_working_tree: tuple[Path, ChangeSet]) -> None:
git: Git = app.tools.git
temp_repo, expected_changeset = repo_setup_working_tree
Expand Down
Loading