codeanalyzer-python/codeanalyzer/__main__.py at f5329bb6ed009acbbdcfbe0921764c067036b151 · codellm-devkit/codeanalyzer-python · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
from pathlib import Path
from typing import Optional, Annotated

import typer

from codeanalyzer.core import Codeanalyzer
from codeanalyzer.utils import _set_log_level, logger
from codeanalyzer.config import OutputFormat
from codeanalyzer.schema import model_dump_json
from codeanalyzer.options import AnalysisOptions


def main(
    input: Annotated[
        Path,
        typer.Option("-i", "--input", help="Path to the project root directory."),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option("-o", "--output", help="Output directory for artifacts."),
    ] = None,
    format: Annotated[
        OutputFormat,
        typer.Option(
            "-f",
            "--format",
            help="Output format: json or msgpack.",
            case_sensitive=False,
        ),
    ] = OutputFormat.JSON,
    analysis_level: Annotated[
        int,
        typer.Option(
            "-a",
            "--analysis-level",
            help="1: symbol table, 2: call graph (requires --codeql), 3: taint analysis (requires --codeql).",
        ),
    ] = 1,
    using_codeql: Annotated[
        bool,
        typer.Option("--codeql/--no-codeql", help="Enable CodeQL-based analysis."),
    ] = False,
    taint_config: Annotated[
        Optional[Path],
        typer.Option(
            "--taint-config",
            help="Path to taint analysis configuration file (YAML or JSON). Used with --analysis-level 3.",
        ),
    ] = None,
    taint_use_defaults: Annotated[
        bool,
        typer.Option(
            "--taint-defaults/--no-taint-defaults",
            help=(
                "Controls which taint sources/sinks/sanitizers are active:\n\n"
                "  (no --taint-config)          → built-in defaults only\n"
                "  --taint-config + --taint-defaults  → union of defaults and custom config [default]\n"
                "  --taint-config + --no-taint-defaults → custom config only, replaces all defaults"
            ),
        ),
    ] = True,
    using_ray: Annotated[
        bool,
        typer.Option("--ray/--no-ray", help="Enable Ray for distributed analysis."),
    ] = False,
    rebuild_analysis: Annotated[
        bool,
        typer.Option(
            "--eager/--lazy",
            help="Enable eager or lazy analysis. Defaults to lazy.",
        ),
    ] = False,
    skip_tests: Annotated[
        bool,
        typer.Option(
            "--skip-tests/--include-tests",
            help="Skip test files in analysis.",
        ),
    ] = True,
    file_name: Annotated[
        Optional[Path],
        typer.Option(
            "--file-name",
            help="Analyze only the specified file (relative to input directory).",
        ),
    ] = None,
    cache_dir: Annotated[
        Optional[Path],
        typer.Option(
            "-c",
            "--cache-dir",
            help="Directory to store analysis cache. Defaults to '.codeanalyzer' in the input directory.",
        ),
    ] = None,
    clear_cache: Annotated[
        bool,
        typer.Option(
            "--clear-cache/--keep-cache",
            help="Clear cache after analysis. By default, cache is retained.",
        ),
    ] = False,
    verbosity: Annotated[
        int,
        typer.Option("-v", count=True, help="Increase verbosity: -v, -vv, -vvv"),
    ] = 0,
):
    # CLI entry point. Flow:
    #   1. validate flag combinations,
    #   2. bundle the flags into an AnalysisOptions and set the log level,
    #   3. validate the input path and the optional single-file target,
    #   4. run the analyzer and either print compact JSON to stdout (no
    #      --output) or write the artifacts into the output directory.
    # Every validation failure logs one error and exits with status 1.
    #
    # NOTE: documented with comments rather than a docstring so that the
    # callback's __doc__ is left exactly as it was.

    # --- Stage 1: flag combinations (first failing rule wins) ---------------
    flag_problem = None
    if not using_codeql and analysis_level >= 2:
        flag_problem = "Analysis levels 2 and 3 require --codeql flag"
    elif analysis_level >= 3 and taint_config and not taint_config.exists():
        # The taint config file is only checked when taint analysis is requested.
        flag_problem = f"Taint configuration file '{taint_config}' does not exist."
    elif not (taint_use_defaults or taint_config):
        flag_problem = "--no-taint-defaults requires --taint-config (otherwise nothing would be analyzed)."

    if flag_problem is not None:
        logger.error(flag_problem)
        raise typer.Exit(code=1)

    # --- Stage 2: build the options object ----------------------------------
    options = AnalysisOptions(
        input=input,
        output=output,
        format=format,
        analysis_level=analysis_level,
        using_codeql=using_codeql,
        taint_config=taint_config,
        taint_use_defaults=taint_use_defaults,
        using_ray=using_ray,
        rebuild_analysis=rebuild_analysis,
        skip_tests=skip_tests,
        file_name=file_name,
        cache_dir=cache_dir,
        clear_cache=clear_cache,
        verbosity=verbosity,
    )
    _set_log_level(options.verbosity)

    # --- Stage 3: filesystem checks -----------------------------------------
    if not options.input.exists():
        logger.error(f"Input path '{options.input}' does not exist.")
        raise typer.Exit(code=1)

    if options.file_name is not None:
        # The single-file target is resolved against the project root.
        candidate = options.input / options.file_name
        if not candidate.exists():
            target_problem = f"Specified file '{options.file_name}' does not exist in '{options.input}'."
        elif not candidate.is_file():
            target_problem = f"Specified path '{options.file_name}' is not a file."
        elif not str(options.file_name).endswith(".py"):
            target_problem = f"Specified file '{options.file_name}' is not a Python file (.py)."
        else:
            target_problem = None

        if target_problem is not None:
            logger.error(target_problem)
            raise typer.Exit(code=1)

    # --- Stage 4: analyze and emit ------------------------------------------
    with Codeanalyzer(options) as analyzer:
        artifacts = analyzer.analyze()

        destination = options.output
        if destination is not None:
            destination.mkdir(parents=True, exist_ok=True)
            _write_output(artifacts, destination, options.format)
        else:
            # No output directory: compact JSON goes to stdout.
            print(model_dump_json(artifacts, separators=(",", ":")))


def _write_output(artifacts, output_dir: Path, format: OutputFormat):
    """Write analysis artifacts to a file inside ``output_dir``.

    Args:
        artifacts: Analysis result object. It is handed to ``model_dump_json``
            for JSON output and must provide ``to_msgpack_bytes()`` and
            ``get_compression_ratio()`` for msgpack output.
        output_dir: Directory that receives the output file. It is not
            created here; the caller is expected to have created it.
        format: ``OutputFormat.JSON`` writes ``analysis.json``;
            ``OutputFormat.MSGPACK`` writes ``analysis.msgpack``. Any other
            value writes nothing.
    """
    if format == OutputFormat.JSON:
        output_file = output_dir / "analysis.json"
        # Compact (non-indented) JSON keeps the artifact small.
        json_str = model_dump_json(artifacts, indent=None)
        # Encode explicitly as UTF-8. Without `encoding`, text mode falls back
        # to the platform locale (e.g. cp1252 on Windows), which can raise
        # UnicodeEncodeError or yield a non-UTF-8 file when the analyzed code
        # contains non-ASCII text.
        with output_file.open("w", encoding="utf-8") as f:
            f.write(json_str)
        logger.info(f"Analysis saved to {output_file}")

    elif format == OutputFormat.MSGPACK:
        output_file = output_dir / "analysis.msgpack"
        msgpack_data = artifacts.to_msgpack_bytes()
        # msgpack is a binary format, so no text encoding is involved.
        with output_file.open("wb") as f:
            f.write(msgpack_data)
        logger.info(f"Analysis saved to {output_file}")
        logger.info(
            f"Compression ratio: {artifacts.get_compression_ratio():.1%} of JSON size"
        )


# Single-command CLI: `main` is installed as the application callback, so the
# options declared on `main` are the options of the program itself.
app = typer.Typer(
    # Identity and help text.
    name="codeanalyzer",
    help="Static Analysis on Python source code using Jedi, CodeQL and Tree sitter.",
    # Dispatch: no subcommand is needed to invoke `main`; per the flag name,
    # a bare invocation shows help.
    callback=main,
    invoke_without_command=True,
    no_args_is_help=True,
    # Presentation.
    add_completion=False,
    rich_markup_mode="rich",
    pretty_exceptions_show_locals=False,
)

# Allow running the module directly (e.g. via `python -m`).
if __name__ == "__main__":
    app()