Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions git_analytics/sources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .git_commit_adapter import GitCommitSource
from .git_log_adapter import GitLogSource

# Public names of this package: the two commit-source adapters imported above.
__all__ = [
"GitCommitSource",
"GitLogSource",
]
204 changes: 102 additions & 102 deletions git_analytics/sources/git_log_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,118 +5,118 @@
from typing import Iterable, Iterator

from git_analytics.entities import AnalyticsCommit
from git_analytics.interfaces import CommitSource

# Patterns for the header lines of one commit entry; all are case-insensitive (re.I).
# "commit <sha>": the sha is 7 to 40 hexadecimal characters.
_RE_COMMIT = re.compile(r"^commit\s+(?P<sha>[0-9a-f]{7,40})\s*$", re.I)
# "Author: <name> <email>" with a mandatory "<email>" part.
_RE_AUTHOR = re.compile(r"^Author:\s*(?P<name>.+?)\s*<(?P<email>[^>]+)>\s*$", re.I)
# NOTE(review): this second assignment rebinds _RE_AUTHOR, so only the pattern
# below is in effect. Here the "<email>" part is optional, which means the
# "email" group is None when it is absent.
_RE_AUTHOR = re.compile(
r"^Author:\s*(?P<name>[^<]+?)(?:\s*<(?P<email>[^>]+)>)?\s*$",
re.I,
)
# "Date: <value>": the value, without surrounding whitespace, is captured in group "dt".
_RE_DATE = re.compile(r"^Date:\s*(?P<dt>.+?)\s*$", re.I)
# numstat: "<insertions>\t<deletions>\t<path>"; either count may be "-" instead of a number.
_RE_NUMSTAT = re.compile(r"^\s*(?P<ins>-|\d+)\s+(?P<del>-|\d+)\s+(?P<path>.+)$")


def _parse_dt_iso(s: str) -> datetime:
    """Parse an ISO 8601 timestamp such as ``2025-08-16T16:35:39+02:00``.

    Args:
        s: Timestamp text; surrounding whitespace is ignored.

    Returns:
        The parsed ``datetime``; it is timezone-aware when *s* carries a
        UTC offset or the ``Z`` designator.

    Raises:
        ValueError: If *s* is not a valid ISO 8601 timestamp.
    """
    text = s.strip()
    # ``datetime.fromisoformat`` accepts the "Z" (UTC) designator only from
    # Python 3.11 on, so rewrite it as an explicit offset; the result is the
    # same on newer interpreters and no longer a ValueError on older ones.
    if text.endswith("Z"):
        text = f"{text[:-1]}+00:00"
    return datetime.fromisoformat(text)


def _yield_commits(lines: Iterable[str]) -> Iterator[AnalyticsCommit]:
sha: str | None = None
author: str | None = None
dt: datetime | None = None

# collect only the first non-empty message line (subject)
subject: str | None = None
in_message_block = False
headers_done = False

ins_total = 0
del_total = 0
files_changed = 0

def flush():
nonlocal sha, author, dt, subject, in_message_block, headers_done
nonlocal ins_total, del_total, files_changed
if not sha:
return
yield AnalyticsCommit(
sha=sha,
commit_author=author or "Unknown",
committed_datetime=dt or _parse_dt_iso("1970-01-01T00:00:00+00:00"),
lines_insertions=ins_total,
lines_deletions=del_total,
files_changed=files_changed,
message=subject or "",
)
# reset state for the next commit
sha = author = subject = None
dt = None
in_message_block = False
headers_done = False
ins_total = del_total = files_changed = 0

for raw in lines:
line = raw.rstrip("\n")

# start of a new commit
m = _RE_COMMIT.match(line)
if m:
# flush the previous commit block
yield from flush()
sha = m.group("sha")
continue

if sha and not headers_done:
ma = _RE_AUTHOR.match(line)
if ma:
# you only have one commit_author field: join "Name <email>"
name = ma.group("name").strip()
email = ma.group("email").strip()
author = f"{name} <{email}>"
# Matches a "Merge: ..." header line (case-insensitive); such lines are recognised only to be skipped.
_RE_MERGE = re.compile(r"^Merge:\s+", re.I)


class GitLogSource(CommitSource):
def __init__(self, text: str) -> None:
self._text = text

def iter_commits(self) -> Iterator[AnalyticsCommit]:
yield from self.yield_commits(self._text.splitlines())

@staticmethod
def yield_commits(lines: Iterable[str]) -> Iterator[AnalyticsCommit]:
sha: str | None = None
author_name: str | None = None
dt: datetime | None = None

subject: str | None = None
in_headers = False
in_message = False

ins_total = 0
del_total = 0
files_changed = 0

def flush():
nonlocal sha, author_name, dt, subject, in_headers, in_message
nonlocal ins_total, del_total, files_changed
if not sha:
return
committed_dt = dt if dt is not None else datetime.fromtimestamp(0)
yield AnalyticsCommit(
sha=sha,
commit_author=author_name or "Unknown",
committed_datetime=committed_dt,
lines_insertions=ins_total,
lines_deletions=del_total,
files_changed=files_changed,
message=subject or "",
)
sha = None
author_name = None
dt = None
subject = None
in_headers = False
in_message = False
ins_total = 0
del_total = 0
files_changed = 0

for raw in lines:
line = raw.rstrip("\n")

m_commit = _RE_COMMIT.match(line)
if m_commit:
yield from flush()
sha = m_commit.group("sha")
in_headers = True
in_message = False
continue

md = _RE_DATE.match(line)
if md:
dt = _parse_dt_iso(md.group("dt"))
continue
if sha and in_headers:
if _RE_MERGE.match(line):
continue

if line.strip() == "":
# empty line separates headers from commit message
headers_done = True
in_message_block = True
continue
m_author = _RE_AUTHOR.match(line)
if m_author:
author_name = m_author.group("name").strip()
continue

if sha and in_message_block:
# take the first non-empty line as subject
if line.strip():
# if it's already a numstat line — then there's no message
mn = _RE_NUMSTAT.match(line)
if mn:
in_message_block = False
# don't continue — let it be processed as numstat below
else:
subject = line.strip()
# later lines may be body text — skip until first numstat
m_date = _RE_DATE.match(line)
if m_date:
dt_str = m_date.group("dt").strip()
dt = datetime.fromisoformat(dt_str)
continue
else:
# ignore empty lines in the body
continue

if sha:
# numstat (may appear right after headers or after subject)
mn = _RE_NUMSTAT.match(line)
if mn:
ins_s, del_s = mn.group("ins"), mn.group("del")
ins = int(ins_s) if ins_s.isdigit() else 0 # '-' для бинарников
dels = int(del_s) if del_s.isdigit() else 0
ins_total += ins
del_total += dels
files_changed += 1
continue
if line.strip() == "":
in_headers = False
in_message = True
continue

# flush the last commit
yield from flush()
if sha and in_message:
if line.strip():
if _RE_NUMSTAT.match(line):
in_message = False
else:
subject = line.strip()
continue
else:
continue

if sha:
m_ns = _RE_NUMSTAT.match(line)
if m_ns:
ins_s, del_s = m_ns.group("ins"), m_ns.group("del")
ins = int(ins_s) if ins_s.isdigit() else 0
dels = int(del_s) if del_s.isdigit() else 0
ins_total += ins
del_total += dels
files_changed += 1
in_message = False
continue

def text_commits_to_analytics_commits(path_to_file: str) -> list[AnalyticsCommit]:
    """Read the UTF-8 text file at *path_to_file* and collect its commits.

    The open file is handed line by line to ``_yield_commits``; everything
    that generator produces is gathered into a list before the file closes.
    """
    with open(path_to_file, mode="r", encoding="utf-8") as log_file:
        commits = list(_yield_commits(log_file))
    return commits
yield from flush()
2 changes: 1 addition & 1 deletion git_analytics/static/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ <h1 class="h3 mb-4">Other statistics</h1>
</main>
<footer class="bg-dark text-white py-3 mt-auto">
<div class="container">
<span>&copy; 2025 ver 0.1.10</span>
<span>&copy; 2025 ver 0.1.11</span>
</div>
</footer>
<script src="js/bootstrap.bundle.min.js"></script>
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "git-analytics"
version = "0.1.10"
version = "0.1.11"
description = "Advanced analytics for Git repositories — commits, authors, code churn, lines of code, trends, and visual dashboards."
authors = ["n0rfas <n0rfas@protonmail.com>"]
license = "MIT"
Expand Down