文件预览

slacking_analyzer.py

查看 Code Analysis Skills 技能包中的文件内容。

文件内容

src/analyzers/slacking_analyzer.py

"""
Slacking Analyzer - Calculates developer "Slacking Index" (摸鱼指数).

This analyzer detects patterns that may indicate low engagement or
"slacking" behaviors by analyzing commit timing, frequency, consistency,
and output quality signals.

The Slacking Index is a composite score from 0 (hardworking) to 100 (slacking).
It is meant to be taken with a grain of humor, but backed by real data.

Signals analyzed:
  - Commit frequency vs active days (sparse = suspicious)
  - Single-line / trivial commit ratio (low-effort commits)
  - Large gaps between commits (disappearing acts)
  - Late-afternoon-only commits (deadline-driven behavior; listed as a
    planned signal, not scored by the analyzer below)
  - Low code output per active day
  - Config-only / doc-only commit ratio (avoiding real code)
  - Copy-paste signals (far more lines added than deleted)
  - Friday-heavy / Monday-light patterns
"""

import logging
from collections import defaultdict, Counter
from typing import Dict

from src.analyzers.base_analyzer import BaseAnalyzer

logger = logging.getLogger(__name__)

# Thresholds
# A commit is "trivial" when its added + deleted line count is at most this value.
TRIVIAL_COMMIT_LINE_THRESHOLD = 5  # commits with <= 5 lines changed
# A gap between two consecutive commits longer than this many hours is "large".
LARGE_GAP_HOURS = 72  # 3 days without commits
# An author's overall added/deleted line ratio above this sets the copy-paste signal.
COPY_PASTE_RATIO = 10  # added/deleted ratio above this is suspicious


class SlackingAnalyzer(BaseAnalyzer):
    """
    Calculates a composite 'Slacking Index' for each developer.

    The index ranges from 0 to 100:
      - 0-20:  Highly engaged, consistent contributor
      - 21-40: Normal, healthy work pattern
      - 41-60: Some slacking signals detected
      - 61-80: Significant slacking indicators
      - 81-100: Professional slacker detected 🐟

    The index is the rounded sum of seven per-signal scores, capped at 100:
    sparsity (max 25), trivial commits (max 20), disappearance (max 20),
    low output (max 15), non-code commits (max 10), procrastination
    (max 10) and copy-paste (0 or 5).
    """

    # File suffixes treated as configuration / documentation rather than code.
    _NON_CODE_EXTS = (
        ".md", ".rst", ".txt", ".adoc", ".yml", ".yaml", ".json",
        ".toml", ".ini", ".cfg", ".env", ".lock", ".gitignore",
        ".editorconfig", ".prettierrc",
    )
    # Lower-cased file-name stems (the part before the first dot) that mark
    # a well-known non-code file such as README or Dockerfile.dev.
    _NON_CODE_NAMES = frozenset({
        "readme", "changelog", "license", "contributing",
        "dockerfile", "makefile",
    })

    def analyze(self) -> Dict:
        """
        Analyze slacking signals for each author.

        Returns:
            Dict keyed by author name with slacking metrics and index.
            Authors appear in the order their first commit is yielded by
            ``self._get_commits()``.
        """
        result = {}
        for author, data in self._collect_author_data().items():
            if not data["commit_times"]:
                continue
            result[author] = self._score_author(data)
        return result

    def _collect_author_data(self) -> Dict:
        """Group the per-commit facts needed for scoring by author name.

        Reads ``author.name``, ``committer_date`` and ``modified_files`` from
        every commit yielded by ``self._get_commits()`` (defined outside this
        class).

        Returns:
            Dict mapping author name to parallel lists, one entry per commit:
            ``commit_times``, ``commit_dates``, ``weekdays``, ``lines_added``,
            ``lines_deleted`` and ``file_paths`` (a list of paths per commit).
        """
        author_data = defaultdict(lambda: {
            "commit_times": [],
            "commit_dates": [],
            "lines_added": [],
            "lines_deleted": [],
            "file_paths": [],
            "weekdays": [],
        })

        for commit in self._get_commits():
            data = author_data[commit.author.name]
            committed_at = commit.committer_date
            data["commit_times"].append(committed_at)
            data["commit_dates"].append(committed_at.date())
            data["weekdays"].append(committed_at.weekday())

            added = 0
            deleted = 0
            paths = []
            for mod in commit.modified_files:
                added += mod.added_lines
                deleted += mod.deleted_lines
                # A deleted file has no new path. Fall back to the old path
                # so that a commit which removes code is not mistaken for a
                # doc-only commit just because its remaining files are docs.
                # NOTE(review): assumes the file object exposes ``old_path``
                # (as PyDriller's ModifiedFile does) — confirm.
                path = mod.new_path or getattr(mod, "old_path", None)
                if path:
                    paths.append(path)

            data["lines_added"].append(added)
            data["lines_deleted"].append(deleted)
            data["file_paths"].append(paths)

        return author_data

    def _score_author(self, data: Dict) -> Dict:
        """Compute every signal and the composite index for one author.

        Args:
            data: One value of the mapping built by ``_collect_author_data``;
                must describe at least one commit.

        Returns:
            Dict with the slacking index, its level labels, the raw metrics
            and a ``signal_breakdown`` of the seven per-signal scores.
        """
        total = len(data["commit_times"])
        lines_added = data["lines_added"]
        lines_deleted = data["lines_deleted"]

        # Signal 1: Commit sparsity (few active days over a long span).
        # The span is inclusive of both end days so that it is comparable
        # with the count of distinct active days and the ratio stays <= 1.
        dates = data["commit_dates"]
        span_days = (max(dates) - min(dates)).days + 1
        unique_days = len(set(dates))
        activity_ratio = unique_days / span_days
        # Low activity ratio = high slacking signal
        sparsity_score = max(0, min(25, round((1 - activity_ratio) * 30)))

        # Signal 2: Trivial commit ratio
        trivial_count = sum(
            1 for added, deleted in zip(lines_added, lines_deleted)
            if added + deleted <= TRIVIAL_COMMIT_LINE_THRESHOLD
        )
        trivial_ratio = trivial_count / total

        # Signal 3: Large gaps between consecutive commits
        times = sorted(data["commit_times"])
        gap_hours = [
            (later - earlier).total_seconds() / 3600
            for earlier, later in zip(times, times[1:])
        ]
        large_gap_count = sum(1 for gap in gap_hours if gap > LARGE_GAP_HOURS)
        # A single commit has no gaps; report zeros rather than divide by 0.
        avg_gap = sum(gap_hours) / len(gap_hours) if gap_hours else 0
        large_gap_ratio = large_gap_count / len(gap_hours) if gap_hours else 0

        # Signal 4: Low output (total lines changed per active day)
        total_added = sum(lines_added)
        total_deleted = sum(lines_deleted)
        lines_per_day = (total_added + total_deleted) / unique_days
        low_output_score = self._low_output_score(lines_per_day)

        # Signal 5: Config/doc-only commits (avoiding real code work).
        # Commits with no recorded paths are not counted as non-code.
        non_code_commits = sum(
            1 for paths in data["file_paths"]
            if paths and all(self._is_non_code(path) for path in paths)
        )
        non_code_ratio = non_code_commits / total

        # Signal 6: Friday-heavy / Monday-light pattern (procrastination).
        # Ratios are relative to Monday-Friday commits only (weekday() < 5).
        dow_counts = Counter(data["weekdays"])
        weekday_total = sum(1 for day in data["weekdays"] if day < 5) or 1
        friday_ratio = dow_counts.get(4, 0) / weekday_total
        monday_ratio = dow_counts.get(0, 0) / weekday_total
        # High Friday + Low Monday = deadline-driven
        procrastination = max(0, friday_ratio - monday_ratio)

        # Signal 7: Copy-paste signal (very high added/deleted ratio).
        # With no deletions at all the added total itself acts as the ratio.
        add_delete_ratio = total_added / max(total_deleted, 1)
        copy_paste_score = 5 if add_delete_ratio > COPY_PASTE_RATIO else 0

        signal_breakdown = {
            "sparsity": sparsity_score,
            "trivial_commits": round(trivial_ratio * 20, 1),
            "disappearance": round(large_gap_ratio * 20, 1),
            "low_output": low_output_score,
            "non_code": round(non_code_ratio * 10, 1),
            "procrastination": round(procrastination * 10, 1),
            "copy_paste": copy_paste_score,
        }

        # Composite Slacking Index (sum of all signals, capped at 100)
        slacking_index = min(100, round(sum(signal_breakdown.values())))
        level, level_cn = self._classify_level(slacking_index)

        return {
            "slacking_index": slacking_index,
            "slacking_level": level,
            "slacking_level_cn": level_cn,
            "total_commits": total,
            "active_span_days": span_days,
            "unique_active_days": unique_days,
            "activity_ratio": round(activity_ratio, 3),
            "trivial_commit_ratio": round(trivial_ratio, 3),
            "large_gap_ratio": round(large_gap_ratio, 3),
            "avg_gap_hours": round(avg_gap, 1),
            "lines_per_active_day": round(lines_per_day, 1),
            "non_code_commit_ratio": round(non_code_ratio, 3),
            "friday_ratio": round(friday_ratio, 3),
            "monday_ratio": round(monday_ratio, 3),
            "signal_breakdown": signal_breakdown,
        }

    @staticmethod
    def _low_output_score(lines_per_day: float) -> int:
        """Map lines changed per active day to a 0-15 low-output score."""
        if lines_per_day < 20:
            return 15
        if lines_per_day < 50:
            return 8
        if lines_per_day < 100:
            return 3
        return 0

    @staticmethod
    def _classify_level(slacking_index: int) -> tuple:
        """Return the ``(english, chinese)`` level labels for an index."""
        if slacking_index <= 20:
            return "🔥 Workaholic", "🔥 工作狂"
        if slacking_index <= 40:
            return "✅ Normal", "✅ 正常"
        if slacking_index <= 60:
            return "😏 Suspicious", "😏 有嫌疑"
        if slacking_index <= 80:
            return "🐟 Slacker", "🐟 摸鱼达人"
        return "🏆 Professional Slacker", "🏆 摸鱼大师"

    @staticmethod
    def _is_non_code(filepath: str) -> bool:
        """Check if a file is a non-code file (config, docs, etc.).

        A path is non-code when any of the following holds
        (case-insensitive, with ``\\`` treated like ``/``):

        * it ends with one of the known config/doc suffixes;
        * one of its directories is named ``.github``;
        * its file-name stem (text before the first dot) is a well-known
          name such as ``README`` or ``Dockerfile``;
        * it has no extension and starts with such a name followed by
          ``-`` or ``_`` (e.g. ``LICENSE-MIT``).

        Names are matched against the file name only, never as substrings
        of the whole path, so ``src/readme_parser.py`` stays a code file.
        """
        normalized = filepath.lower().replace("\\", "/")
        if normalized.endswith(SlackingAnalyzer._NON_CODE_EXTS):
            return True

        *directories, basename = normalized.split("/")
        if ".github" in directories:
            return True

        stem, dot, _ = basename.partition(".")
        if stem in SlackingAnalyzer._NON_CODE_NAMES:
            return True
        if not dot:
            # Extensionless variants such as LICENSE-MIT or LICENSE_APACHE.
            head = stem.replace("_", "-").split("-", 1)[0]
            return head in SlackingAnalyzer._NON_CODE_NAMES
        return False