# Source code for dwh_auditor.analyzer.zombie

"""テãƒŧブãƒĢãƒ—ãƒ­ãƒ•ã‚Ąã‚¤ãƒĒãƒŗã‚°ãƒģãŠã‚ˆãŗã‚žãƒŗãƒ“īŧˆæœĒäŊŋᔍīŧ‰åˆ¤åŽšãƒ­ã‚¸ãƒƒã‚¯.

æŗ¨æ„: こぎãƒĸジãƒĨãƒŧãƒĢは google.cloud.bigquery ã‚’ä¸€åˆ‡ã‚¤ãƒŗãƒãƒŧトしãĻはãĒりぞせん。
ᴔឋãĒ Python ロジックぎãŋで構成し、単äŊ“テ゚トがミãƒĒį§’å˜äŊã§åތäē†ã™ã‚‹ã‚ˆã†ãĢしぞす。
"""

from __future__ import annotations

from datetime import datetime, timezone
from typing import Any, Optional

from dwh_auditor.config import AppConfig
from dwh_auditor.models.result import TableUsageProfile
from dwh_auditor.models.table import TableStorage

_BYTES_PER_GB: float = 1024**3


def _bytes_to_gb(bytes_value: int) -> float:
    """バイト数を GB ãĢ変換する."""
    return bytes_value / _BYTES_PER_GB


def analyze_table_usage(
    tables: list[TableStorage],
    usage_stats: dict[str, dict[str, Any]],
    config: AppConfig,
    now: Optional[datetime] = None,  # noqa: UP045
) -> list[TableUsageProfile]:
    """Return per-table profiles (usage status and zombie verdict).

    Joins the lightweight ``usage_stats`` received from the Extractor
    with ``tables``.

    Args:
        tables: List of TableStorage objects (from the Extractor).
        usage_stats: Statistics dict keyed by "project.dataset.table"
            (from the Extractor).
        config: AppConfig object; supplies the zombie-days threshold.
        now: Reference "current" datetime (injectable for tests).
            Defaults to the current UTC time.

    Returns:
        List of TableUsageProfile, sorted by storage size descending,
        capped at the top 100 entries.
    """
    zombie_days = config.thresholds.zombie_table_days
    if now is None:
        # NOTE: timezone-aware UTC; usage_stats timestamps are assumed
        # to be aware as well — naive datetimes would raise on subtraction.
        now = datetime.now(tz=timezone.utc)

    profiles: list[TableUsageProfile] = []
    for table in tables:
        stat = usage_stats.get(table.full_table_id, {})
        last_accessed_at = stat.get("last_accessed_at")
        access_count = stat.get("access_count", 0)
        top_users = stat.get("top_users", [])

        # Zombie rule: never accessed within the collection window, or
        # the last access is at least `zombie_days` days in the past.
        is_zombie = (
            last_accessed_at is None
            or (now - last_accessed_at).days >= zombie_days
        )

        if is_zombie:
            profiles.append(
                TableUsageProfile(
                    table=table,
                    is_zombie=True,
                    last_accessed_at=last_accessed_at,
                    top_users=top_users,
                    access_count_30d=access_count,
                    size_gb=_bytes_to_gb(table.total_logical_bytes),
                )
            )

    # Largest storage first; cap at the top 100 to keep reports bounded.
    profiles.sort(key=lambda p: p.size_gb, reverse=True)
    return profiles[:100]