# Source code for dwh_auditor.analyzer.zombie
"""Table profiling and zombie (unused table) detection logic.

Note: this module must never import google.cloud.bigquery.
It is composed of pure Python logic only, so that unit tests complete within milliseconds.
"""
from __future__ import annotations
from datetime import datetime, timezone
from typing import Any, Optional
from dwh_auditor.config import AppConfig
from dwh_auditor.models.result import TableUsageProfile
from dwh_auditor.models.table import TableStorage
# Number of bytes in one gigabyte, using the binary definition (1024 ** 3).
_BYTES_PER_GB: float = 1024 * 1024 * 1024


def _bytes_to_gb(bytes_value: int) -> float:
    """Convert a byte count to gigabytes.

    Args:
        bytes_value: Size in bytes.

    Returns:
        ``bytes_value`` divided by ``_BYTES_PER_GB`` (1024 ** 3).
    """
    gigabytes = bytes_value / _BYTES_PER_GB
    return gigabytes
# [docs]
def analyze_table_usage(
    tables: list[TableStorage],
    usage_stats: dict[str, dict[str, Any]],
    config: AppConfig,
    now: Optional[datetime] = None,  # noqa: UP045
) -> list[TableUsageProfile]:
    """Build usage profiles for the tables judged to be zombies (unused).

    Each table is joined with its entry in ``usage_stats`` (looked up by
    ``table.full_table_id``). A table is a zombie when it has no
    ``last_accessed_at`` value, or when the whole number of days between
    ``now`` and ``last_accessed_at`` is at least
    ``config.thresholds.zombie_table_days``. Tables that are not zombies are
    omitted from the result.

    Args:
        tables: TableStorage objects to evaluate; ``full_table_id`` and
            ``total_logical_bytes`` are read from each one.
        usage_stats: Per-table statistics keyed by full table id. The keys
            ``"last_accessed_at"``, ``"access_count"`` and ``"top_users"``
            are read; missing ones fall back to ``None``, ``0`` and ``[]``.
            NOTE(review): ids are presumably "project.dataset.table" strings
            and the stats presumably come from the extractor -- confirm.
        config: Application configuration supplying
            ``thresholds.zombie_table_days``.
        now: Reference time for the age calculation. Defaults to the current
            UTC time; mainly useful for injecting a fixed time in tests.

    Returns:
        Profiles of zombie tables only, sorted by ``size_gb`` in descending
        order and truncated to at most 100 entries.
    """
    threshold_days = config.thresholds.zombie_table_days
    reference_time = datetime.now(tz=timezone.utc) if now is None else now

    zombies: list[TableUsageProfile] = []
    for table in tables:
        stats = usage_stats.get(table.full_table_id, {})
        last_seen = stats.get("last_accessed_at")
        hits = stats.get("access_count", 0)
        users = stats.get("top_users", [])

        # Zombie rule: never accessed within the collected statistics, or the
        # most recent access is at least `threshold_days` whole days old.
        is_stale = (
            last_seen is None
            or (reference_time - last_seen).days >= threshold_days
        )
        if not is_stale:
            continue

        zombies.append(
            TableUsageProfile(
                table=table,
                is_zombie=True,
                last_accessed_at=last_seen,
                top_users=users,
                access_count_30d=hits,
                size_gb=_bytes_to_gb(table.total_logical_bytes),
            )
        )

    # Largest storage first; cap the report at the top 100 entries so the
    # output stays manageable when many tables qualify.
    largest_first = sorted(
        zombies, key=lambda profile: profile.size_gb, reverse=True
    )
    return largest_first[:100]