manoon-headless/scripts/gsc-monitoring/monitor.py
Unchained df915ca128 feat: add Google Search Console automated monitoring
- Python monitoring script for daily GSC reports
- Kubernetes CronJob for automated execution (a sketch of such a manifest follows below)
- Tracks search analytics, crawl errors, and sitemap status
- Includes full setup documentation
2026-03-30 17:17:42 +02:00
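
The CronJob manifest itself is not shown on this page. As a rough orientation, a minimal manifest for this script could look like the sketch below; the schedule, image, Secret name, and resource names are assumptions for illustration, not taken from the repo. Only the mount path mirrors the script's KEY_FILE default.

apiVersion: batch/v1
kind: CronJob
metadata:
  name: gsc-monitoring            # assumed name
spec:
  schedule: "0 6 * * *"           # assumed daily schedule
  jobTemplate:
    spec:
      template:
        spec:
          restartPolicy: OnFailure
          containers:
            - name: gsc-monitor
              image: python:3.12-slim   # assumed image; the repo may build its own
              command: ["python", "/app/monitor.py"]
              env:
                - name: GSC_KEY_FILE
                  value: /etc/gsc-monitoring/service-account.json
              volumeMounts:
                - name: gsc-credentials
                  mountPath: /etc/gsc-monitoring
                  readOnly: true
          volumes:
            - name: gsc-credentials
              secret:
                secretName: gsc-service-account   # assumed Secret name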


#!/usr/bin/env python3
"""
Google Search Console Monitoring Script
Monitors search performance, crawl errors, and indexing status
"""
import os
import sys
from datetime import datetime, timedelta

from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

# Configuration
SITE_URL = "https://manoonoils.com/"
SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]
KEY_FILE = os.environ.get("GSC_KEY_FILE", "/etc/gsc-monitoring/service-account.json")


def get_service():
    """Authenticate and return a Search Console service client."""
    credentials = service_account.Credentials.from_service_account_file(
        KEY_FILE, scopes=SCOPES
    )
    # The legacy "webmasters" v3 discovery name still serves the Search Analytics
    # and Sitemaps endpoints; the current name for the same API is "searchconsole" v1.
    return build("webmasters", "v3", credentials=credentials)


def get_search_analytics(service, days=7):
    """Get search analytics data for the last N days."""
    end_date = datetime.now().strftime("%Y-%m-%d")
    start_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
    try:
        request = {
            "startDate": start_date,
            "endDate": end_date,
            "dimensions": ["query", "page"],
            "rowLimit": 100,
        }
        response = (
            service.searchanalytics().query(siteUrl=SITE_URL, body=request).execute()
        )
        return response.get("rows", [])
    except HttpError as e:
        print(f"Error fetching search analytics: {e}")
        return []


def get_crawl_errors(service):
    """Get crawl errors summary."""
    try:
        # Note: Google retired the URL crawl errors endpoint in 2019, so on
        # current API versions this call is expected to fail and fall through
        # to the empty-list path below.
        response = service.urlcrawlerrorscounts().query(siteUrl=SITE_URL).execute()
        return response.get("countPerTypes", [])
    except (HttpError, AttributeError) as e:
        # HttpError covers API-level failures; AttributeError covers the method
        # being absent from the discovery document entirely.
        print(f"Error fetching crawl errors: {e}")
        return []


def get_sitemaps(service):
    """Get sitemap status."""
    try:
        response = service.sitemaps().list(siteUrl=SITE_URL).execute()
        return response.get("sitemap", [])
    except HttpError as e:
        print(f"Error fetching sitemaps: {e}")
        return []


def format_report(analytics, crawl_errors, sitemaps):
    """Format the monitoring report as a printable string."""
    report = []
    report.append("=" * 70)
    report.append("GOOGLE SEARCH CONSOLE MONITORING REPORT")
    report.append(f"Site: {SITE_URL}")
    report.append(f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    report.append("=" * 70)

    # Search Analytics Summary
    report.append("\n📊 SEARCH ANALYTICS (Last 7 Days)")
    report.append("-" * 70)
    if analytics:
        total_clicks = sum(row["clicks"] for row in analytics)
        total_impressions = sum(row["impressions"] for row in analytics)
        # Overall CTR from the totals; averaging per-row "ctr" values would
        # weight low-traffic rows the same as high-traffic ones.
        avg_ctr = (total_clicks / total_impressions * 100) if total_impressions else 0.0
        # Unweighted mean position across the returned rows.
        avg_position = sum(row["position"] for row in analytics) / len(analytics)
        report.append(f"Total Clicks: {total_clicks:,}")
        report.append(f"Total Impressions: {total_impressions:,}")
        report.append(f"Average CTR: {avg_ctr:.2f}%")
        report.append(f"Average Position: {avg_position:.1f}")

        # Top 5 queries by clicks
        report.append("\n🔍 Top 5 Queries:")
        sorted_queries = sorted(analytics, key=lambda x: x["clicks"], reverse=True)[:5]
        for i, row in enumerate(sorted_queries, 1):
            query = row["keys"][0]
            clicks = row["clicks"]
            impressions = row["impressions"]
            report.append(
                f' {i}. "{query}" - {clicks} clicks, {impressions} impressions'
            )
    else:
        report.append("No search analytics data available yet (may take 48-72 hours)")

    # Crawl Errors
    report.append("\n🚨 CRAWL ERRORS")
    report.append("-" * 70)
    if crawl_errors:
        total_errors = sum(error.get("count", 0) for error in crawl_errors)
        if total_errors > 0:
            report.append(f"⚠️ Total Errors: {total_errors}")
            for error in crawl_errors:
                error_type = error.get("platform", "Unknown")
                category = error.get("category", "Unknown")
                count = error.get("count", 0)
                if count > 0:
                    report.append(f" - {error_type} / {category}: {count}")
        else:
            report.append("✅ No crawl errors detected!")
    else:
        report.append("✅ No crawl errors detected!")

    # Sitemaps
    report.append("\n🗺️ SITEMAPS")
    report.append("-" * 70)
    if sitemaps:
        for sitemap in sitemaps:
            path = sitemap.get("path", "Unknown")
            is_pending = sitemap.get("isPending", False)
            is_sitemap_index = sitemap.get("isSitemapIndex", False)
            status = "⏳ Pending" if is_pending else "✅ Processed"
            report.append(f" {path}")
            report.append(f" Status: {status}")
            if not is_sitemap_index and "warnings" in sitemap:
                report.append(f" Warnings: {sitemap['warnings']}")
            if not is_sitemap_index and "errors" in sitemap:
                report.append(f" Errors: {sitemap['errors']} ⚠️")
    else:
        report.append(
            "⚠️ No sitemaps found. Submit your sitemap to Google Search Console!"
        )

    report.append("\n" + "=" * 70)
    return "\n".join(report)


def main():
    """Main monitoring function."""
    print("🔍 Starting Google Search Console monitoring...")

    # Check for credentials
    if not os.path.exists(KEY_FILE):
        print(f"❌ Error: Service account key file not found at {KEY_FILE}")
        print("Please set up Google Cloud credentials first.")
        sys.exit(1)

    try:
        service = get_service()

        # Gather data
        analytics = get_search_analytics(service)
        crawl_errors = get_crawl_errors(service)
        sitemaps = get_sitemaps(service)

        # Generate and print report
        report = format_report(analytics, crawl_errors, sitemaps)
        print(report)

        # Save report to a timestamped file
        report_file = f"/var/log/gsc-monitoring/report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
        os.makedirs(os.path.dirname(report_file), exist_ok=True)
        with open(report_file, "w") as f:
            f.write(report)
        print(f"\n💾 Report saved to: {report_file}")
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)


if __name__ == "__main__":
    main()
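
For a quick local check of the report formatting, format_report can be fed fabricated rows shaped like searchanalytics.query responses ("keys" mirrors the requested dimensions, here query and page). The snippet below is a sketch with invented values; it needs google-api-python-client installed (the module imports it at the top) but touches no network and requires no service-account key.

# Hypothetical smoke test for format_report using made-up sample data.
from monitor import format_report

sample_rows = [
    {"keys": ["lavender oil", "https://manoonoils.com/lavender"],
     "clicks": 12, "impressions": 340, "ctr": 0.0353, "position": 7.2},
    {"keys": ["carrier oils", "https://manoonoils.com/guide"],
     "clicks": 4, "impressions": 95, "ctr": 0.0421, "position": 12.8},
]
sample_sitemaps = [
    {"path": "https://manoonoils.com/sitemap.xml",
     "isPending": False, "isSitemapIndex": False,
     "warnings": "0", "errors": "0"},
]
print(format_report(sample_rows, [], sample_sitemaps))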