Files
manoon-headless/scripts/gsc-monitoring/monitor.py
Unchained 36915a3f75
Some checks are pending
Build and Deploy / build (push) Waiting to run
feat: add OAuth 2.0 support for GSC monitoring
- Updated monitor.py to support both OAuth and Service Account
- Created setup-oauth-local.py for easy local authorization
- Created cronjob-oauth.yaml for OAuth-based deployment
- Updated README with both authentication options
- OAuth is now the recommended method (no key file needed)
2026-03-30 17:56:49 +02:00

235 lines
7.6 KiB
Python

#!/usr/bin/env python3
"""
Google Search Console Monitoring Script
Monitors search performance, crawl errors, and indexing status
Supports both:
1. Service Account (with JSON key file)
2. OAuth 2.0 (user authentication)
"""
import os
import json
import sys
from datetime import datetime, timedelta
from google.oauth2 import service_account
from google.oauth2.credentials import Credentials as OAuthCredentials
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
# Configuration
# Property URL exactly as registered in Search Console (for URL-prefix
# properties the trailing slash is significant).
SITE_URL = "https://manoonoils.com/"
# Read-only scope is sufficient: this script only queries data.
SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]
# Service-account JSON key (fallback auth method); override via GSC_KEY_FILE.
KEY_FILE = os.environ.get("GSC_KEY_FILE", "/etc/gsc-monitoring/service-account.json")
# Stored OAuth user credentials (preferred auth method); override via GSC_OAUTH_FILE.
OAUTH_FILE = os.environ.get(
    "GSC_OAUTH_FILE", "/etc/gsc-monitoring/oauth-credentials.json"
)
def get_service():
    """Authenticate and return a Search Console API client.

    Prefers stored OAuth user credentials (OAUTH_FILE); falls back to a
    service-account key file (KEY_FILE).

    Returns:
        googleapiclient.discovery.Resource for the "webmasters" v3 API.

    Raises:
        FileNotFoundError: if neither credentials file exists.
    """
    # Try OAuth first
    if os.path.exists(OAUTH_FILE):
        print("Using OAuth authentication...")
        with open(OAUTH_FILE, "r") as f:
            creds_info = json.load(f)
        creds = OAuthCredentials(
            token=creds_info["token"],
            refresh_token=creds_info["refresh_token"],
            token_uri=creds_info["token_uri"],
            client_id=creds_info["client_id"],
            client_secret=creds_info["client_secret"],
            scopes=creds_info["scopes"],
        )
        # Only refresh when a refresh token is present: calling refresh()
        # without one raises RefreshError instead of failing cleanly later
        # (guard recommended by the google-auth documentation).
        if creds.expired and creds.refresh_token:
            creds.refresh(Request())
            # Persist the new access token so the next run reuses it.
            creds_info["token"] = creds.token
            with open(OAUTH_FILE, "w") as f:
                json.dump(creds_info, f, indent=2)
        return build("webmasters", "v3", credentials=creds)
    # Fall back to service account
    if os.path.exists(KEY_FILE):
        print("Using Service Account authentication...")
        credentials = service_account.Credentials.from_service_account_file(
            KEY_FILE, scopes=SCOPES
        )
        return build("webmasters", "v3", credentials=credentials)
    raise FileNotFoundError(
        f"No credentials found. Please set up either:\n"
        f" 1. OAuth: {OAUTH_FILE}\n"
        f" 2. Service Account: {KEY_FILE}\n"
        f"\nSee README.md for setup instructions."
    )
def get_search_analytics(service, days=7, *, dimensions=("query", "page"), row_limit=100):
    """Return search-analytics rows for the trailing *days* window.

    Args:
        service: Search Console API client from get_service().
        days: size of the date window ending today.  NOTE(review): GSC data
            typically lags ~2-3 days, so the most recent days may be partial.
        dimensions: dimensions to group rows by (keyword-only; defaults match
            the previous hard-coded ["query", "page"]).
        row_limit: maximum number of rows to request (keyword-only).

    Returns:
        List of row dicts, or [] on API error / no data.
    """
    end_date = datetime.now().strftime("%Y-%m-%d")
    start_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
    request = {
        "startDate": start_date,
        "endDate": end_date,
        "dimensions": list(dimensions),
        "rowLimit": row_limit,
    }
    # Keep the try body minimal: only the API round-trip can raise HttpError.
    try:
        response = (
            service.searchanalytics().query(siteUrl=SITE_URL, body=request).execute()
        )
        return response.get("rows", [])
    except HttpError as e:
        print(f"Error fetching search analytics: {e}")
        return []
def get_crawl_errors(service):
    """Return crawl-error counts, or [] when unavailable.

    NOTE(review): urlcrawlerrorscounts belongs to the retired Crawl Errors
    API; current "webmasters" v3 discovery documents may not expose the
    method at all, in which case the attribute lookup itself fails with
    AttributeError rather than the HTTP request failing with HttpError.
    """
    try:
        response = service.urlcrawlerrorscounts().query(siteUrl=SITE_URL).execute()
        return response.get("countPerTypes", [])
    except AttributeError:
        # Endpoint missing from the discovery document (API retired) —
        # degrade gracefully instead of crashing the whole monitoring run.
        print("Crawl errors endpoint not available in this API version")
        return []
    except HttpError as e:
        print(f"Error fetching crawl errors: {e}")
        return []
def get_sitemaps(service):
    """Fetch the sitemaps registered for SITE_URL.

    Returns the API's "sitemap" entries, or an empty list if the call fails.
    """
    try:
        listing_request = service.sitemaps().list(siteUrl=SITE_URL)
        result = listing_request.execute()
    except HttpError as exc:
        print(f"Error fetching sitemaps: {exc}")
        return []
    return result.get("sitemap", [])
def format_report(analytics, crawl_errors, sitemaps):
    """Build the human-readable monitoring report.

    Args:
        analytics: search-analytics row dicts from get_search_analytics().
        crawl_errors: countPerTypes entries from get_crawl_errors().
        sitemaps: sitemap entries from get_sitemaps().

    Returns:
        The complete report as a single newline-joined string.
    """
    report = [
        "=" * 70,
        "GOOGLE SEARCH CONSOLE MONITORING REPORT",
        f"Site: {SITE_URL}",
        f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "=" * 70,
    ]
    report.extend(_analytics_section(analytics))
    report.extend(_crawl_errors_section(crawl_errors))
    report.extend(_sitemaps_section(sitemaps))
    report.append("\n" + "=" * 70)
    return "\n".join(report)


def _analytics_section(analytics):
    """Report lines for the search-analytics summary."""
    lines = ["\n📊 SEARCH ANALYTICS (Last 7 Days)", "-" * 70]
    if not analytics:
        lines.append("No search analytics data available yet (may take 48-72 hours)")
        return lines
    total_clicks = sum(row["clicks"] for row in analytics)
    total_impressions = sum(row["impressions"] for row in analytics)
    # Impression-weighted aggregates, matching how Search Console itself
    # reports CTR/position; a plain per-row mean over-weights queries with
    # very few impressions.  Guard against a zero-impression window.
    if total_impressions:
        avg_ctr = total_clicks / total_impressions * 100
        avg_position = (
            sum(row["position"] * row["impressions"] for row in analytics)
            / total_impressions
        )
    else:
        avg_ctr = 0.0
        avg_position = 0.0
    lines.append(f"Total Clicks: {total_clicks:,}")
    lines.append(f"Total Impressions: {total_impressions:,}")
    lines.append(f"Average CTR: {avg_ctr:.2f}%")
    lines.append(f"Average Position: {avg_position:.1f}")
    # Top 5 queries by clicks
    lines.append("\n🔍 Top 5 Queries:")
    top_queries = sorted(analytics, key=lambda row: row["clicks"], reverse=True)[:5]
    for i, row in enumerate(top_queries, 1):
        query = row["keys"][0]
        clicks = row["clicks"]
        impressions = row["impressions"]
        lines.append(
            f' {i}. "{query}" - {clicks} clicks, {impressions} impressions'
        )
    return lines


def _crawl_errors_section(crawl_errors):
    """Report lines for the crawl-errors summary."""
    lines = ["\n🚨 CRAWL ERRORS", "-" * 70]
    # sum() over an empty list is 0, so "no data" and "no errors" collapse
    # into the same success message (as in the original two-branch version).
    total_errors = sum(error.get("count", 0) for error in crawl_errors)
    if total_errors > 0:
        lines.append(f"⚠️ Total Errors: {total_errors}")
        for error in crawl_errors:
            count = error.get("count", 0)
            if count > 0:
                error_type = error.get("platform", "Unknown")
                category = error.get("category", "Unknown")
                lines.append(f" - {error_type} / {category}: {count}")
    else:
        lines.append("✅ No crawl errors detected!")
    return lines


def _sitemaps_section(sitemaps):
    """Report lines for the sitemap status summary."""
    lines = ["\n🗺️ SITEMAPS", "-" * 70]
    if not sitemaps:
        lines.append(
            "⚠️ No sitemaps found. Submit your sitemap to Google Search Console!"
        )
        return lines
    for sitemap in sitemaps:
        path = sitemap.get("path", "Unknown")
        is_pending = sitemap.get("isPending", False)
        is_sitemap_index = sitemap.get("isSitemapIndex", False)
        status = "⏳ Pending" if is_pending else "✅ Processed"
        lines.append(f" {path}")
        lines.append(f" Status: {status}")
        # warnings/errors counts are only shown for leaf sitemaps here.
        if not is_sitemap_index and "warnings" in sitemap:
            lines.append(f" Warnings: {sitemap['warnings']}")
        if not is_sitemap_index and "errors" in sitemap:
            lines.append(f" Errors: {sitemap['errors']} ⚠️")
    return lines
def main():
    """Run one monitoring pass: authenticate, collect data, print and save a report.

    Exits 1 on missing credentials or any data-collection failure; a failure
    to persist the report file is logged but not fatal.
    """
    print("🔍 Starting Google Search Console monitoring...")
    try:
        service = get_service()
    except FileNotFoundError as e:
        print(f"{e}")
        sys.exit(1)
    try:
        # Gather data
        analytics = get_search_analytics(service)
        crawl_errors = get_crawl_errors(service)
        sitemaps = get_sitemaps(service)
        # Generate and print report
        report = format_report(analytics, crawl_errors, sitemaps)
        print(report)
    except Exception as e:
        print(f"❌ Error: {e}")
        sys.exit(1)
    # Saving is best-effort: the report has already been printed above, so a
    # read-only or missing log directory should not turn the run into a
    # failure (previously any OSError here made the script exit 1).
    report_file = (
        f"/var/log/gsc-monitoring/report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt"
    )
    try:
        os.makedirs(os.path.dirname(report_file), exist_ok=True)
        with open(report_file, "w") as f:
            f.write(report)
        print(f"\n💾 Report saved to: {report_file}")
    except OSError as e:
        print(f"❌ Error: {e}")


if __name__ == "__main__":
    main()