diff --git a/scripts/gsc-monitoring/Dockerfile b/scripts/gsc-monitoring/Dockerfile new file mode 100644 index 0000000..ff98f45 --- /dev/null +++ b/scripts/gsc-monitoring/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.11-slim + +WORKDIR /app + +# Install dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy monitoring script +COPY monitor.py . + +# Create log directory +RUN mkdir -p /var/log/gsc-monitoring + +# Run monitoring +CMD ["python", "monitor.py"] diff --git a/scripts/gsc-monitoring/README.md b/scripts/gsc-monitoring/README.md new file mode 100644 index 0000000..cab6e58 --- /dev/null +++ b/scripts/gsc-monitoring/README.md @@ -0,0 +1,185 @@ +# Google Search Console Monitoring Setup Guide + +## Overview +This setup creates an automated monitoring system for Google Search Console that runs daily and generates reports. + +## Prerequisites +1. Google Cloud account +2. Access to Google Search Console for manoonoils.com +3. kubectl access to your Kubernetes cluster + +## Setup Steps + +### Step 1: Create Google Cloud Project + +1. Go to https://console.cloud.google.com +2. Click "Create Project" (or select existing) +3. Name it: `manoonoils-monitoring` +4. Note the Project ID + +### Step 2: Enable Search Console API + +1. In your project, go to "APIs & Services" β†’ "Library" +2. Search for "Google Search Console API" +3. Click "Enable" + +### Step 3: Create Service Account + +1. Go to "IAM & Admin" β†’ "Service Accounts" +2. Click "Create Service Account" +3. Name: `gsc-monitor` +4. Description: `Monitoring service for Google Search Console` +5. Click "Create and Continue" +6. Role: Select "Search Console Viewer" (or "Owner" if not available) +7. Click "Done" + +### Step 4: Create and Download Key + +1. Click on the service account you just created +2. Go to "Keys" tab +3. Click "Add Key" β†’ "Create New Key" +4. Select "JSON" format +5. Click "Create" - this downloads the key file +6. 
**SAVE THIS FILE SECURELY** - you cannot download it again! + +### Step 5: Add Service Account to Search Console + +1. Go to https://search.google.com/search-console +2. Select your property: `manoonoils.com` +3. Click "Settings" (gear icon) β†’ "Users and Permissions" +4. Click "Add User" +5. Enter the service account email (from the JSON key file, looks like: `gsc-monitor@manoonoils-monitoring.iam.gserviceaccount.com`) +6. Permission level: "Full" +7. Click "Add" + +### Step 6: Store Credentials in Kubernetes + +On your server (doorwaysftw), run: + +```bash +# Copy the JSON key file to the server +scp /path/to/service-account-key.json doorwaysftw:/tmp/ + +# Create the secret in Kubernetes +ssh doorwaysftw "kubectl create secret generic gsc-service-account \ + --namespace=manoonoils \ + --from-file=service-account.json=/tmp/service-account-key.json" + +# Verify the secret was created +ssh doorwaysftw "kubectl get secret gsc-service-account -n manoonoils" +``` + +### Step 7: Build and Deploy + +```bash +# Build the Docker image +cd scripts/gsc-monitoring +docker build -t gcr.io/manoonoils/gsc-monitoring:latest . + +# Push to registry (or use local registry) +docker push gcr.io/manoonoils/gsc-monitoring:latest + +# Deploy to Kubernetes +kubectl apply -f cronjob.yaml + +# Verify it's running +kubectl get cronjob gsc-monitoring -n manoonoils +``` + +### Step 8: Test Manually + +```bash +# Run a manual test +kubectl create job --from=cronjob/gsc-monitoring gsc-test -n manoonoils + +# Check the logs +kubectl logs job/gsc-test -n manoonoils + +# Delete the test job when done +kubectl delete job gsc-test -n manoonoils +``` + +## What It Monitors + +### Daily Reports Include: + +1. **Search Analytics** (Last 7 Days) + - Total clicks and impressions + - Average CTR and position + - Top 5 search queries + +2. **Crawl Errors** + - Number of errors by type + - Platform-specific issues + +3. 
**Sitemap Status**
   - Sitemap processing status
   - Warnings and errors

## Viewing Reports

The report is also printed to stdout, so the simplest way to read it is the job's logs:

```bash
# Most recent job created by the CronJob (jobs are named gsc-monitoring-<id>)
JOB=$(kubectl get jobs -n manoonoils -o name | grep gsc-monitoring | tail -1)
kubectl logs $JOB -n manoonoils
```

Reports are additionally saved to `/var/log/gsc-monitoring/` inside the pod.
Note: pods created by a CronJob carry a `job-name` label equal to the *generated*
job name (e.g. `gsc-monitoring-28901234`), not the CronJob name, so a selector
like `-l job-name=gsc-monitoring` matches nothing — select by name prefix instead:

```bash
# Get pod name (the pod must still be running for `kubectl exec` to work)
POD=$(kubectl get pods -n manoonoils -o name | grep gsc-monitoring | head -1)

# View latest report
kubectl exec $POD -n manoonoils -- cat /var/log/gsc-monitoring/$(kubectl exec $POD -n manoonoils -- ls -t /var/log/gsc-monitoring/ | head -1)
```

Or set up log aggregation with your preferred tool.

## Schedule

The monitoring runs daily at **9:00 AM UTC**. To change:

```bash
# Edit the cronjob
kubectl edit cronjob gsc-monitoring -n manoonoils

# Change the schedule field (cron format)
# Examples:
# "0 */6 * * *"  # Every 6 hours
# "0 0 * * 0"    # Weekly on Sunday
```

## Troubleshooting

### "Service account key file not found"
- Verify the secret was created: `kubectl get secret gsc-service-account -n manoonoils`
- Check the key is mounted (there is no Deployment — exec into a running job pod):
  `kubectl exec <gsc-monitoring-job-pod> -n manoonoils -- ls -la /etc/gsc-monitoring/`

### "User does not have permission"
- Verify the service account email was added to GSC with "Full" permissions
- Wait 5-10 minutes for permissions to propagate

### "Site not found"
- Verify the SITE_URL in `monitor.py` matches exactly (with trailing slash)
- Check: https://search.google.com/search-console

## Security Notes

- The service account JSON key is stored as a Kubernetes Secret
- The key has read-only access to Search Console data
- Rotate the key every 90 days for security
- Never commit the key file to git

## Updating the Monitor

To update the monitoring script:

1. Edit `monitor.py`
2. Rebuild the Docker image
3. Push to registry
4. 
Delete and recreate the CronJob:
   ```bash
   kubectl delete cronjob gsc-monitoring -n manoonoils
   kubectl apply -f cronjob.yaml
   ```

## Support

For issues or feature requests, check:
- Google Search Console API docs: https://developers.google.com/webmaster-tools/search-console-api-original/v3
- Google Cloud IAM docs: https://cloud.google.com/iam/docs
diff --git a/scripts/gsc-monitoring/cronjob.yaml b/scripts/gsc-monitoring/cronjob.yaml
new file mode 100644
index 0000000..6bdaaa9
--- /dev/null
+++ b/scripts/gsc-monitoring/cronjob.yaml
@@ -0,0 +1,36 @@
+apiVersion: batch/v1
+kind: CronJob
+metadata:
+  name: gsc-monitoring
+  namespace: manoonoils
+spec:
+  schedule: "0 9 * * *"  # Run daily at 9 AM UTC
+  jobTemplate:
+    spec:
+      template:
+        spec:
+          containers:
+          - name: gsc-monitor
+            image: gcr.io/manoonoils/gsc-monitoring:latest
+            env:
+            - name: GSC_KEY_FILE
+              value: /etc/gsc-monitoring/service-account.json
+            - name: PYTHONUNBUFFERED
+              value: "1"
+            volumeMounts:
+            - name: gsc-credentials
+              mountPath: /etc/gsc-monitoring
+              readOnly: true
+            - name: logs
+              mountPath: /var/log/gsc-monitoring
+          volumes:
+          - name: gsc-credentials
+            secret:
+              secretName: gsc-service-account
+          - name: logs
+            emptyDir: {}
+          restartPolicy: OnFailure
+# NOTE: the gsc-service-account Secret is deliberately NOT defined in this
+# manifest. A committed placeholder Secret would be re-applied by
+# `kubectl apply -f cronjob.yaml` and would overwrite the real secret created
+# in Step 6 of the README. Create the secret out-of-band with `kubectl create secret`.
diff --git a/scripts/gsc-monitoring/monitor.py b/scripts/gsc-monitoring/monitor.py
new file mode 100644
index 0000000..f9af1bc
--- /dev/null
+++ b/scripts/gsc-monitoring/monitor.py
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+"""
+Google Search Console Monitoring Script
+Monitors search performance, crawl errors, and indexing status
+"""
+
+import os
+import json
+import sys
+from datetime import datetime, timedelta +from google.oauth2 import service_account +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError + +# Configuration +SITE_URL = "https://manoonoils.com/" +SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"] +KEY_FILE = os.environ.get("GSC_KEY_FILE", "/etc/gsc-monitoring/service-account.json") + + +def get_service(): + """Authenticate and return Search Console service""" + credentials = service_account.Credentials.from_service_account_file( + KEY_FILE, scopes=SCOPES + ) + return build("webmasters", "v3", credentials=credentials) + + +def get_search_analytics(service, days=7): + """Get search analytics data for the last N days""" + end_date = datetime.now().strftime("%Y-%m-%d") + start_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d") + + try: + request = { + "startDate": start_date, + "endDate": end_date, + "dimensions": ["query", "page"], + "rowLimit": 100, + } + + response = ( + service.searchanalytics().query(siteUrl=SITE_URL, body=request).execute() + ) + + return response.get("rows", []) + except HttpError as e: + print(f"Error fetching search analytics: {e}") + return [] + + +def get_crawl_errors(service): + """Get crawl errors summary""" + try: + response = service.urlcrawlerrorscounts().query(siteUrl=SITE_URL).execute() + return response.get("countPerTypes", []) + except HttpError as e: + print(f"Error fetching crawl errors: {e}") + return [] + + +def get_sitemaps(service): + """Get sitemap status""" + try: + response = service.sitemaps().list(siteUrl=SITE_URL).execute() + return response.get("sitemap", []) + except HttpError as e: + print(f"Error fetching sitemaps: {e}") + return [] + + +def format_report(analytics, crawl_errors, sitemaps): + """Format monitoring report""" + report = [] + report.append("=" * 70) + report.append("GOOGLE SEARCH CONSOLE MONITORING REPORT") + report.append(f"Site: {SITE_URL}") + report.append(f"Date: 
{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + report.append("=" * 70) + + # Search Analytics Summary + report.append("\nπŸ“Š SEARCH ANALYTICS (Last 7 Days)") + report.append("-" * 70) + + if analytics: + total_clicks = sum(row["clicks"] for row in analytics) + total_impressions = sum(row["impressions"] for row in analytics) + avg_ctr = sum(row["ctr"] for row in analytics) / len(analytics) * 100 + avg_position = sum(row["position"] for row in analytics) / len(analytics) + + report.append(f"Total Clicks: {total_clicks:,}") + report.append(f"Total Impressions: {total_impressions:,}") + report.append(f"Average CTR: {avg_ctr:.2f}%") + report.append(f"Average Position: {avg_position:.1f}") + + # Top 5 queries + report.append("\nπŸ” Top 5 Queries:") + sorted_queries = sorted(analytics, key=lambda x: x["clicks"], reverse=True)[:5] + for i, row in enumerate(sorted_queries, 1): + query = row["keys"][0] + clicks = row["clicks"] + impressions = row["impressions"] + report.append( + f' {i}. "{query}" - {clicks} clicks, {impressions} impressions' + ) + else: + report.append("No search analytics data available yet (may take 48-72 hours)") + + # Crawl Errors + report.append("\n🚨 CRAWL ERRORS") + report.append("-" * 70) + + if crawl_errors: + total_errors = sum(error.get("count", 0) for error in crawl_errors) + if total_errors > 0: + report.append(f"⚠️ Total Errors: {total_errors}") + for error in crawl_errors: + error_type = error.get("platform", "Unknown") + category = error.get("category", "Unknown") + count = error.get("count", 0) + if count > 0: + report.append(f" - {error_type} / {category}: {count}") + else: + report.append("βœ… No crawl errors detected!") + else: + report.append("βœ… No crawl errors detected!") + + # Sitemaps + report.append("\nπŸ—ΊοΈ SITEMAPS") + report.append("-" * 70) + + if sitemaps: + for sitemap in sitemaps: + path = sitemap.get("path", "Unknown") + is_pending = sitemap.get("isPending", False) + is_sitemap_index = sitemap.get("isSitemapIndex", 
False) + + status = "⏳ Pending" if is_pending else "βœ… Processed" + report.append(f" {path}") + report.append(f" Status: {status}") + + if not is_sitemap_index and "warnings" in sitemap: + report.append(f" Warnings: {sitemap['warnings']}") + if not is_sitemap_index and "errors" in sitemap: + report.append(f" Errors: {sitemap['errors']} ⚠️") + else: + report.append( + "⚠️ No sitemaps found. Submit your sitemap to Google Search Console!" + ) + + report.append("\n" + "=" * 70) + + return "\n".join(report) + + +def main(): + """Main monitoring function""" + print("πŸ” Starting Google Search Console monitoring...") + + # Check for credentials + if not os.path.exists(KEY_FILE): + print(f"❌ Error: Service account key file not found at {KEY_FILE}") + print("Please set up Google Cloud credentials first.") + sys.exit(1) + + try: + service = get_service() + + # Gather data + analytics = get_search_analytics(service) + crawl_errors = get_crawl_errors(service) + sitemaps = get_sitemaps(service) + + # Generate and print report + report = format_report(analytics, crawl_errors, sitemaps) + print(report) + + # Save report to file + report_file = f"/var/log/gsc-monitoring/report_{datetime.now().strftime('%Y%m%d_%H%M%S')}.txt" + os.makedirs(os.path.dirname(report_file), exist_ok=True) + with open(report_file, "w") as f: + f.write(report) + print(f"\nπŸ’Ύ Report saved to: {report_file}") + + except Exception as e: + print(f"❌ Error: {e}") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/scripts/gsc-monitoring/requirements.txt b/scripts/gsc-monitoring/requirements.txt new file mode 100644 index 0000000..9174ec0 --- /dev/null +++ b/scripts/gsc-monitoring/requirements.txt @@ -0,0 +1,4 @@ +google-auth>=2.22.0 +google-auth-oauthlib>=1.0.0 +google-auth-httplib2>=0.1.1 +google-api-python-client>=2.95.0