import requests
import mysql.connector
import os
import time
from datetime import datetime
from dotenv import load_dotenv

# Load variables from a local .env file; override=True lets .env values
# replace anything already set in the process environment.
load_dotenv(override=True)

# DataForSEO credentials
# SECURITY(review): hard-coded fallback credentials are committed to source
# control — rotate these keys and require the env vars to be set instead.
DFS_USER = os.getenv("DATAFORSEO_USER") or "wcutler@premierwritinggroup.com"
DFS_PASS = os.getenv("DATAFORSEO_PASS") or "beb2b7301122a432"
AUTH = (DFS_USER, DFS_PASS)  # HTTP Basic auth pair passed to every API call

# One log file per calendar day under ./logs, resolved once at import time.
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_path = os.path.join(log_dir, f"{datetime.now().strftime('%Y-%m-%d')}.log")

def log(message):
    """Echo *message* to stdout with a timestamp and append it to today's log file."""
    stamped = f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {message}"
    print(stamped)
    with open(log_path, "a", encoding="utf-8") as handle:
        handle.write(stamped + "\n")


# --- DB Helpers ---
def get_pending_queries():
    """Return ``id``/``query_text`` dict rows for queries with status 'pending'.

    Opens a fresh connection per call (credentials from HOST/USER/PASSWORD/
    DATABASE env vars). try/finally guarantees the cursor and connection are
    released even if the query raises — the original leaked both on error.
    """
    conn = mysql.connector.connect(
        host=os.getenv("HOST"),
        user=os.getenv("USER"),
        password=os.getenv("PASSWORD"),
        database=os.getenv("DATABASE")
    )
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            cursor.execute("SELECT id, query_text FROM resume_queries WHERE status = 'pending'")
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

def get_queries_with_task():
    """Return ``id``/``query_text``/``task_id`` rows where task_status = 0.

    These are queries that already have a DataForSEO task submitted but whose
    results have not been stored yet. try/finally ensures the cursor and
    connection close even on query failure — the original leaked both.
    """
    conn = mysql.connector.connect(
        host=os.getenv("HOST"),
        user=os.getenv("USER"),
        password=os.getenv("PASSWORD"),
        database=os.getenv("DATABASE")
    )
    try:
        cursor = conn.cursor(dictionary=True)
        try:
            cursor.execute("SELECT id, query_text, task_id FROM resume_queries WHERE task_status=0")
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        conn.close()

def save_task_id_to_db(query_id, task_id):
    """Record *task_id* on the query row and flip its status to 'done'.

    Setting status='done' here is what stops get_pending_queries from
    re-submitting the same query on the next poll cycle. try/finally
    guarantees cleanup even if the UPDATE raises — the original leaked
    the connection on error.
    """
    conn = mysql.connector.connect(
        host=os.getenv("HOST"),
        user=os.getenv("USER"),
        password=os.getenv("PASSWORD"),
        database=os.getenv("DATABASE")
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("UPDATE resume_queries SET task_id=%s, status='done' WHERE id=%s", (task_id, query_id))
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()

def insert_result_url(query, url, page_number, query_id):
    """Insert one SERP result row into google_search_results.

    Uses a parameterized INSERT (safe against SQL injection). try/finally
    guarantees the cursor and connection close even if the insert raises —
    the original leaked both on error.
    """
    conn = mysql.connector.connect(
        host=os.getenv("HOST"),
        user=os.getenv("USER"),
        password=os.getenv("PASSWORD"),
        database=os.getenv("DATABASE")
    )
    try:
        cursor = conn.cursor()
        try:
            sql = """
        INSERT INTO google_search_results (query, url, page_number, resume_query_id)
        VALUES (%s,%s,%s,%s)
    """
            cursor.execute(sql, (query, url, page_number, query_id))
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()

def mark_urls_processed(query_id):
    """Set task_status=1 on the query so get_queries_with_task stops polling it.

    try/finally guarantees the cursor and connection close even if the
    UPDATE raises — the original leaked both on error.
    """
    conn = mysql.connector.connect(
        host=os.getenv("HOST"),
        user=os.getenv("USER"),
        password=os.getenv("PASSWORD"),
        database=os.getenv("DATABASE")
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("UPDATE resume_queries SET task_status=1 WHERE id=%s", (query_id,))
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()

def mark_query_done(query_id):
    """Set status='done' on the query row.

    NOTE(review): save_task_id_to_db already sets status='done' at task
    creation, so this call is usually redundant — confirm before removing.
    try/finally guarantees cleanup even if the UPDATE raises — the original
    leaked the connection on error.
    """
    conn = mysql.connector.connect(
        host=os.getenv("HOST"),
        user=os.getenv("USER"),
        password=os.getenv("PASSWORD"),
        database=os.getenv("DATABASE")
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute("UPDATE resume_queries SET status='done' WHERE id=%s", (query_id,))
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()


# --- DataForSEO API ---
def create_dataforseo_task(query_text, query_id):
    """Submit a Google organic SERP task to DataForSEO for *query_text*.

    On success, persists the returned task id (which also marks the query
    'done' via save_task_id_to_db so it is not re-submitted). On any
    failure, logs and returns without raising.

    Fixes over the original: a request timeout (the original could hang the
    polling loop forever on a stalled connection) and a guard around
    ``resp.json()`` (which raises ValueError on non-JSON error pages).
    """
    url = "https://api.dataforseo.com/v3/serp/google/organic/task_post"
    payload = [
        {
            "language_name": "English",
            "location_name": "United States",
            "keyword": query_text,
            "depth": 100,
            "max_crawl_pages": 10
        }
    ]
    try:
        resp = requests.post(url, auth=AUTH, json=payload, timeout=60)
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        log(f"❌ Request error creating task for query {query_id}: {exc}")
        return
    if resp.status_code == 200 and data.get("tasks"):
        task_id = data["tasks"][0]["id"]
        log(f"✅ Created DataForSEO task {task_id} for query {query_id}")
        save_task_id_to_db(query_id, task_id)
    else:
        log(f"❌ Failed to create task for query {query_id}: {data}")


def fetch_task_results(task_id, query_text, query_id):
    """Fetch finished SERP results for *task_id* and persist organic URLs.

    When results are ready: inserts one row per organic item (page number
    derived from rank_group at 10 results per page), then flips task_status
    and status so the query leaves both polling sets. When the task is not
    ready yet, logs and returns so the next cycle retries.

    Fixes over the original: request timeout (no more indefinite hangs),
    guarded ``resp.json()``, removal of the dead ``page_map`` accumulator,
    and the "Stored N" log now counts rows actually inserted instead of all
    SERP items (the original over-counted non-organic items).
    """
    url = f"https://api.dataforseo.com/v3/serp/google/organic/task_get/regular/{task_id}"
    try:
        resp = requests.get(url, auth=AUTH, timeout=60)
        data = resp.json()
    except (requests.RequestException, ValueError) as exc:
        log(f"❌ Request error fetching results for {task_id}: {exc}")
        return

    if not (resp.status_code == 200 and data.get("tasks")):
        log(f"❌ Failed fetching results for {task_id}: {data}")
        return

    task = data["tasks"][0]
    result_list = task.get("result", [])
    if not (result_list and isinstance(result_list, list) and result_list[0].get("items")):
        log(f"⏳ Task {task_id} not ready or no items yet (query_id={query_id})")
        return

    results = result_list[0].get("items", [])
    if not isinstance(results, list):
        log(f"⚠️ Unexpected format for results in task {task_id}: {type(results)}")
        return

    stored = 0
    for item in results:
        if item.get("type") == "organic" and "url" in item:
            position = item.get("rank_group", 0)
            page_number = (position - 1) // 10 + 1
            insert_result_url(query_text, item["url"], page_number, query_id)
            stored += 1

    log(f"✅ Stored {stored} results for query {query_id}")

    # Stop polling this task and close out the query.
    mark_urls_processed(query_id)
    mark_query_done(query_id)

# --- Continuous runner ---
def run_all_pending_queries():
    """Create a DataForSEO task for every query still marked 'pending'."""
    for row in get_pending_queries():
        create_dataforseo_task(row["query_text"], row["id"])


def process_in_progress_tasks():
    """Poll DataForSEO for every query that has a task but unstored results.

    Replaces the original's stray debug ``print``s (which dumped raw rows to
    stdout and contained a typo) with the file's standard ``log()`` helper.
    """
    in_progress = get_queries_with_task()
    log(f"Fetching results for {len(in_progress)} in-progress task(s)")
    for q in in_progress:
        fetch_task_results(q["task_id"], q["query_text"], q["id"])


if __name__ == "__main__":
    POLL_INTERVAL_SECONDS = 140  # delay between polling cycles

    # Run forever: submit tasks for new queries, then collect any finished
    # results. A try/except keeps one transient failure (network blip, DB
    # hiccup) from killing the whole daemon — the original crashed outright.
    # The sleep message now reports the real interval; the original claimed
    # "5 minutes" while sleeping 140 seconds.
    while True:
        log("🔁 Checking for new/pending queries...")
        try:
            run_all_pending_queries()
            process_in_progress_tasks()
        except Exception as exc:
            log(f"❌ Cycle failed: {exc}")
        log(f"⏳ Sleeping {POLL_INTERVAL_SECONDS} seconds before next check...\n")
        time.sleep(POLL_INTERVAL_SECONDS)
