import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import random
import mysql.connector
import requests
from fake_useragent import UserAgent
from dotenv import load_dotenv
import os
from datetime import datetime
from urllib.parse import quote_plus

# Load environment variables (.env should define 2CAPTCHA_API with your 2Captcha API key)
load_dotenv()
CAPTCHA_API_KEY = os.getenv("2CAPTCHA_API")

# --- Logging Setup ---
log_dir = "logs"
os.makedirs(log_dir, exist_ok=True)
log_path = os.path.join(log_dir, f"{datetime.now().strftime('%Y-%m-%d')}.log")

def log(message):
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    full_message = f"[{timestamp}] {message}"
    print(full_message)
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(full_message + "\n")

# --- DB Insert ---
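# Assumed MySQL schema for the results table (a sketch; adjust types and credentials to your setup):
#   CREATE TABLE google_search_results (
#       id INT AUTO_INCREMENT PRIMARY KEY,
#       query TEXT,
#       url TEXT,
#       page_number INT
#   );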
def insert_into_db(query, url, page_number):
    try:
        conn = mysql.connector.connect(
            host="localhost",
            user="root",
            password="",
            database="web_scraper"
        )
        cursor = conn.cursor()
        sql = "INSERT INTO google_search_results (query, url, page_number) VALUES (%s, %s, %s)"
        cursor.execute(sql, (query, url, page_number))
        conn.commit()
        cursor.close()
        conn.close()
    except mysql.connector.Error as err:
        log(f"❌ DB Error: {err}")

def human_scroll(driver):
    # Scroll the page in stages with independent random pauses to mimic human behavior
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight/3);")
    time.sleep(random.uniform(0.5, 1.5))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight/2);")
    time.sleep(random.uniform(0.5, 1.5))
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(random.uniform(0.5, 1.5))

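# Build an undetected-chromedriver instance with a randomized user agent and window size
# to reduce the chance of automation detection.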
def create_driver():
    try:
        ua = UserAgent()
        user_agent = ua.random
    except Exception:
        # fake_useragent may fail (e.g. offline); fall back to a small static UA pool
        user_agent = random.choice([
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
        ])

    options = uc.ChromeOptions()
    options.headless = False
    options.page_load_strategy = 'eager'
    options.add_argument(f'user-agent={user_agent}')
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--lang=en-US,en;q=0.9")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument(f"--window-size={random.randint(1024,1920)},{random.randint(768,1080)}")

    driver = uc.Chrome(options=options, use_subprocess=True)
    driver.delete_all_cookies()
    return driver

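# Submit the reCAPTCHA to 2Captcha (in.php) and poll for the solved token (res.php).
# data_s, cookies, and proxy details are passed along when available.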
def solve_captcha(site_key, url, data_s=None, cookies=None, proxy=None, proxy_type=None):
    log("🧩 Submitting CAPTCHA to 2Captcha...")

    payload = {
        "key": CAPTCHA_API_KEY,  # Your CAPTCHA API key
        "method": "userrecaptcha",  # Solving reCAPTCHA
        "googlekey": site_key,  # The reCAPTCHA key
        "pageurl": url,  # The URL you are scraping
        "json": 1  # Returning a JSON response
    }

    if data_s:
        payload["data-s"] = data_s

    if cookies:
        payload["cookies"] = cookies

    if proxy:  # Adding proxy details if provided
        payload["proxy"] = proxy  # IP address of the proxy
        if proxy_type:  # Adding proxy type (HTTP/SOCKS4/SOCKS5)
            payload["proxytype"] = proxy_type

    try:
        # Send the request to 2Captcha
        submit_resp = requests.post("http://2captcha.com/in.php", data=payload).json()

        if submit_resp.get("status") != 1:
            log(f"❌ Failed to submit CAPTCHA: {submit_resp}")
            return None

        captcha_id = submit_resp["request"]

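        # Poll every 5 seconds, up to 20 times (~100 s), until 2Captcha returns the token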
        for _ in range(20):
            time.sleep(5)
            res = requests.get("http://2captcha.com/res.php", params={
                "key": CAPTCHA_API_KEY,
                "action": "get",
                "id": captcha_id,
                "json": 1
            }).json()
            if res.get("status") == 1:
                log("✅ CAPTCHA solved.")
                return res["request"]
        log("❌ CAPTCHA solving timed out.")
    except Exception as e:
        log(f"❌ CAPTCHA solving error: {e}")
    return None

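# Standard token injection: unhide the g-recaptcha-response textarea, write the solved
# token into it, and submit the enclosing form so the page verifies it.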
def inject_captcha_solution(driver, solution):
    driver.execute_script('document.getElementById("g-recaptcha-response").style.display = "block";')
    driver.execute_script(f'document.getElementById("g-recaptcha-response").innerHTML="{solution}";')
    time.sleep(2)
    driver.execute_script('document.querySelector("form").submit();')

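# Join the browser cookies into a single "name=value; name=value" string to pass to 2Captcha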
def get_cookie_string(driver):
    cookies = driver.get_cookies()
    return '; '.join([f"{cookie['name']}={cookie['value']}" for cookie in cookies])


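# Detect a reCAPTCHA interstitial, locate its site key on the page, solve it via 2Captcha,
# and inject the returned token back into the page.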
def handle_captcha(driver, site_key=None, original_url=None, proxy=None, proxy_type=None):
    if "recaptcha" in driver.page_source.lower():
        log("⚠️ CAPTCHA detected. Trying to solve...")
        try:
            if not site_key:
                try:
                    site_key = driver.find_element(By.CLASS_NAME, "g-recaptcha").get_attribute("data-sitekey")
                except Exception:
                    try:
                        site_key = driver.find_element(By.XPATH, "//div[@data-sitekey]").get_attribute("data-sitekey")
                    except Exception:
                        log("❌ CAPTCHA site key not found.")
                        return
            page_url = original_url if original_url else driver.current_url
            cookie_string = get_cookie_string(driver)
            solution = solve_captcha(site_key, page_url, cookies=cookie_string, proxy=proxy, proxy_type=proxy_type)
            if solution:
                inject_captcha_solution(driver, solution)
                time.sleep(10)
        except Exception as e:
            log(f"❌ CAPTCHA handling error: {e}")

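# Scroll the SERP, collect outbound result links, and store each one in MySQL.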
def scrape_current_page(driver, query, current_page):
    human_scroll(driver)
    try:
        result_elements = WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.XPATH, "//a[@data-ved and contains(@href, 'http')]"))
        )
    except Exception:
        log(f"⚠️ No results found on page {current_page}.")
        return False

    page_links = [el.get_attribute("href") for el in result_elements if el.get_attribute("href")]
    log(f"✅ Page {current_page}: {len(page_links)} links found.")

    for link in page_links:
        insert_into_db(query, link, current_page)

    return True

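# Page through Google results via the start= parameter, handling CAPTCHAs as they appear.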
def get_organic_google_results(query, num_pages=10, proxy=None, proxy_type=None):
    encoded_query = quote_plus(query)
    driver = create_driver()

    try:
        for page in range(num_pages):
            start = page * 10
            search_url = f"https://www.google.com/search?q={encoded_query}&hl=en&gl=us&num=10&start={start}"
            log(f"🌐 Opening: {search_url}")

            driver.get(search_url)
            time.sleep(random.uniform(6, 10))

            if "sorry" in driver.current_url or "recaptcha" in driver.page_source.lower():
                # Handling CAPTCHA and passing proxy details
                handle_captcha(driver, original_url=search_url, proxy=proxy, proxy_type=proxy_type)

            success = scrape_current_page(driver, query, page + 1)
            if not success:
                log("❌ Skipping to next page due to scraping failure.")

            time.sleep(random.uniform(8, 15))
    finally:
        driver.quit()
        
proxy = "your_proxy_ip:port"  # Example: "192.168.1.100:8888"
proxy_type = "http"  # Or "socks5" depending on your proxy type

# --- Run ---
get_organic_google_results(
    '(intitle:resume OR inurl:resume) ("Sales manager" OR "regional sales manager" OR "sales director") AND ("CRM" OR "business development" OR "account management") (New York OR NYC OR "New York City") -job -jobs -sample -samples -templates',
    num_pages=10
)
