from selenium import webdriver
from selenium.webdriver.common.by import By
import requests
import numpy as np
import time
# Selenium would sometimes fail at random; the commented-out lines below
# create the driver with certificate/SSL errors ignored and are kept for
# reference.
# opt = webdriver.ChromeOptions()
# opt.add_argument('--ignore-certificate-errors')
# opt.add_argument('--ignore-ssl-errors')
# browser = webdriver.Chrome(options=opt) # Create webdriver object
# Single Chrome driver shared by every function in this script.
browser = webdriver.Chrome()
browser.maximize_window() # Maximize the browser window
# Book identifier: appended to the book URL and looked for in the current URL.
book_id = "409466954"
page_n = 1000 # Any large number; overwritten later with the value parsed from the page counter
# Set to True by the main loop once it has clicked back towards the first page.
initiate = False
# Login-state flag; goto_book() checks it before calling fn_login().
loged = False
def fn_login():
    """Log in to Scribd through the e-mail form, then reopen the book.

    Loads the Scribd home page, opens the login drop-down, picks the e-mail
    option, types the account credentials and submits the form.  When every
    step succeeds the module-level flag ``loged`` is set to ``True``.  If any
    step raises, ``"Error login"`` is printed and the flag is left untouched.
    In both cases ``goto_book()`` is called at the end.
    """
    # The flag lives at module level; without this declaration the assignment
    # below would only bind a local name and goto_book() would never see it.
    global loged

    print("Function login")
    browser.get("https://es.scribd.com/")  # Get the website
    # NOTE: numpy's upper bound is exclusive, so randint(1, 2) always yields 1.
    time.sleep(np.random.randint(1, 2))
    try:
        header_login_btn = browser.find_element(
            By.XPATH, '//*[@id="global_header"]/div/a[2]')
        header_login_btn.click()
        time.sleep(1)

        email_button = browser.find_element(
            By.XPATH,
            '//*[@id="login_drop"]/div/div/div/div/div[2]/div[1]/div[2]/div[2]/a')
        email_button.click()
        time.sleep(1)

        # NOTE(review): credentials are hard-coded in the source; they should
        # be moved to configuration/environment and rotated.
        usr_name = "yussel.br@gmail.com"
        login_or_email = browser.find_element(
            By.XPATH, '//*[@id="login_or_email"]')
        login_or_email.send_keys(usr_name)
        time.sleep(1)

        password = "XAv.t8x+HF7wYc4"
        login_password = browser.find_element(
            By.XPATH, '//*[@id="login_password"]')
        login_password.send_keys(password)
        time.sleep(1)

        login = browser.find_element(
            By.XPATH,
            '//*[@id="login_drop"]/div/div/div/div/div[2]/div[2]/div[2]/form/fieldset/div[3]/button')
        login.click()
        # Fixed wait after submitting the form (presumably to let the
        # post-login page finish loading -- TODO confirm).
        time.sleep(16)
        loged = True
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still stop the
        # script instead of being reported as a login error.
        print("Error login")
    goto_book()
def goto_book():
    """Open the book page for ``book_id`` and click its read link.

    If the link cannot be found or clicked and the module-level flag
    ``loged`` is still false, ``fn_login()`` is called (which in turn calls
    this function again once it has finished).
    """
    print("Go to book")
    browser.get("https://es.scribd.com/book/" + book_id)
    # NOTE: numpy's upper bound is exclusive, so randint(1, 2) always yields 1.
    time.sleep(np.random.randint(1, 2))
    try:
        read = browser.find_element(
            By.XPATH,
            '/html/body/div[2]/div/main/section/section/div[1]/div[2]/a')
        read.click()
        print("Read book")
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit stop the
        # script instead of triggering a login attempt.
        if not loged:
            fn_login()
def save_page(side, num):
    """Download one page image from the reader and save it as ``<num>.png``.

    Args:
        side: ``'a'`` selects the image under the first ``div`` of the page
            container; any other value selects the one under the second
            ``div`` (presumably the left and right page of the spread).
        num: Number used for the output file name.

    Returns:
        ``num + 1`` when the image was downloaded and written, otherwise
        ``num`` unchanged (after printing ``"Not found page <num>"``).
    """
    # NOTE: numpy's upper bound is exclusive, so randint(1, 2) always yields 1.
    time.sleep(np.random.randint(1, 2))
    if side == 'a':
        xpath = '/html/body/div[3]/div/div[4]/div/div[2]/div[1]/div[2]/div[1]/div/span/img'
    else:
        xpath = '/html/body/div[3]/div/div[4]/div/div[2]/div[1]/div[2]/div[2]/div/span/img'
    try:
        page = browser.find_element(By.XPATH, xpath)
        url_page = page.get_attribute('src')
        # A timeout keeps a stalled download from hanging the whole run.
        response = requests.get(url_page, timeout=30)
        # Refuse to store an HTTP error body as if it were the page image.
        response.raise_for_status()
        with open(str(num) + '.png', 'wb') as file:
            file.write(response.content)
    except Exception:
        # Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate; any lookup, download or write failure lands here.
        print("Not found page " + str(num))
        return num
    print("Page " + str(num) + " saved")
    return num + 1
# Main script: open the book, then walk the reader saving page images.
print("Start")
goto_book()
# i is compared against page_n as the loop bound; page_n starts as the
# module-level placeholder and is overwritten below from the page counter.
i = 0
while i <= page_n:
url = browser.current_url
print(url)
# The browser is no longer on this book (its id is not in the URL): reopen it.
if book_id not in url:
goto_book()
try:
time.sleep(1)
print("Working")
# Find pages
page_counter = browser.find_element(By.XPATH, '/html/body/div[3]/div/div[4]/div/span/div/nav/div[2]/div[2]/div').text
page_counter = page_counter.strip() # Trim
# print(page_counter)
# The counter text is split on single spaces and tokens 1 and 3 are parsed
# as integers (presumably "current page" and "total pages" -- TODO confirm
# the exact text format shown by the reader).
pages = page_counter.split(' ')
page_x = int(pages[1]);
# print('De: ' + str(page_x))
page_n = int(pages[3]);
# print('A: ' + str(page_n))
# First pass only: click the "previous" button until the counter would be
# back at page 1, then mark the rewind as done.
if initiate == False:
# Go to first page
x = 1
while x < page_x:
time.sleep(np.random.randint(4, 6))
prev_btn = browser.find_element(By.XPATH, '/html/body/div[3]/div/div[4]/div/div[2]/div[1]/div[1]/button[1]')
prev_btn.click()
x += 1
print("Click " + str(x) + " on prev_btn")
initiate = True
# fullscreen = browser.find_element(By.XPATH, '/html/body/div[3]/div/div[4]/div/div[2]/header/div[3]/span[2]/div/a')
# fullscreen.click()
elif page_x < page_n:
# Save pages
time.sleep(np.random.randint(9, 11))
# Save side 'a' under the current page number; side 'b' is saved under
# whatever number save_page returned for 'a'.
num_page = save_page('a', page_x)
save_page('b', num_page)
next_btn = browser.find_element(By.XPATH, '/html/body/div[3]/div/div[4]/div/div[2]/div[1]/div[1]/button[2]')
next_btn.click()
# NOTE(review): i is only advanced here; confirm the loop still terminates
# once page_x reaches page_n.
i += 1
print("Click on next_btn")
# NOTE(review): bare except -- catches every exception, including
# KeyboardInterrupt, and only prints the state before the loop continues.
except:
print('loged = ' + str(loged))
print(str(i) + ": error")
print("End")