Cómo Scrapear Google Maps Gratis con Python
Te recomiendo que veas estos 2 vídeos:
Primeros pasos con Selenium en Python
Y Cómo conectar con hojas de cálculo de Google Drive con Python
Aquí tenéis el código que he utilizado:
import gspread
import time
from datetime import datetime
import re
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
#PATH = '../Scrapeo/chromedriver'
#driver = webdriver.Chrome(PATH)
#driver.get("https://www.google.com")
class ScrapearGMaps:
    """Scrape business listings from a Google Maps search and append each
    result (name, address, phone, coordinates, website) as a row in a
    Google Sheets worksheet.

    Requires a local ChromeDriver binary and a Google service-account
    JSON key file for gspread authentication.
    """

    def __init__(self):
        # Instance state. (Previously these were shared mutable class
        # attributes, which would leak state between instances.)
        self.data = {}
        # NOTE(review): the driver path below contains trailing spaces in
        # "Documentos " and "Trabajo " — confirm they are really part of
        # the folder names on this machine.
        self.driver = webdriver.Chrome(service=Service("/Users/luispalma/Documentos /Trabajo /Scrapeo/chromedriver"))
        # Authenticate against the Google Sheets API with a service account.
        gc = gspread.service_account(filename='scraping-link-341712-b02b438fad16.json')
        # Open the spreadsheet by title and select its first sheet.
        sh = gc.open("Empresas")
        self.worksheet = sh.get_worksheet(0)

    def scroll_the_page(self, i):
        """Scroll the results pane until at least ``i + 1`` result links are
        rendered, by repeatedly moving to the lazy-loading sentinel element.

        Best effort: if the sentinel is missing (all results already
        loaded, or the page layout changed), returns silently.
        """
        try:
            section_loading = self.driver.find_element(By.CLASS_NAME, "section-loading")
            while i >= len(self.driver.find_elements(By.CLASS_NAME, "place-result-container-place-link")):
                # Moving to the loading sentinel triggers Maps' infinite scroll.
                ActionChains(self.driver).move_to_element(section_loading).perform()
                time.sleep(2)
        except Exception:
            # Deliberate best-effort: missing sentinel means nothing left to load.
            pass

    def get_geocoder(self, url_location):
        """Extract the (latitude, longitude) pair embedded in a Google Maps
        URL as the ``!3d<lat>!4d<lng>`` fragment.

        Returns a tuple of two strings, or ``("", "")`` when the URL is
        ``None`` or contains no coordinate fragment.
        """
        try:
            coords = re.search(r"!3d-?\d\d?\.\d{4,8}!4d-?\d\d?\.\d{4,8}",
                               url_location).group()
            coord = coords.split('!3d')[1]
            return tuple(coord.split('!4d'))
        except (TypeError, AttributeError):
            # TypeError: url_location is None; AttributeError: no regex match.
            return ("", "")

    def get_name(self):
        """Return the place name from the detail pane, or "" if absent."""
        try:
            return self.driver.find_element(By.XPATH, "//h1[contains(@class,'header-title')]").text
        except Exception:
            return ""

    def get_address(self):
        """Return the place address from the detail pane, or "" if absent."""
        try:
            return self.driver.find_element(By.CSS_SELECTOR, "[data-item-id='address']").text
        except Exception:
            return ""

    def get_phone(self):
        """Return the place phone number, or "" if absent.

        The selector matches the Spanish-locale tooltip text, so this only
        works when Maps is served in Spanish.
        """
        try:
            return self.driver.find_element(By.CSS_SELECTOR, "[data-tooltip='Copiar el número de teléfono']").text
        except Exception:
            return ""

    def get_website(self):
        """Return the place website URL, or "" if absent."""
        try:
            return self.driver.find_element(By.CSS_SELECTOR, "[data-item-id='authority']").text
        except Exception:
            return ""

    def scrape(self, url):
        """Open ``url``, walk up to the first 20 results, and append one
        worksheet row per place. Returns ``self.data`` (currently always
        empty; kept for interface compatibility).
        """
        try:
            self.driver.get(url)
            time.sleep(2)
            # Dismiss the cookie-consent dialog.
            self.driver.find_element(By.XPATH, "//button[.//span[text()='I agree']]").click()
            time.sleep(3)
            for i in range(20):
                self.scroll_the_page(i)
                place = self.driver.find_elements(By.CLASS_NAME, "place-result-container-place-link")[i]
                place.click()
                time.sleep(3)
                name = self.get_name()
                address = self.get_address()
                phone_number = self.get_phone()
                website = self.get_website()
                coords = self.get_geocoder(self.driver.current_url)
                email = ""  # email scraping disabled in this version
                row = [name, address, phone_number, coords[0], coords[1], website, email]
                print(row)
                # First empty row = one past the last filled cell in column A.
                row_index = len(self.worksheet.col_values(1)) + 1
                # One ranged API call instead of seven per-cell updates.
                self.worksheet.update('A%d:G%d' % (row_index, row_index), [row])
                # Navigate back to the results list (Spanish-locale label).
                back = self.driver.find_element(By.XPATH, "//button[.//span[text()='Volver a los resultados']]")
                time.sleep(2)
                back.click()
                time.sleep(3)
        except Exception as e:
            # Log and pause so the failure is visible before the browser closes.
            print(e)
            time.sleep(10)
        return (self.data)
# Build the search URL; Google Maps accepts '+' as the space separator.
query = "veterinaria murcia"
url = "https://www.google.es/maps/search/" + query.replace(" ", "+") + "/"

if __name__ == "__main__":
    # Only launch the browser when run as a script, not when imported.
    gmaps = ScrapearGMaps()
    print(gmaps.scrape(url))
Gracias, Luis Enrique Palma, por la actualización de este código.