General

Cómo Scrapear Google Maps Gratis con Python

Te recomiendo que veas estos 2 vídeos:

Primeros pasos con Selenium en Python

Y Cómo conectar con hojas de cálculo de Google Drive con Python

Aquí tenéis el código que he utilizado:

import gspread
import time
from datetime import datetime
import re
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains

class ScrapearGMaps:
    """Scrape business listings from a Google Maps search-results page and
    append one row per place (name, address, phone, lat, lng, website, email)
    to a Google Sheets worksheet.

    NOTE(review): written against the Selenium 3 API (``executable_path``,
    ``find_element_by_*``); Selenium 4 removed both — pin ``selenium<4`` or
    port to ``By``-based locators. Confirm the installed version.
    """

    def __init__(self):
        # Path to the ChromeDriver binary — adjust per machine. Windows
        # example: r"C:\Users\...\chromedriver_win32\chromedriver.exe"
        self.driver = webdriver.Chrome(executable_path="/home/nicolas/Proyectos/python/sheets/chromedriver_linux64/chromedriver")

        # Results accumulated by scrape(). Kept per-instance: the original
        # class-level dict would have been shared across all instances.
        self.data = {}

        # Authenticate against the Google Sheets API with a service account.
        gc = gspread.service_account(filename='scraping-link-d8434e0ec14a.json')

        # Open the spreadsheet by title and select its first sheet.
        sh = gc.open("Veterinarios")
        self.worksheet = sh.get_worksheet(0)

    def scroll_the_page(self, i):
        """Scroll the results panel until result ``i`` is present in the DOM.

        Google Maps lazy-loads results; hovering over the "loading" spinner
        triggers the next batch to load.

        :param i: zero-based index of the result that must be loaded.
        """
        try:
            section_loading = self.driver.find_element_by_class_name("section-loading")
            while i >= len(self.driver.find_elements_by_class_name("place-result-container-place-link")):
                ActionChains(self.driver).move_to_element(section_loading).perform()
                time.sleep(2)
        except Exception:
            # Best effort: without the spinner there is nothing to scroll to.
            pass

    def get_geocoder(self, url_location):
        """Return ``(lat, lng)`` as strings parsed from a Google Maps URL.

        Coordinates are embedded in the URL as ``!3d<lat>!4d<lng>``.
        Returns ``("", "")`` when ``url_location`` is not a string
        (TypeError) or the pattern is absent (AttributeError on ``.group``).
        """
        try:
            coords = re.search(r"!3d-?\d\d?\.\d{4,8}!4d-?\d\d?\.\d{4,8}",
                               url_location).group()
            lat_lng = coords.split('!3d')[1]
            return tuple(lat_lng.split('!4d'))
        except (TypeError, AttributeError):
            return ("", "")

    def get_name(self):
        """Return the place name from the details panel, or "" if absent."""
        try:
            return self.driver.find_element_by_xpath("//h1[contains(@class,'header-title')]").text
        except Exception:
            return ""

    def get_address(self):
        """Return the place address, or "" if absent."""
        try:
            return self.driver.find_element_by_css_selector("[data-item-id='address']").text
        except Exception:
            return ""

    def get_phone(self):
        """Return the phone number, or "" if absent (Spanish-locale UI)."""
        try:
            return self.driver.find_element_by_css_selector("[data-tooltip='Copiar el número de teléfono']").text
        except Exception:
            return ""

    def get_website(self):
        """Return the website, or "" if absent."""
        try:
            return self.driver.find_element_by_css_selector("[data-item-id='authority']").text
        except Exception:
            return ""

    def scrape(self, url, max_results=20):
        """Open ``url``, accept the consent dialog, then visit up to
        ``max_results`` results and append each as one row to the sheet.

        :param url: Google Maps search URL to crawl.
        :param max_results: how many results to visit (default 20, matching
            the original hard-coded limit — backward compatible).
        :returns: ``self.data`` (never populated here; kept so existing
            callers that print the return value keep working).
        """
        try:
            self.driver.get(url)
            time.sleep(2)

            # Accept the Google cookie/consent dialog (Spanish-locale UI).
            element = self.driver.find_element_by_xpath("//button[.//span[text()='Acepto']]")
            element.click()
            time.sleep(3)

            for i in range(max_results):
                # Make sure result i has been lazy-loaded before clicking it.
                self.scroll_the_page(i)

                place = self.driver.find_elements_by_class_name("place-result-container-place-link")[i]
                place.click()
                time.sleep(3)

                name = self.get_name()
                address = self.get_address()
                phone_number = self.get_phone()
                website = self.get_website()
                coords = self.get_geocoder(self.driver.current_url)
                # TODO: extract a contact e-mail from the website, if any.
                email = ""

                print([name, address, phone_number, coords[0], coords[1], website, email])

                # Append after the last non-empty row of column A. A single
                # ranged update replaces seven per-cell calls (same cells,
                # same values — far fewer Sheets API requests).
                row_index = len(self.worksheet.col_values(1)) + 1
                self.worksheet.update(
                    'A{0}:G{0}'.format(row_index),
                    [[name, address, phone_number, coords[0], coords[1], website, email]],
                )

                # Return to the results list for the next iteration.
                element = self.driver.find_element_by_xpath("//button[.//span[text()='Volver a los resultados']]")
                time.sleep(2)
                element.click()
                time.sleep(3)

        except Exception as e:
            # Any failure aborts the crawl but still returns self.data.
            print(e)

        time.sleep(10)
        # self.driver.quit()  # presumably left open for manual inspection — confirm

        return self.data

# Search query — typo fixed: "veterianrios" -> "veterinarios".
query = "veterinarios murcia"
# Google Maps search URLs use '+' as the word separator.
url = "https://www.google.es/maps/search/" + query.replace(" ", "+") + "/"

gmaps = ScrapearGMaps()
print(gmaps.scrape(url))

También te puede interesar

Buy Me a Coffee — Invítame a un café
Twitch — Suscripción GRATIS
Ir arriba