I'm trying to extract data from the site https://tonaton.com. There's a "next" button that needs to be clicked in order to scrape the contents. However, I'm having difficulty identifying the correct XPath or CSS selector for this button, which is preventing me from progressing with the scraping process. Any assistance would be greatly appreciated, as I'm currently stuck at this point. Below is the code snippet I have been working with, but it's not yielding the desired outcomes.
# -*- coding: utf-8 -*-
import scrapy
import scrapy_selenium
from scrapy_selenium import SeleniumRequest
class VisionSpider(scrapy.Spider):
    """Crawl tonaton.com categories and yield one contact record per shop.

    Request chain:
    ``parse`` (home page) -> ``parse_business`` (category listing, paginated)
    -> ``next_parse`` (ad page) -> ``another_parse`` (shop page)
    -> ``new_parse`` (shop page after the "show number" button was clicked).

    The category label scraped on the home page is carried through the chain
    in ``meta['business_category']``.
    """

    name = 'vision'

    # JavaScript executed in the Selenium-driven browser to reveal the phone
    # number. Scrapy selectors only parse static HTML and cannot click, so the
    # click has to happen inside the browser. The null check keeps the script
    # from throwing when the button is absent.
    _SHOW_NUMBER_SCRIPT = (
        "var btn = document.querySelector("
        "'button.contact-section--1qlvP.gtm-show-number');"
        "if (btn) { btn.click(); }"
    )

    def start_requests(self):
        """Request the home page through Selenium."""
        yield SeleniumRequest(
            url='https://tonaton.com',
            wait_time=3,
            screenshot=True,
            callback=self.parse,
        )

    def parse(self, response):
        """Follow every category link found on the home page."""
        businesses = response.xpath("//a[@class='link--1t8hM gtm-home-category-link-click']")
        for business in businesses:
            link = business.xpath(".//@href").get()
            if not link:
                # response.follow cannot build a request without a URL.
                continue
            category = business.xpath(".//div[2]/p/text()").get()
            yield response.follow(
                url=link,
                callback=self.parse_business,
                meta={'business_category': category},
            )

    def parse_business(self, response):
        """Follow each ad on a category listing page, then the next page."""
        category = response.request.meta['business_category']

        rows = response.xpath("//a[@class='card-link--3ssYv gtm-ad-item']")
        for row in rows:
            new_link = row.xpath(".//@href").get()
            if not new_link:
                continue
            yield response.follow(
                url=new_link,
                callback=self.next_parse,
                meta={'business_category': category},
            )

        # A SelectorList has no click(); pagination is done by following the
        # link instead.
        # NOTE(review): assumes the next-page control contains an <a href>.
        # Confirm against the live markup; if it is a pure JavaScript button
        # this yields nothing and pagination needs a SeleniumRequest script.
        next_page = response.xpath("//div[@class = 'action-button--1O8tU']")
        next_href = next_page.xpath(".//a/@href").get()
        if next_href:
            # The next page is another listing page, so it goes back to this
            # callback (not the home-page parser) and keeps the category.
            yield response.follow(
                url=next_href,
                callback=self.parse_business,
                meta={'business_category': category},
            )

    def next_parse(self, response):
        """Follow the "visit shop" link(s) found on an ad page."""
        category = response.request.meta['business_category']
        lines = response.xpath("//a[@class='member-link--IzDly gtm-visit-shop']")
        for line in lines:
            next_link = line.xpath(".//@href").get()
            if not next_link:
                continue
            yield response.follow(
                url=next_link,
                callback=self.another_parse,
                meta={'business_category': category},
            )

    def another_parse(self, response):
        """Re-load the shop page in Selenium and click "show number".

        The page is requested again with a ``script`` that performs the click
        in the browser; the resulting page source is handed to ``new_parse``.
        """
        category = response.request.meta['business_category']
        # NOTE(review): if the number is filled in asynchronously after the
        # click, the captured page source may not contain it yet; a
        # ``wait_until`` condition may be needed. Confirm on the live site.
        yield SeleniumRequest(
            url=response.url,
            script=self._SHOW_NUMBER_SCRIPT,
            wait_time=3,
            callback=self.new_parse,
            meta={'business_category': category},
            # Same URL as the response being parsed: without this the
            # duplicate filter would drop the request.
            dont_filter=True,
        )

    def new_parse(self, response):
        """Yield one item per info container on the shop page."""
        category = response.request.meta['business_category']
        containers = response.xpath("//div[@class='info-container--3pMhK']")
        for container in containers:
            name = container.xpath(".//div/span/text()").get()
            location = container.xpath(".//div/div/div/span/text()").get()
            phone = container.xpath(".//div[3]/div/button/div[2]/div/text()").get()
            yield {
                'business_category': category,
                'business_name': name,
                'phone': phone,
                'location': location,
            }