I need to retrieve content from pages that a website generates dynamically with AJAX, using Go. I've found examples for static (non-AJAX) pages, but I'm struggling to find one that works for AJAX pages. Any guidance or package recommendations would be greatly appreciated, thank you! Here is what I have so far (it works for the static parts of the page), followed by a sketch of the direction I'm considering:
package main

import (
	"fmt"
	"net/http"
	"strconv"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
)
func main() {
	masterURI := []string{
		"http://uri1",
		// "http://uri2",
		// "http://uri3",
	}
	for _, uri := range masterURI {
		doc, err := extractHTML(uri)
		check(err)
		// The last pagination link holds the total number of pages.
		search := doc.Find(".pagination li a span").Eq(-1)
		numPages, err := strconv.Atoi(search.Text())
		check(err)
		for i := 1; i <= numPages; i++ {
			// NOTE: the URL fragment ("#/page-N") is never sent to the
			// server, so this request returns the same HTML every time;
			// the pagination happens client-side via AJAX.
			page := uri + "#/page-" + strconv.Itoa(i)
			fmt.Println("\n========> " + page)
			doc, err := extractHTML(page)
			check(err)
			doc.Find(".product-name").Each(func(_ int, s *goquery.Selection) {
				fmt.Println(strings.TrimSpace(s.Text()))
			})
		}
		fmt.Println(" --- ")
	}
}
func extractHTML(url string) (*goquery.Document, error) {
	userAgent := "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36 OPR/32.0.1948.69"
	trans := &http.Transport{
		DisableKeepAlives: true,
	}
	myClient := &http.Client{Transport: trans, Timeout: 600 * time.Second}
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Add("User-Agent", userAgent)
	req.Close = true // must be set before Do, not after
	resp, err := myClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		// Previously a non-200 response returned a nil document with a
		// nil error, which made callers panic later; report it instead.
		return nil, fmt.Errorf("unexpected status %d for %s", resp.StatusCode, url)
	}
	// NewDocumentFromResponse is deprecated; parse the body directly.
	return goquery.NewDocumentFromReader(resp.Body)
}
func check(err error) {
	if err != nil {
		panic(err)
	}
}
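From what I've read, there seem to be two common routes for AJAX pages: find the XHR endpoint the page calls (visible in the browser's network tab) and request it directly, or render the page in a headless browser and scrape the resulting DOM. Below is a minimal, untested sketch of the second route using github.com/chromedp/chromedp (it needs a local Chrome/Chromium install). The URL and the .product-name selector are the hypothetical ones from my code above:

package main

import (
	"context"
	"fmt"
	"log"
	"strings"
	"time"

	"github.com/PuerkitoBio/goquery"
	"github.com/chromedp/chromedp"
)

func main() {
	// Give the whole scrape a deadline.
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()
	ctx, cancel = chromedp.NewContext(ctx)
	defer cancel()

	var html string
	// Navigate to the AJAX-driven page, wait until the dynamic content
	// has rendered, then grab the final DOM as a string.
	err := chromedp.Run(ctx,
		chromedp.Navigate("http://uri1#/page-2"), // hypothetical URL from above
		chromedp.WaitVisible(".product-name"),
		chromedp.OuterHTML("html", &html),
	)
	if err != nil {
		log.Fatal(err)
	}

	// The rendered HTML can then be fed to goquery exactly as before.
	doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
	if err != nil {
		log.Fatal(err)
	}
	doc.Find(".product-name").Each(func(_ int, s *goquery.Selection) {
		fmt.Println(strings.TrimSpace(s.Text()))
	})
}

If the pagination XHR turns out to return plain JSON or an HTML fragment, hitting that endpoint directly with net/http would be simpler and avoid the browser dependency entirely.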