New stuff

2026-05-31 16:53:37 +00:00 · 2023-03-02 18:16:59 +01:00
parent 1080868e60
commit 7923212cb8
2 changed files with 39 additions and 1 deletions
--- a/scraper-bs.py
+++ b/scraper-bs.py
@@ -0,0 +1,35 @@
+# Pages to scrape; the loop at the bottom of the script fetches each URL and prints its text.
+websites = ["https://garlic-vue.netlify.app/"]
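+# Crawl the websites listed above, including pages that are rendered with JavaScript.
+# Selenium drives a headless Chrome browser to load each page.
+# Download the matching ChromeDriver from https://chromedriver.chromium.org/downloads
+# and put it in the same folder as this script (driverPath below must point to it).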
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from bs4 import BeautifulSoup
+import time
+
+# Absolute path to the ChromeDriver binary that get_html passes to webdriver.Chrome.
+# NOTE(review): hard-coded to one developer's machine; adjust it for your environment.
+driverPath = "/home/velocitatem/Documents/Projects/garlic/chromedriver"
+
+
+def get_html(url):
+    """Return the HTML of ``url`` as loaded by a headless Chrome browser.
+
+    A new Chrome instance is started for every call, using the ChromeDriver
+    binary at ``driverPath``. The browser is always shut down before this
+    function exits, even when loading the page raises.
+
+    Args:
+        url: Address of the page to load.
+
+    Returns:
+        The page source reported by the browser after a fixed 5-second wait.
+    """
+    options = Options()
+    options.headless = True
+    driver = webdriver.Chrome(executable_path=driverPath, chrome_options=options)
+    try:
+        driver.get(url)
+        # Fixed pause before reading the source; presumably gives client-side
+        # JavaScript time to finish rendering the page.
+        time.sleep(5)
+        return driver.page_source
+    finally:
+        # Quit on every path so a failed load does not leave an orphaned
+        # Chrome/ChromeDriver process behind.
+        driver.quit()
+
+
+def get_data(html):
+    """Parse ``html`` with BeautifulSoup's "html.parser" and print its text."""
+    parsed = BeautifulSoup(html, "html.parser")
+    # Write all of the document's text content to stdout.
+    page_text = parsed.text
+    print(page_text)
+
+
+
+
+# Scrape every configured site: fetch the rendered HTML, then print its text.
+for url in websites:
+    page_html = get_html(url)
+    get_data(page_html)