 chadcrawler.py | 41 +++++++++++++++++++++++++++++++++++++++++
 chadscraper.py | 29 +++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 0 deletions(-)
diff --git a/chadcrawler.py b/chadcrawler.py
new file mode 100644
index 0000000..cf39e92
--- /dev/null
+++ b/chadcrawler.py
@@ -0,0 +1,41 @@
+import requests
+from bs4 import BeautifulSoup
+import csv
+import time
+
+visited_urls = set()
+csv_filename = "crawled_data.csv"
+
+def crawl(url, depth=2):
+    if depth <= 0 or url in visited_urls:
+        return
+    try:
+        response = requests.get(url, timeout=5)
+        response.raise_for_status()
+    except requests.RequestException:
+        return
+
+    visited_urls.add(url)
+    soup = BeautifulSoup(response.text, "html.parser")
+
+    # Extract the page title and all absolute links
+    title = soup.title.string.strip() if soup.title and soup.title.string else "No Title"
+    links = [a['href'] for a in soup.find_all('a', href=True) if a['href'].startswith("http")]
+
+    # Save to CSV
+    save_to_csv([url, title])
+
+    # Recursively crawl the extracted links
+    for link in links:
+        time.sleep(1)  # Delay between requests to avoid overloading the server
+        crawl(link, depth - 1)
+
+def save_to_csv(data):
+    with open(csv_filename, "a", newline="", encoding="utf-8") as file:
+        writer = csv.writer(file)
+        writer.writerow(data)
+
+# Start crawling
+start_url = "https://example.com"
+crawl(start_url)
+print("Crawling finished. Data saved in", csv_filename)
diff --git a/chadscraper.py b/chadscraper.py
new file mode 100644
index 0000000..79e5ab6
--- /dev/null
+++ b/chadscraper.py
@@ -0,0 +1,29 @@
+import requests
+from bs4 import BeautifulSoup
+import csv
+
+def scrape_website(url, csv_filename):
+    # Send GET request
+    response = requests.get(url, timeout=10)
+    response.raise_for_status()
+
+    # Parse the webpage
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Extract relevant data (modify according to target site)
+    data = []
+    for item in soup.find_all('div', class_='some-class'):  # Change 'some-class' accordingly
+        title, description = item.find('h2'), item.find('p')
+        if title and description:  # Skip items missing either tag
+            data.append([title.text.strip(), description.text.strip()])
+
+    # Save data to CSV
+    with open(csv_filename, 'w', newline='', encoding='utf-8') as file:
+        writer = csv.writer(file)
+        writer.writerow(['Title', 'Description'])  # Header row
+        writer.writerows(data)
+
+    print(f"Data saved to {csv_filename}")
+
+# Example usage
+scrape_website('https://example.com', 'scraped_data.csv')
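
A note on the crawl delay in chadcrawler.py: the time.sleep(1) call throttles the request rate but does not consult robots.txt. If politeness matters, the standard library's urllib.robotparser can gate each fetch before requests.get() runs. This is a minimal sketch and not part of the patch; the allowed_to_fetch helper name and the "*" user agent are illustrative assumptions.

    from urllib.parse import urljoin
    from urllib.robotparser import RobotFileParser

    def allowed_to_fetch(url, user_agent="*"):
        # Hypothetical helper: fetch the site's robots.txt and check the URL
        parser = RobotFileParser()
        parser.set_url(urljoin(url, "/robots.txt"))
        try:
            parser.read()
        except OSError:
            return True  # No readable robots.txt; assume fetching is allowed
        return parser.can_fetch(user_agent, url)

    # Example use at the top of crawl(), before the requests.get() call:
    # if not allowed_to_fetch(url):
    #     return

In practice one would cache a RobotFileParser per host rather than re-reading robots.txt for every URL, since the sketch above refetches it on each call.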