openai
/

gpt-oss-20b

Text Generation

8-bit precision

Model card Files Files and versions

gpt-oss-20b / __init__ (20).py

Ananthusajeev190's picture

Ananthusajeev190

Upload 312 files

d2e0b37 verified 1 day ago

1.83 kB

	import requests
	from bs4 import BeautifulSoup

	def scrape_wikipedia_headings(url, output_filename="wiki_headings.txt"):
	"""
	Fetches a Wikipedia page, extracts all headings, and saves them to a file.

	Args:
	url (str): The URL of the Wikipedia page to scrape.
	output_filename (str): The name of the file to save the headings.
	"""
	try:
	# 1. Fetch the HTML content from the specified URL
	print(f"Fetching content from: {url}")
	response = requests.get(url)
	response.raise_for_status() # This will raise an exception for bad status codes (4xx or 5xx)

	# 2. Parse the HTML using BeautifulSoup
	print("Parsing HTML content...")
	soup = BeautifulSoup(response.text, 'html.parser')

	# 3. Find all heading tags (h1, h2, h3)
	headings = soup.find_all(['h1', 'h2', 'h3'])

	if not headings:
	print("No headings found on the page.")
	return

	# 4. Process and save the headings
	print(f"Found {len(headings)} headings. Saving to '{output_filename}'...")
	with open(output_filename, 'w', encoding='utf-8') as f:
	for heading in headings:
	heading_text = heading.get_text().strip()
	line = f"{heading.name}: {heading_text}\n"
	f.write(line)
	print(f" - {line.strip()}")

	print(f"\nSuccessfully scraped and saved headings to '{output_filename}'.")

	except requests.exceptions.RequestException as e:
	print(f"Error fetching the URL: {e}")
	except Exception as e:
	print(f"An unexpected error occurred: {e}")

	# --- Main execution ---
	if __name__ == "__main__":
	wikipedia_url = "https://en.wikipedia.org/wiki/Python_(programming_language)"
	scrape_wikipedia_headings(wikipedia_url)