We use cookies to ensure you get the best experience on our website.
Wymagania
pip3 install pdfkit
Kod
import pdfkit
def save_webpage_as_pdf(url, output_pdf_path, wkhtmltopdf_path):
    """
    Render a web page to a PDF file using an explicitly located wkhtmltopdf.

    :param url: Address of the page to render.
    :param output_pdf_path: Path of the PDF file the page is written to.
    :param wkhtmltopdf_path: Path to the wkhtmltopdf executable.
    """
    # The executable location is handed to pdfkit through a configuration object.
    pdfkit_config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)
    pdfkit.from_url(url, output_pdf_path, configuration=pdfkit_config)

    confirmation = f'Strona internetowa została zapisana jako {output_pdf_path}'
    print(confirmation)
# Example usage (Windows: the wkhtmltopdf executable path is given explicitly)
webpage_url = 'https://www.example.com'
pdf_output_path = 'output.pdf'
wkhtmltopdf_executable_path = r'C:\path\to\wkhtmltopdf.exe'
save_webpage_as_pdf(
    url=webpage_url,
    output_pdf_path=pdf_output_path,
    wkhtmltopdf_path=wkhtmltopdf_executable_path,
)
KOD LINUX
import pdfkit
def save_webpage_as_pdf(url, output_pdf_path):
    """
    Render a web page to a PDF file.

    pdfkit is called without a configuration object here, so no wkhtmltopdf
    path is supplied by this function.

    :param url: Address of the page to render.
    :param output_pdf_path: Path of the PDF file the page is written to.
    """
    pdfkit.from_url(url, output_pdf_path)

    confirmation = f'Strona internetowa została zapisana jako {output_pdf_path}'
    print(confirmation)
# Example usage
webpage_url = 'https://www.example.com'
pdf_output_path = 'output.pdf'
save_webpage_as_pdf(url=webpage_url, output_pdf_path=pdf_output_path)
Autoryzacja: login i hasło
import pdfkit
import requests
from requests.auth import HTTPBasicAuth # Importujemy klasę do autoryzacji
def save_webpage_as_pdf(url, output_pdf_path, username, password):
    """
    Fetch a page using HTTP Basic authentication and save it as a PDF.

    The page is downloaded with ``requests`` and the returned HTML text is
    passed to ``pdfkit.from_string``. A PDF is written only when the server
    answers with status 200; otherwise an error message is printed and
    nothing is written.

    :param url: Address of the page to save.
    :param output_pdf_path: Path of the PDF file the page is written to.
    :param username: User name for authentication.
    :param password: Password for authentication.
    """
    # Using the session as a context manager closes it (and its pooled
    # connections) once the page has been downloaded.
    with requests.Session() as session:
        session.auth = HTTPBasicAuth(username, password)
        response = session.get(url)

    if response.status_code != 200:
        print('Błąd podczas pobierania zawartości strony.')
        return

    pdfkit.from_string(response.text, output_pdf_path)
    print(f'Strona internetowa została zapisana jako {output_pdf_path}')
# Example usage
webpage_url = 'https://www.example.com'
pdf_output_path = 'output.pdf'
username = 'your_username'
password = 'your_password'
save_webpage_as_pdf(
    url=webpage_url,
    output_pdf_path=pdf_output_path,
    username=username,
    password=password,
)
LUB
import requests
from requests.auth import HTTPBasicAuth
from weasyprint import HTML
def save_webpage_as_pdf(url, output_pdf_path, username, password):
    """
    Fetch a page using HTTP Basic authentication and save it as a PDF.

    The page is downloaded with ``requests`` and rendered with WeasyPrint.
    A PDF is written only when the server answers with status 200; otherwise
    an error message is printed and nothing is written.

    :param url: URL of the webpage to save.
    :param output_pdf_path: Path to the output PDF file.
    :param username: Username for authentication.
    :param password: Password for authentication.
    """
    # Using the session as a context manager closes it (and its pooled
    # connections) once the page has been downloaded.
    with requests.Session() as session:
        session.auth = HTTPBasicAuth(username, password)
        response = session.get(url)

    if response.status_code != 200:
        print('Error while fetching webpage content.')
        return

    # base_url is the page's own URL, passed so WeasyPrint has a base for
    # relative references in the HTML.
    document = HTML(string=response.content, base_url=url)
    pdf_bytes = document.write_pdf()

    with open(output_pdf_path, 'wb') as pdf_file:
        pdf_file.write(pdf_bytes)
    print(f'Webpage saved as {output_pdf_path}')
# Example usage: fetch with basic auth and write the PDF to a fixed path
webpage_url = 'https://www.example.com'
pdf_output_path = 'output.pdf'
username = 'your_username'
password = 'your_password'
save_webpage_as_pdf(
    url=webpage_url,
    output_pdf_path=pdf_output_path,
    username=username,
    password=password,
)
Dodaj, aby nazwa pliku była tworzona z tytułu strony
import requests
from requests.auth import HTTPBasicAuth
from weasyprint import HTML
def save_webpage_as_pdf(url, username, password):
    """
    Fetch a page using HTTP Basic authentication and save it as a PDF.

    The output file is named ``<name>.pdf``, where ``<name>`` is the
    ``filename=`` value of the response's Content-Disposition header. Note
    that this is the header's file name, not the HTML ``<title>``. When the
    header is missing or carries no usable ``filename=`` value, the name
    falls back to ``output``.

    A PDF is written only when the server answers with status 200; otherwise
    an error message is printed and nothing is written.

    :param url: URL of the webpage to save.
    :param username: Username for authentication.
    :param password: Password for authentication.
    """
    # Using the session as a context manager closes it (and its pooled
    # connections) once the page has been downloaded.
    with requests.Session() as session:
        session.auth = HTTPBasicAuth(username, password)
        response = session.get(url)

    if response.status_code != 200:
        print('Error while fetching webpage content.')
        return

    pdf = HTML(string=response.content, base_url=url).write_pdf()

    # partition() yields an empty remainder when 'filename=' is absent
    # (e.g. a bare "inline" header), instead of raising IndexError.
    disposition = response.headers.get('content-disposition') or ''
    _, _, raw_name = disposition.partition('filename=')
    raw_name = raw_name.strip()
    if raw_name.startswith('"'):
        # Quoted value: keep only what is between the quotes.
        name = raw_name[1:].split('"', 1)[0]
    else:
        # Unquoted value: stop at the next header parameter.
        name = raw_name.split(';', 1)[0].strip()
    # The header comes from the server; keep only the last path component so
    # the file is always written into the current directory.
    name = name.replace('\\', '/').rsplit('/', 1)[-1]

    title = name or 'output'
    pdf_output_path = f'{title}.pdf'

    with open(pdf_output_path, 'wb') as f:
        f.write(pdf)
    print(f'Webpage saved as {pdf_output_path}')
# Example usage: the output file name is derived by the function itself
webpage_url = 'https://www.example.com'
username = 'your_username'
password = 'your_password'
save_webpage_as_pdf(url=webpage_url, username=username, password=password)
import requests
from requests.auth import HTTPBasicAuth
from weasyprint import HTML
from bs4 import BeautifulSoup
def get_title(html_content):
    """
    Extract the text of the ``<title>`` element from HTML content.

    :param html_content: HTML content of the webpage.
    :return: The title text with surrounding whitespace removed, or
        ``'untitled'`` when there is no ``<title>`` element or it yields no
        text.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    title_tag = soup.find('title')
    if title_tag is None:
        return 'untitled'

    # .string is None for an empty <title> or one containing nested markup;
    # calling .strip() on it would raise AttributeError.
    title_text = title_tag.string
    if title_text is None:
        return 'untitled'

    # A whitespace-only title strips to '', which gets the same fallback.
    return title_text.strip() or 'untitled'
def _safe_filename(title):
    """Return *title* with characters that are unsafe in file names replaced by '_'."""
    unsafe_chars = '<>:"/\\|?*'
    cleaned = ''.join(
        '_' if ch in unsafe_chars or ord(ch) < 32 else ch
        for ch in title
    )
    # Trim leading/trailing dots and spaces so names like '..' cannot survive;
    # fall back to a fixed name if nothing is left.
    return cleaned.strip(' .') or 'untitled'


def save_webpage_as_pdf(url, username, password):
    """
    Fetch a page using HTTP Basic authentication and save it as a PDF.

    The output file is written to the current directory and named after the
    page's ``<title>`` (as returned by ``get_title``), with characters that
    are unsafe in file names replaced by ``_``.

    A PDF is written only when the server answers with status 200; otherwise
    an error message is printed and nothing is written.

    :param url: URL of the webpage to save.
    :param username: Username for authentication.
    :param password: Password for authentication.
    """
    # Using the session as a context manager closes it (and its pooled
    # connections) once the page has been downloaded.
    with requests.Session() as session:
        session.auth = HTTPBasicAuth(username, password)
        response = session.get(url)

    if response.status_code != 200:
        print('Error while fetching webpage content.')
        return

    content = response.content

    # The title comes from the remote page, so it is sanitised before being
    # used as a path: a '/' or '..' in it would otherwise make open() fail or
    # write outside the current directory.
    title = _safe_filename(get_title(content))
    pdf_output_path = f'{title}.pdf'

    pdf = HTML(string=content, base_url=url).write_pdf()

    with open(pdf_output_path, 'wb') as f:
        f.write(pdf)
    print(f'Webpage saved as {pdf_output_path}')
# Example usage: the PDF is named after the page title
webpage_url = 'https://www.example.com'
username = 'your_username'
password = 'your_password'
save_webpage_as_pdf(url=webpage_url, username=username, password=password)
Cześć Podróżniku!
Ta strona nie ma być typowym poradnikiem IT. Jej głównym celem jest zapisywanie krótkich notatek, które mogą się przydać w codziennym życiu podczas korzystania z różnych urządzeń i ich konfiguracji, np. ustawienia DHCP na routerze Cisco, ustawienia karty sieciowej na Linuksie itp.
Wszelkie prawa zastrzeżone
Dodaj komentarz