ওয়েব পেজ স্ক্রাপ করি

রিকোয়েস্ট পাঠাই

import requests
from bs4 import BeautifulSoup
url = 'https://subslikescript.com/movie/Titanic-120338'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'}
response = requests.get(url,headers=headers) 
soup = BeautifulSoup(response.content,'html.parser')

এখন soup ভ্যারিয়েবল এর মধ্যে html গুলো চলে এসেছে এইরকম html আউটপুট পাবো

html কে soup ভ্যারিয়েবল এ স্টোর করেছি এখন soup ভ্যারিয়েবল কে ব্যবহার করে html এলিমেন্টকে সিলেক্ট করবো

আমাদের ওয়েবসাইট এর html টি নিচের মত

এখন আমরা নিচের নিয়মে স্ক্রাপ করবো :

import requests
from bs4 import BeautifulSoup
# Send a request to the webpage
url = 'https://subslikescript.com/movie/Titanic-120338'
response = requests.get(url)
# Parse the HTML content of the webpage using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
article = soup.find('article',class_='main-article')
h1 = soup.find('h1').get_text()
p = article.find('p',class_='plot').get_text()
transscript = article.find('div',class_='full-script').get_text() 
# Print the parsed HTML content
print(h1)
print(p)
print(transscript)

ব্যাখ্যা :

রিকোয়েস্ট পাঠাই

import requests
from bs4 import BeautifulSoup
# Send a request to the webpage
url = 'https://subslikescript.com/movie/Titanic-120338'
response = requests.get(url)
# Parse the HTML content of the webpage using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')

article কে ধরি

article = soup.find('article',class_='main-article')

article এর মধ্যে h1 কে ধরে প্রিন্ট করি

h1 = article.find('h1').get_text()
print(h1)

article এর মধ্যে p কে ধরে প্রিন্ট করি

p = article.find('p',class_='plot').get_text()
print(p)

article এর মধ্যে div class_=’full-script কে ধরে প্রিন্ট করি

transscript = article.find('div',class_='full-script').get_text() 
print(transscript)

এক্সপোর্ট করি

import requests
from bs4 import BeautifulSoup
# Send a request to the webpage
url = 'https://subslikescript.com/movie/Titanic-120338'
response = requests.get(url)
# Parse the HTML content of the webpage using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
article = soup.find('article',class_='main-article')
h1 = soup.find('h1').get_text()
p = article.find('p',class_='plot').get_text()
transscript = article.find('div',class_='full-script').get_text() 

with open(f'{h1}.txt','w',encoding='utf-8') as file:
    file.write(transscript)

BeautifulSoup

রিকোয়েস্ট পাঠাই

ব্যাখ্যা :

এক্সপোর্ট করি

How can we help?