File size: 1,221 Bytes
8c2d9a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import joblib
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.tokenize import word_tokenize
import re

# Deserialize the classifier and the vectorizer artifacts from the working
# directory. NOTE(review): joblib.load unpickles its input, so both files
# must come from a trusted source.
model = joblib.load("hard_voting_classifier.pkl")
vectorizer = joblib.load("vectorizer.pkl")

# Read the custom stopword file: one entry per line, with surrounding
# whitespace (including the trailing newline) stripped from each entry.
with open("Indonesia_stopwords.txt", "r") as stopword_file:
    custom_stopwords = [line.strip() for line in stopword_file]

# Stemmer shared by every preprocess_data() call; built lazily on first use
# so that importing this module does not pay the construction cost.
_STEMMER = None


def _get_stemmer():
    """Return the shared Sastrawi stemmer, creating it on first use."""
    global _STEMMER
    if _STEMMER is None:
        _STEMMER = StemmerFactory().create_stemmer()
    return _STEMMER


def preprocess_data(text):
    """Normalize raw text into a space-joined string of stemmed tokens.

    The steps run in this order:

    1. Lowercase the text.
    2. Remove ``http://`` / ``https://`` URLs, then runs of digits, then
       every character that is not an ASCII letter, a digit or whitespace.
       Characters are deleted, not replaced by a space, so words separated
       only by punctuation end up fused together.
    3. Tokenize with NLTK's ``word_tokenize``.
    4. Drop tokens listed in ``custom_stopwords`` and stem the rest.

    NOTE(review): presumably this mirrors the preprocessing applied when the
    vectorizer was fitted, so the text transformations are intentionally
    left exactly as they were -- confirm before changing any of them.

    Args:
        text: The string to clean.

    Returns:
        The surviving stemmed tokens joined by single spaces; an empty
        string when no token survives.
    """
    # Case folding
    text = text.lower()

    # Sentence normalization
    text = re.sub(r'http[s]?://\S+', '', text)
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'[^a-zA-Z0-9\s]', '', text)

    # Tokenization
    tokens = word_tokenize(text)

    # Stopword removal and stemming. The set is rebuilt on each call so that
    # later changes to custom_stopwords are still honoured, while each
    # per-token membership test is a hash lookup instead of a list scan.
    stopwords = set(custom_stopwords)
    stem = _get_stemmer().stem
    return ' '.join(stem(word) for word in tokens if word not in stopwords)

def predict_sentiment(text):
    """Classify ``text`` as ``"Positive"`` or ``"Negative"``.

    The text is cleaned with ``preprocess_data``, turned into features by
    the module-level ``vectorizer`` and passed to the module-level
    ``model``.

    Args:
        text: The raw string to classify.

    Returns:
        ``"Positive"`` when the first predicted label equals ``1``;
        ``"Negative"`` for any other label.
    """
    # The vectorizer is given a one-element list, so exactly one prediction
    # is read back.
    features = vectorizer.transform([preprocess_data(text)])
    label = model.predict(features)[0]
    if label == 1:
        return "Positive"
    return "Negative"