robertou2 committed
Commit a671d2f
1 Parent(s): 4d9f951

Create app.py

Files changed (1)
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
+ import tweepy as tw
+ import streamlit as st
+ import pandas as pd
+ import torch
+ import numpy as np
+ import re
+
+ from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, AdamW
+
+ # Load the fine-tuned sexism-detection model and its tokenizer from the Hugging Face Hub.
+ tokenizer = AutoTokenizer.from_pretrained('hackathon-pln-es/twitter_sexismo-finetuned-exist2021-metwo')
+ model = AutoModelForSequenceClassification.from_pretrained("hackathon-pln-es/twitter_sexismo-finetuned-exist2021-metwo")
+
+ # Run inference on the GPU if one is available, otherwise on the CPU.
+ if torch.cuda.is_available():
+     device = torch.device("cuda")
+     print('I will use the GPU:', torch.cuda.get_device_name(0))
+ else:
+     print('No GPU available, using the CPU instead.')
+     device = torch.device("cpu")
+ model.to(device)
+
+ # Twitter API credentials are read from Streamlit secrets.
+ consumer_key = st.secrets["consumer_key"]
+ consumer_secret = st.secrets["consumer_secret"]
+ access_token = st.secrets["access_token"]
+ access_token_secret = st.secrets["access_token_secret"]
+ auth = tw.OAuthHandler(consumer_key, consumer_secret)
+ auth.set_access_token(access_token, access_token_secret)
+ api = tw.API(auth, wait_on_rate_limit=True)
+
+ st.title('Análisis de comentarios sexistas en Twitter con Tweepy y HuggingFace Transformers')
+ st.markdown('Esta app utiliza tweepy para descargar tweets de Twitter en base a la información de entrada y procesa los tweets usando transformers de HuggingFace para detectar comentarios sexistas. El resultado y los tweets correspondientes se almacenan en un dataframe que se muestra como salida.')
+
+ def run():
+     with st.form(key='Introduzca nombre'):
+         search_words = st.text_input('Introduzca el término para analizar')
+         number_of_tweets = st.number_input('Introduzca número de tweets a analizar. Máximo 50', 0, 50, 10)
+         submit_button = st.form_submit_button(label='Submit')
+         if submit_button:
+             # Download the requested number of tweets matching the search term.
+             tweets = tw.Cursor(api.search_tweets, q=search_words).items(number_of_tweets)
+             tweet_list = [i.text for i in tweets]
+             text = pd.DataFrame(tweet_list)
+             text1 = text[0].values
+             # Tokenize all tweets in one batch, padding/truncating to 128 tokens.
+             indices1 = tokenizer.batch_encode_plus(text1.tolist(),
+                                                    max_length=128,
+                                                    add_special_tokens=True,
+                                                    return_attention_mask=True,
+                                                    padding='max_length',
+                                                    truncation=True)
+             input_ids1 = indices1["input_ids"]
+             attention_masks1 = indices1["attention_mask"]
+             prediction_inputs1 = torch.tensor(input_ids1)
+             prediction_masks1 = torch.tensor(attention_masks1)
+             # Set the batch size.
+             batch_size = 25
+             # Create the DataLoader.
+             prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
+             prediction_sampler1 = SequentialSampler(prediction_data1)
+             prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
+             print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs1)))
+             # Put the model in evaluation mode.
+             model.eval()
+             # Tracking variables
+             predictions = []
+             # Predict
+             for batch in prediction_dataloader1:
+                 batch = tuple(t.to(device) for t in batch)
+                 # Unpack the inputs from our dataloader.
+                 b_input_ids1, b_input_mask1 = batch
+                 # Tell the model not to compute or store gradients, saving memory and speeding up prediction.
+                 with torch.no_grad():
+                     # Forward pass, calculate logit predictions.
+                     outputs1 = model(b_input_ids1, token_type_ids=None, attention_mask=b_input_mask1)
+                 logits1 = outputs1[0]
+                 # Move logits to CPU.
+                 logits1 = logits1.detach().cpu().numpy()
+                 # Store predictions.
+                 predictions.append(logits1)
+             # Flatten the per-batch logits and take the argmax as the predicted class.
+             flat_predictions = [item for sublist in predictions for item in sublist]
+             flat_predictions = np.argmax(flat_predictions, axis=1).flatten()
+             # p = [i for i in classifier(tweet_list)]
+             df = pd.DataFrame(list(zip(tweet_list, flat_predictions)),
+                               columns=['Latest ' + str(number_of_tweets) + ' Tweets on ' + search_words, 'Sexista'])
+             df['Sexista'] = np.where(df['Sexista'] == 0, 'No Sexista', 'Sexista')
+             st.table(df)
+             # st.write(df)
+
+ run()
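
The commented-out `classifier(tweet_list)` line points to a simpler alternative: the same checkpoint can be wrapped in the Transformers `pipeline` API, which handles tokenization, batching, and the argmax step internally. A minimal sketch under assumptions not in the commit (the `classifier` name, the placeholder tweets, and the checkpoint using the default `LABEL_0`/`LABEL_1` id2label mapping are mine):

import torch
from transformers import pipeline

# Hypothetical sketch: serve the same Hub model through the high-level pipeline API.
classifier = pipeline(
    "text-classification",
    model="hackathon-pln-es/twitter_sexismo-finetuned-exist2021-metwo",
    device=0 if torch.cuda.is_available() else -1,  # GPU index, or -1 for CPU
)

tweet_list = ["primer tweet de ejemplo", "segundo tweet de ejemplo"]  # placeholder inputs
results = classifier(tweet_list)
# Assumes the default LABEL_0 / LABEL_1 names; index 1 maps to 'Sexista', as in the app.
labels = ['Sexista' if r['label'].endswith('1') else 'No Sexista' for r in results]

Note that the app only starts if the four Twitter credentials (`consumer_key`, `consumer_secret`, `access_token`, `access_token_secret`) are available to `st.secrets`, for example via the Space's secret settings or a local `.streamlit/secrets.toml`, since accessing a missing key raises an error at startup.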