robertou2 committed
Commit 4a941bb
1 Parent(s): 3c17604

Update app.py

Files changed (1)
  1. app.py +49 -54
app.py CHANGED
@@ -27,66 +27,61 @@ auth = tw.OAuthHandler(consumer_key, consumer_secret)
 auth.set_access_token(access_token, access_token_secret)
 api = tw.API(auth, wait_on_rate_limit=True)
 
+
+
+
 st.title('Analisis de comentarios sexistas en Twitter con Tweepy and HuggingFace Transformers')
 st.markdown('Esta app utiliza tweepy para descargar tweets de twitter en base a la información de entrada y procesa los tweets usando transformers de HuggingFace para detectar comentarios sexistas. El resultado y los tweets correspondientes se almacenan en un dataframe para mostrarlo que es lo que se ve como resultado')
 
-def principal(tweets,number_of_tweets):
-    tweet_list = [i.text for i in tweets]
-    text= pd.DataFrame(tweet_list)
-    text1=text[0].values
-    indices1=tokenizer.batch_encode_plus(text1.tolist(), max_length=128,add_special_tokens=True,return_attention_mask=True,pad_to_max_length=True,truncation=True)
-    input_ids1=indices1["input_ids"]
-    attention_masks1=indices1["attention_mask"]
-    prediction_inputs1= torch.tensor(input_ids1)
-    prediction_masks1 = torch.tensor(attention_masks1)
-    # Set the batch size.
-    batch_size = 25
-    # Create the DataLoader.
-    prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
-    prediction_sampler1 = SequentialSampler(prediction_data1)
-    prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
-    print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs1)))
-    # Put model in evaluation mode
-    model.eval()
-    # Tracking variables
-    predictions = []
-    # Predict
-    for batch in prediction_dataloader1:
-        batch = tuple(t.to(device) for t in batch)
-        # Unpack the inputs from our dataloader
-        b_input_ids1, b_input_mask1 = batch
-        # Telling the model not to compute or store gradients, saving memory and # speeding up prediction
-        with torch.no_grad():
-            # Forward pass, calculate logit predictions
-            outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
-        logits1 = outputs1[0]
-        # Move logits and labels to CPU
-        logits1 = logits1.detach().cpu().numpy()
-        # Store predictions and true labels
-        predictions.append(logits1)
-    flat_predictions = [item for sublist in predictions for item in sublist]
-    flat_predictions = np.argmax(flat_predictions, axis=1).flatten()#p = [i for i in classifier(tweet_list)]
-    df = pd.DataFrame(list(zip(tweet_list, flat_predictions)),columns =['Latest'+str(number_of_tweets)+'Tweets'+' on '+search_words, 'Sexista'])
-    df['Sexista']= np.where(df['Sexista']== 0, 'No Sexista', 'Sexista')
-    st.table(df)
-
 def run():
     with st.form(key='Introduzca nombre'):
-        search_words = st.text_input('Introduzca el termino para analizar o Usuario a analizar')
+        search_words = st.text_input('Introduzca el termino para analizar')
         number_of_tweets = st.number_input('Introduzca número de twweets a analizar. Máximo 50', 0,50,10)
-        submit_button = st.form_submit_button(label='Término')
-        submit_button1 = st.form_submit_button(label='Usuario')
-
+        submit_button = st.form_submit_button(label='Submit')
     if submit_button:
-        date_since = "2020-09-14"
-        new_search = search_words + " -filter:retweets"
-        tweets = tw.Cursor(api.search_tweets,q=new_search,lang="es",since=date_since).items(number_of_tweets)
-        principal(tweets,number_of_tweets)
-        #tweets =tw.Cursor(api.search_tweets,q=search_words).items(number_of_tweets)
-    if submit_button1:
-        tweets = api.user_timeline(screen_name = screen_name,count=500)
-        principal(tweets,number_of_tweets)
-
-
+        tweets =tw.Cursor(api.search_tweets,q=search_words).items(number_of_tweets)
+        tweet_list = [i.text for i in tweets]
+        text= pd.DataFrame(tweet_list)
+        text1=text[0].values
+        indices1=tokenizer.batch_encode_plus(text1.tolist(),
+                                             max_length=128,
+                                             add_special_tokens=True,
+                                             return_attention_mask=True,
+                                             pad_to_max_length=True,
+                                             truncation=True)
+        input_ids1=indices1["input_ids"]
+        attention_masks1=indices1["attention_mask"]
+        prediction_inputs1= torch.tensor(input_ids1)
+        prediction_masks1 = torch.tensor(attention_masks1)
+        # Set the batch size.
+        batch_size = 25
+        # Create the DataLoader.
+        prediction_data1 = TensorDataset(prediction_inputs1, prediction_masks1)
+        prediction_sampler1 = SequentialSampler(prediction_data1)
+        prediction_dataloader1 = DataLoader(prediction_data1, sampler=prediction_sampler1, batch_size=batch_size)
+        print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs1)))
+        # Put model in evaluation mode
+        model.eval()
+        # Tracking variables
+        predictions = []
+        # Predict
+        for batch in prediction_dataloader1:
+            batch = tuple(t.to(device) for t in batch)
+            # Unpack the inputs from our dataloader
+            b_input_ids1, b_input_mask1 = batch
+            # Telling the model not to compute or store gradients, saving memory and # speeding up prediction
+            with torch.no_grad():
+                # Forward pass, calculate logit predictions
+                outputs1 = model(b_input_ids1, token_type_ids=None,attention_mask=b_input_mask1)
+            logits1 = outputs1[0]
+            # Move logits and labels to CPU
+            logits1 = logits1.detach().cpu().numpy()
+            # Store predictions and true labels
+            predictions.append(logits1)
+        flat_predictions = [item for sublist in predictions for item in sublist]
+        flat_predictions = np.argmax(flat_predictions, axis=1).flatten()#p = [i for i in classifier(tweet_list)]
+        df = pd.DataFrame(list(zip(tweet_list, flat_predictions)),columns =['Latest'+str(number_of_tweets)+'Tweets'+' on '+search_words, 'Sexista'])
+        df['Sexista']= np.where(df['Sexista']== 0, 'No Sexista', 'Sexista')
+        st.table(df)
 #st.write(df)
 run()
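
Note: the added code keeps `pad_to_max_length=True`, which recent versions of transformers deprecate in favour of `padding="max_length"`. Below is a minimal sketch of the same encode-and-predict step with the non-deprecated arguments, assuming `tokenizer`, `model`, and `device` are the already-loaded objects referenced elsewhere in app.py and `tweet_list` is the list of tweet texts; `predict_labels` is a hypothetical helper name, not part of this commit.

import numpy as np
import torch

def predict_labels(tweet_list, tokenizer, model, device, batch_size=25, max_length=128):
    # Tokenize all tweets at once; padding="max_length" replaces the
    # deprecated pad_to_max_length=True used in app.py.
    enc = tokenizer(tweet_list,
                    max_length=max_length,
                    padding="max_length",
                    truncation=True,
                    return_tensors="pt")
    input_ids = enc["input_ids"]
    attention_mask = enc["attention_mask"]
    preds = []
    model.eval()
    with torch.no_grad():
        for start in range(0, len(tweet_list), batch_size):
            ids = input_ids[start:start + batch_size].to(device)
            mask = attention_mask[start:start + batch_size].to(device)
            # Forward pass without gradients; keep only the logits.
            logits = model(ids, attention_mask=mask).logits
            preds.append(logits.detach().cpu().numpy())
    # 0 -> 'No Sexista', 1 -> 'Sexista', mirroring the mapping applied in app.py.
    return np.argmax(np.concatenate(preds), axis=1)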