vilarin committed on
Commit
254517f
1 Parent(s): 1d4c579

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -7
app.py CHANGED
@@ -1,9 +1,3 @@
1
- import subprocess
2
- subprocess.run(
3
- 'pip install flash-attn --no-build-isolation',
4
- env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
5
- shell=True
6
- )
7
  import os
8
  import time
9
  import spaces
@@ -43,7 +37,6 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL)
43
  model = AutoModelForCausalLM.from_pretrained(
44
  MODEL,
45
  torch_dtype=torch.bfloat16,
46
- attn_implementation="flash_attention_2",
47
  device_map="auto",
48
  ignore_mismatched_sizes=True)
49
 
 
 
 
 
 
 
 
1
  import os
2
  import time
3
  import spaces
 
37
  model = AutoModelForCausalLM.from_pretrained(
38
  MODEL,
39
  torch_dtype=torch.bfloat16,
 
40
  device_map="auto",
41
  ignore_mismatched_sizes=True)
42