jordigonzm committed
Commit 8d4d3e5
Parent(s): a0031b9
check installation files
- system/proxy.py +70 -0
- system/pycuda_check.py +14 -0
- system/pynvml_check.py +19 -0
- system/tensorflow_check.py +9 -0
- system/torch_check.py +9 -0
system/proxy.py
ADDED
@@ -0,0 +1,70 @@
from http.server import BaseHTTPRequestHandler, HTTPServer
import http.client
import socket

# Backend server configuration
BACKEND_HOST = "localhost"
BACKEND_PORT = 8080

class TransparentProxy(BaseHTTPRequestHandler):
    def do_GET(self):
        self.proxy_request()

    def do_POST(self):
        self.proxy_request()

    def proxy_request(self):
        # Rewrite the path to prepend /v1
        modified_path = f"/v1{self.path}"
        print(f"Forwarding {self.command} request to: {modified_path}")

        # Open a connection to the backend with an extended timeout
        conn = http.client.HTTPConnection(BACKEND_HOST, BACKEND_PORT, timeout=300)  # 5-minute timeout

        try:
            # Read the request body, if any
            content_length = self.headers.get('Content-Length')
            if content_length:
                post_data = self.rfile.read(int(content_length))
                conn.request(self.command, modified_path, body=post_data, headers=self.headers)
            else:
                conn.request(self.command, modified_path, headers=self.headers)

            # Get the backend response
            backend_response = conn.getresponse()

            # Send the response status line to the client
            self.send_response(backend_response.status, backend_response.reason)

            # Forward all backend headers to the client
            for key, value in backend_response.getheaders():
                self.send_header(key, value)
            self.end_headers()

            # Stream the response body back to the client
            while True:
                chunk = backend_response.read(1024)
                if not chunk:
                    break
                self.wfile.write(chunk)
                self.wfile.flush()  # Make sure each chunk reaches the client immediately

        except socket.timeout:
            self.send_error(504, "Gateway Timeout: the backend did not respond in time.")
            print("Error: request to the backend timed out.")

        except Exception as e:
            self.send_error(500, f"Proxy error: {e}")
            print(f"Error handling the request: {e}")

        finally:
            conn.close()

def run(server_class=HTTPServer, handler_class=TransparentProxy, port=7860):
    server_address = ('', port)
    httpd = server_class(server_address, handler_class)
    print(f"Proxy running on port {port}")
    httpd.serve_forever()

if __name__ == "__main__":
    run()
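A quick way to exercise the proxy is to send a request to port 7860 and confirm it comes back from the backend with the /v1 prefix applied. A minimal sketch, assuming the proxy above is running and the backend on localhost:8080 exposes a /v1/models route (the route name is an assumption, not part of this commit):

import http.client

# Smoke test (assumes proxy.py is running on port 7860 and the backend on
# port 8080 serves /v1/models -- both assumptions, not part of this commit).
conn = http.client.HTTPConnection("localhost", 7860, timeout=10)
conn.request("GET", "/models")  # the proxy rewrites this path to /v1/models
response = conn.getresponse()
print(response.status, response.reason)
print(response.read().decode("utf-8", errors="replace"))
conn.close()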
system/pycuda_check.py
ADDED
@@ -0,0 +1,14 @@
import pycuda.driver as cuda
import pycuda.autoinit  # importing this already initializes CUDA and creates a context

try:
    cuda.init()  # redundant after pycuda.autoinit, but harmless
    print(f"Detected {cuda.Device.count()} CUDA device(s).")

    for i in range(cuda.Device.count()):
        gpu = cuda.Device(i)
        print(f"Device {i}: {gpu.name()}")
        print(f"  Compute Capability: {gpu.compute_capability()}")
        print(f"  Total Memory: {gpu.total_memory() // (1024 ** 2)} MB")
except cuda.Error as e:
    print(f"CUDA initialization failed: {e}")
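As noted in the comments above, importing pycuda.autoinit already initializes the driver and creates a context on device 0, so the explicit cuda.init() is redundant. A minimal variant without autoinit, for enumeration only (a sketch, not part of the commit):

import pycuda.driver as cuda

# cuda.init() alone is enough to enumerate devices; no context is needed
# for the queries below.
cuda.init()
for i in range(cuda.Device.count()):
    gpu = cuda.Device(i)
    print(f"Device {i}: {gpu.name()} (CC {gpu.compute_capability()})")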
system/pynvml_check.py
ADDED
@@ -0,0 +1,19 @@
import pynvml

try:
    pynvml.nvmlInit()
    device_count = pynvml.nvmlDeviceGetCount()
    print(f"Number of GPUs: {device_count}")

    for i in range(device_count):
        handle = pynvml.nvmlDeviceGetHandleByIndex(i)
        name = pynvml.nvmlDeviceGetName(handle)
        memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
        # Older pynvml releases return bytes here; newer ones return str.
        if isinstance(name, bytes):
            name = name.decode('utf-8')
        print(f"GPU {i}: {name}")
        print(f"  Memory Total: {memory_info.total / (1024 ** 2)} MB")
        print(f"  Memory Free: {memory_info.free / (1024 ** 2)} MB")
        print(f"  Memory Used: {memory_info.used / (1024 ** 2)} MB")

    pynvml.nvmlShutdown()
except pynvml.NVMLError as e:
    print(f"NVML error: {e}")
system/tensorflow_check.py
ADDED
@@ -0,0 +1,9 @@
import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    print(f"Detected {len(gpus)} GPU(s):")
    for gpu in gpus:
        print(f"  - {gpu.name}")
else:
    print("No GPUs detected by TensorFlow. Check your configuration.")
system/torch_check.py
ADDED
@@ -0,0 +1,9 @@
import torch

if torch.cuda.is_available():
    print("CUDA is available.")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
    print(f"CUDA Version: {torch.version.cuda}")
    print(f"cuDNN Version: {torch.backends.cudnn.version()}")
else:
    print("CUDA is not available. Check your driver and CUDA installation.")
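Since all four checks are standalone scripts, they can be run in sequence. A hypothetical convenience runner (not part of the commit) that keeps going when any single check fails:

import subprocess
import sys

# Hypothetical runner (not part of the commit): execute each installation
# check in its own interpreter so a missing package in one does not abort
# the others. Assumes the scripts live under system/ as committed.
CHECKS = [
    "system/pycuda_check.py",
    "system/pynvml_check.py",
    "system/tensorflow_check.py",
    "system/torch_check.py",
]

for script in CHECKS:
    print(f"--- {script} ---")
    subprocess.run([sys.executable, script], check=False)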