From e1f64717c206729258043c5fd063ffc76a5fdcf2 Mon Sep 17 00:00:00 2001 From: Alejandro Lembke Barrientos Date: Fri, 3 Oct 2025 04:01:51 +0000 Subject: [PATCH] Agregando el taller de lunar lander. --- docker-compose.yaml | 2 + proyectos/lunar-lander/Lunar_Lander.ipynb | 551 ++++++++++++++++++++++ 2 files changed, 553 insertions(+) create mode 100644 proyectos/lunar-lander/Lunar_Lander.ipynb diff --git a/docker-compose.yaml b/docker-compose.yaml index 3f4133e..81da847 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -3,6 +3,8 @@ version: '3' services: vscode: image: gitea.p-lao.com/aleleba/vscode:latest + environment: + GLOBAL_ENV_DONT_PROMPT_WSL_INSTALL: 1 volumes: - ./extensions.json:/home/extensions.json - ./custom-scripts:/usr/bin/custom-scripts diff --git a/proyectos/lunar-lander/Lunar_Lander.ipynb b/proyectos/lunar-lander/Lunar_Lander.ipynb new file mode 100644 index 0000000..12e7363 --- /dev/null +++ b/proyectos/lunar-lander/Lunar_Lander.ipynb @@ -0,0 +1,551 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# LUNAR LANDER\n", + "\n", + "El objetivo del juego es simple (¡pero aterrizar no lo es!): ¡aterrizar la nave espacial sana y salva en la plataforma designada! ¡Prepárate para un aterrizaje suave y heroico! 🚀🌕\n" + ], + "metadata": { + "id": "hmWbGiyvNNME" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Reglas y Punteo\n", + "En cada momento del juego, ganas o pierdes puntos (recompensa) dependiendo de cómo te vaya:\n", + "\n", + "**Aterrizaje y velocidad**: Ganas puntos si te acercas a la zona de aterrizaje y vas despacio. Pierdes puntos si te alejas o vas muy rápido.\n", + "\n", + "**Inclinación**: Pierdes puntos si la nave está muy inclinada. 
¡Tienes que mantenerla lo más horizontal posible!\n", + "\n", + "**Patas en el suelo**: Ganas **10** puntos por cada pata que toca el suelo en la zona de aterrizaje.\n", + "\n", + "**Motores**: Pierdes puntos por usar los motores: un poquito por los motores laterales y más por el motor principal. ¡Hay que usarlos con cuidado!\n", + "\n", + "**Final del juego**: Si te estrellas, pierdes **100** puntos. Si aterrizas suavemente en la plataforma, ¡ganas **100** puntos extra!\n", + "\n", + "Para considerar que has tenido éxito en un intento (episodio), ¡necesitas conseguir al menos **200** puntos en total!" + ], + "metadata": { + "id": "OvS3IEo_Pmv8" + } + }, + { + "cell_type": "markdown", + "source": [ + "## Instalación de librerías" + ], + "metadata": { + "id": "GiL_39bYC6dT" + } + }, + { + "cell_type": "markdown", + "source": [], + "metadata": { + "id": "Fg7rH6DJPneG" + } + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "isgdPYBPWrSE", + "collapsed": true + }, + "outputs": [], + "source": [ + "# Permite conectar codigo en C, C++ con Python\n", + "# Requerido por box2d\n", + "!pip install -q swig\n", + "\n", + "# Gymnasium provee entornos de simulacion, controles y califica resultado\n", + "!pip install -q \"gymnasium[classic-control]\"\n", + "!pip install -q gymnasium[box2d]\n", + "\n", + "# Para grabar y reproducir video\n", + "# !pip install moviepy\n", + "!pip install -q pyvirtualdisplay\n", + "\n", + "# Agente DQN (Deep Q-learning), al que entrenaremos\n", + "!pip install -q stable-baselines3" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Variables globales" + ], + "metadata": { + "id": "3l9R8gV2C_ZJ" + } + }, + { + "cell_type": "code", + "source": [ + "ENV_NAME = \"LunarLander-v3\" # Nombre del entorno\n", + "VIDEO_FOLDER = \"./video_prueba_de_vuelo\" # En esta carpeta se guardaran los videos del test de vuelo\n", + "EPISODES = 1 # Numero de episodios a grabar en la prueba de vuelo, se tratara de seleccionar el 
# Directory that receives the training logs: it is passed both to the Monitor
# wrapper and to DQN's `tensorboard_log` below.
LOG_DIR = "./tmp/dqn_lunar"

# ==============================================================================
# TRAINING A DQN AGENT (Stable-Baselines3)
# ==============================================================================

# Gymnasium provides the environment, its controls and the reward signal.
import gymnasium as gym
from gymnasium.wrappers import RecordVideo  # used later by the test-flight cell
import os

# DQN agent to be trained.
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv

# --- Training configuration ---
# Fixed seed so that two runs of this cell produce comparable results.
# Set to None to go back to non-deterministic training.
SEED = 42
# Number of environment steps to train for (workshop suggestion: 50,000 - 200,000).
TIMESTEPS = 100_000

# --- Environment setup ---
# Video recording is deliberately kept out of training; the environment used
# here has no video wrapper.
os.makedirs(LOG_DIR, exist_ok=True)

env_train = gym.make(
    ENV_NAME,
    continuous=False,
    gravity=-10,
    enable_wind=False,
    wind_power=15.0,
    turbulence_power=1.5,
)
# Single Monitor wrapper that writes episode statistics under LOG_DIR.
env_train = Monitor(env_train, LOG_DIR)

# Stable-Baselines3 trains on vectorized environments. The already-monitored
# env is wrapped directly in a DummyVecEnv; going through make_vec_env would
# add a second Monitor layer around it.
env_train_vec = DummyVecEnv([lambda: env_train])

# --- DQN model ---
# DQN is a deep Q-learning algorithm for discrete action spaces, which is what
# `continuous=False` selects above.
model = DQN(
    "MlpPolicy",             # multi-layer perceptron policy network
    env_train_vec,           # training environment
    learning_rate=0.0001,    # workshop range to experiment with: 0.00001 - 0.001
    buffer_size=10000,       # workshop range: 10000 - 50000
    learning_starts=5000,    # workshop range: 1000 - 10000
    batch_size=64,           # workshop options: 32, 64, 128
    gamma=0.99,              # discount factor (0.90 - 0.99): lower favours quick
                             # rewards, higher favours larger, later rewards
    verbose=1,               # print training progress
    tensorboard_log=LOG_DIR,
    seed=SEED,
)

# --- Learning loop ---
print(f"\n--- INICIANDO ENTRENAMIENTO DQN por {TIMESTEPS} pasos ---")

model.learn(
    total_timesteps=TIMESTEPS,
    log_interval=100,
)

# Save first, announce afterwards: the message claims the model was saved, so
# it must not be printed if saving fails.
model.save("modelo_nave_entrenada")
print("\n--- ENTRENAMIENTO FINALIZADO. Modelo entrenado guardado. ---")

# Closing the vectorized wrapper also closes the wrapped env_train.
env_train_vec.close()
See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.12/dist-packages/pkg_resources/__init__.py:3154: DeprecationWarning: Deprecated call to `pkg_resources.declare_namespace('sphinxcontrib')`.\n", + "Implementing implicit namespace packages (as specified in PEP 420) is preferred to `pkg_resources.declare_namespace`. See https://setuptools.pypa.io/en/latest/references/keywords.html#keyword-namespace-packages\n", + " declare_namespace(pkg)\n", + "/usr/local/lib/python3.12/dist-packages/jupyter_client/session.py:203: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).\n", + " return datetime.utcnow().replace(tzinfo=utc)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Using cpu device\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/jupyter_client/session.py:203: DeprecationWarning: datetime.datetime.utcnow() is deprecated and scheduled for removal in a future version. 
Use timezone-aware objects to represent datetimes in UTC: datetime.datetime.now(datetime.UTC).\n", + " return datetime.utcnow().replace(tzinfo=utc)\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "--- INICIANDO ENTRENAMIENTO DQN por 100000 pasos ---\n", + "Logging to ./tmp/dqn_lunar/DQN_4\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 213 |\n", + "| ep_rew_mean | -241 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 100 |\n", + "| fps | 990 |\n", + "| time_elapsed | 21 |\n", + "| total_timesteps | 21324 |\n", + "| train/ | |\n", + "| learning_rate | 0.0001 |\n", + "| loss | 1.19 |\n", + "| n_updates | 4080 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 318 |\n", + "| ep_rew_mean | -111 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 200 |\n", + "| fps | 858 |\n", + "| time_elapsed | 61 |\n", + "| total_timesteps | 53093 |\n", + "| train/ | |\n", + "| learning_rate | 0.0001 |\n", + "| loss | 0.729 |\n", + "| n_updates | 12023 |\n", + "----------------------------------\n", + "----------------------------------\n", + "| rollout/ | |\n", + "| ep_len_mean | 315 |\n", + "| ep_rew_mean | 8.29 |\n", + "| exploration_rate | 0.05 |\n", + "| time/ | |\n", + "| episodes | 300 |\n", + "| fps | 832 |\n", + "| time_elapsed | 101 |\n", + "| total_timesteps | 84629 |\n", + "| train/ | |\n", + "| learning_rate | 0.0001 |\n", + "| loss | 1.27 |\n", + "| n_updates | 19907 |\n", + "----------------------------------\n", + "\n", + "--- ENTRENAMIENTO FINALIZADO. Modelo entrenado guardado. ---\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Prueba de Vuelo" + ], + "metadata": { + "id": "nq6619RPJBFb" + } + }, + { + "cell_type": "code", + "source": [ + "# ==============================================================================\n", + "# 4. 
# TEST FLIGHT: run the trained agent and record the episode(s) to video.
from IPython.display import HTML
from base64 import b64encode
import glob
import io
from pyvirtualdisplay import Display

# Silence the deprecation warnings emitted by the hosted notebook's
# preinstalled dependencies.
import warnings
warnings.filterwarnings('ignore')

# 1. Start a virtual screen (for notebook hosts without a GUI).
# The handle is kept in `virtual_display`, not `display`, so it neither
# shadows nor gets overwritten by IPython's `display` function.
print("\n--- Configurando Pantalla Virtual ---")
virtual_display = None
try:
    virtual_display = Display(visible=0, size=(640, 480))
    virtual_display.start()
    print("Pantalla virtual iniciada.")
except Exception as e:
    # Best effort: keep going without a virtual screen.
    virtual_display = None
    print(f"Advertencia al iniciar pyvirtualdisplay: {e}. Continuaremos.")

# 2. Build the evaluation environment wrapped with RecordVideo.
os.makedirs(VIDEO_FOLDER, exist_ok=True)
print(f"Grabando {EPISODES} episodio(s) en la carpeta: {VIDEO_FOLDER}")

env_test = gym.make(
    ENV_NAME,
    continuous=False,
    gravity=-10,
    enable_wind=False,
    wind_power=15.0,
    turbulence_power=0.1,
    render_mode="rgb_array",
)
# RecordVideo wraps the base environment directly. Every one of the EPISODES
# episodes is recorded, so the message printed above matches what is written
# to disk (with EPISODES = 1 this is only episode 0).
env_test_video = RecordVideo(
    env_test,
    video_folder=VIDEO_FOLDER,
    episode_trigger=lambda episode_idx: episode_idx < EPISODES,
    name_prefix="prueba_de_vuelo",
)

# 3. Load the model saved by the training cell and fly EPISODES episodes.
model = DQN.load("modelo_nave_entrenada", env=env_test_video)

# Total reward of each recorded episode, in recording order.
episode_returns = []
try:
    for episode_idx in range(EPISODES):
        obs, info = env_test_video.reset()
        done = False
        truncated = False
        episode_return = 0.0
        while not (done or truncated):
            # The model picks the action; deterministic=True disables exploration.
            action, _ = model.predict(obs, deterministic=True)
            obs, reward, done, truncated, info = env_test_video.step(action)
            episode_return += float(reward)
        episode_returns.append(episode_return)
        print(f"Episodio {episode_idx + 1}: recompensa total = {episode_return:.2f}")
finally:
    # Closing the wrapper is done even if a step raises, so the recorder is
    # always shut down; the virtual screen is released as well.
    env_test_video.close()
    if virtual_display is not None:
        virtual_display.stop()

print("\n--- Grabación del video finalizada. ---")

# ==============================================================================
# 5. LOAD AND PLAY BACK THE TEST-FLIGHT VIDEO (imports for the playback cell)
# ==============================================================================
import os
import glob
import io
from IPython.display import HTML, display
from base64 import b64encode
def display_encoded_video(video_path):
    """Show an MP4 file inline in the notebook output.

    The file is read in full, Base64-encoded and embedded as a data URI in an
    HTML ``<video>`` element, followed by a dashed separator line.

    Args:
        video_path: Path of the MP4 file to display.

    Any error while reading or rendering the file is reported with a printed
    message instead of being raised, so one bad file does not stop the caller
    from showing the remaining videos.
    """
    # Imported locally under a distinct name so the function does not depend
    # on a global `display`, which other cells may rebind to something else.
    from IPython.display import HTML, display as ipython_display

    print(f"Mostrando: {video_path}")

    try:
        with io.open(video_path, 'rb') as f:
            video_bytes = f.read()
        video_encoded = b64encode(video_bytes).decode()

        # The encoded bytes must be interpolated into the <source> element;
        # without it nothing playable is rendered.
        html_tag = f"""
        <video width="640" controls>
            <source src="data:video/mp4;base64,{video_encoded}" type="video/mp4">
        </video>
        <br>
        --------------------------------------------------
        <br>
        """
        ipython_display(HTML(html_tag))

    except Exception as e:
        print(f"❌ ERROR al procesar o mostrar el video {video_path}: {e}")
        print("Esto podría ser por un archivo muy grande.")


# 2. Collect every .mp4 file in the video folder.
# Sorted by os.path.getctime; note that on Unix this is the time of the last
# metadata change rather than a true creation time.
list_of_files = sorted(
    glob.glob(os.path.join(VIDEO_FOLDER, "*.mp4")),
    key=os.path.getctime,
)

# 3. Show each video, or report that none was found.
if list_of_files:
    print(f"✅ Se encontraron {len(list_of_files)} videos para reproducir.")
    for video_file in list_of_files:
        display_encoded_video(video_file)
else:
    print(f"❌ No se encontró ningún archivo de video MP4 en {VIDEO_FOLDER}.")

--------------------------------------------------

# ----------------------------------------------------------------------
# TEST-FLIGHT RESULT REPORT
# ----------------------------------------------------------------------
# The values printed here are the ones left behind by the last
# env_test_video.step() call of the test-flight cell. They describe the final
# step only; `reward` is that step's reward, not the episode total.

# Names this cell needs from the test-flight cell.
required_names = ("reward", "done", "truncated", "info")
# Checked up front so that running this cell on a fresh kernel gives a clear
# instruction instead of a NameError traceback.
missing_names = [name for name in required_names if name not in globals()]

if missing_names:
    print(
        "⚠️ Ejecuta primero la celda 'Prueba de Vuelo'. "
        f"Variables no definidas: {', '.join(missing_names)}"
    )
else:
    print(f"Reward del último paso (Recompensa): {reward:.2f}")
    print(f"Done (El episodio terminó?): {done}")
    print(f"Truncated (Tuvo que interrumpirse?): {truncated}")
    print(f"Info (Información): {info}")