# RWKV v5

Simple memory training for a small model

**Note:** This project assumes you have the rwkv-infctx conda env setup

# Basic Setup

In [1]:
# First lets setup the various directories, and init the model
!ls ../../../../../
!mkdir -p ../../../../../model/
!mkdir -p ../../../../../datapath/
!mkdir -p ../../../../../checkpoint/

CITATION.cff  RWKV-v4wavenet	 RWKV-v5headsize32  checkpoint	notebook
LICENSE       RWKV-v5		 RWKV-v5r2	    datapath	output
README.md     RWKV-v5altwavenet  RWKV-v5rstack	    docker
RWKV-v4neo    RWKV-v5headsize2x  RWKV-v5wavenet     model


In [2]:
# Additional dependencies for eval stuff
!pip install -q aiocsv aiofiles

[0m

In [3]:
DEEPSPEED_STRAT="deepspeed_stage_1"
GPU_DEVICES="auto"
ENABLE_WANDB=True

# Layer count and embed dim to start with
LAYER_COUNT=12
EMBED_DIM=2048

EMBED_SCALE=0.1
EMBED_SCALE_LABEL=str(EMBED_SCALE).replace(".", "_")

WANDB_PREFIX=f"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE}"
FILENAME_PREFIX=f"v5r3-L{LAYER_COUNT}-D{EMBED_DIM}-E{EMBED_SCALE_LABEL}"

print("DEEPSPEED_STRAT:", DEEPSPEED_STRAT)
print("ENABLE_WANDB:", ENABLE_WANDB)
print("GPU_DEVICES:", GPU_DEVICES)

if ENABLE_WANDB:
    WANDB_MODE="online"
else:
    WANDB_MODE="disabled"

# Computing the notebook, and various paths
import os
NOTEBOOK_DIR=os.path.dirname(os.path.abspath("__file__"))
CONFIG_DIR=os.path.abspath(os.path.join(NOTEBOOK_DIR, "../"))
PROJECT_DIR=os.path.abspath(os.path.join(CONFIG_DIR, "../../../../"))
TRAINER_DIR=os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))
INFERENCE_DIR=os.path.abspath(os.path.join(PROJECT_DIR, "./RWKV-v5/"))

# Get the notebook dir name
DIR_NAME=os.path.basename(NOTEBOOK_DIR)

# Log names and dir
print("DIR_NAME:", DIR_NAME)
print("NOTEBOOK_DIR:", NOTEBOOK_DIR)
print("INFERENCE_DIR:", INFERENCE_DIR)
print("TRAINER_DIR:", TRAINER_DIR)
print("PROJECT_DIR:", PROJECT_DIR)

DEEPSPEED_STRAT: deepspeed_stage_1
ENABLE_WANDB: True
GPU_DEVICES: auto
DIR_NAME: L12-D2048-E1e-1-ctx4k
NOTEBOOK_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k
INFERENCE_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5
TRAINER_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/RWKV-v5
PROJECT_DIR: /actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer


In [4]:
# Init the model
!cd "{TRAINER_DIR}" && \
    export RWKV_WAVENET_LAYERS="{RWKV_WAVENET_LAYERS}" && \
    python3 ./init_model.py \
        --n_layer "{LAYER_COUNT}" --n_embd "{EMBED_DIM}" \
        --emb-scale "{EMBED_SCALE}" \
        --vocab_size neox --skip-if-exists \
        "../model/{FILENAME_PREFIX}-neox-init.pth"

/usr/bin/sh: 1: cd: can't cd to {TRAINER_DIR}


## Enwiki Stage 1 : Foundation 4k model training

In [5]:
# Lets preload the requried dataset 
!cd "{TRAINER_DIR}" && \
    python3 preload_datapath.py "{CONFIG_DIR}/config-enwiki-4k.yaml"

Saving the dataset (0/5 shards):   0%|         | 0/81505 [00:00<?, ? examples/s]

Saving the dataset (0/5 shards):   6%| | 5000/81505 [00:00<00:01, 40020.30 examp

Saving the dataset (0/5 shards):  13%|▏| 11000/81505 [00:00<00:01, 43937.85 exam

Saving the dataset (0/5 shards):  20%|▏| 16301/81505 [00:00<00:01, 46422.21 examSaving the dataset (1/5 shards):  20%|▏| 16301/81505 [00:00<00:01, 46422.21 exam

Saving the dataset (1/5 shards):  27%|▎| 22301/81505 [00:00<00:01, 48661.83 exam

Saving the dataset (1/5 shards):  35%|▎| 28301/81505 [00:00<00:01, 50444.99 exam

Saving the dataset (2/5 shards):  40%|▍| 32602/81505 [00:00<00:00, 50444.99 exam

Saving the dataset (2/5 shards):  44%|▍| 35602/81505 [00:00<00:00, 51589.83 exam

Saving the dataset (2/5 shards):  51%|▌| 41602/81505 [00:00<00:00, 45740.30 exam

Saving the dataset (2/5 shards):  58%|▌| 47602/81505 [00:01<00:00, 46874.25 examSaving the dataset (3/5 shards):  60%|▌| 48903/81505 [00:01<00:00, 46874.25 exam

Saving the dataset (3/5 shards):  67%|▋| 54903/81505 [00:01<00:00, 48825.20 exam

Saving the dataset (3/5 shards):  75%|▋| 60903/81505 [00:01<00:00, 50738.43 exam

Saving the dataset (4/5 shards):  80%|▊| 65204/81505 [00:01<00:00, 50738.43 exam

Saving the dataset (4/5 shards):  84%|▊| 68204/81505 [00:01<00:00, 51451.93 exam

Saving the dataset (4/5 shards):  92%|▉| 75204/81505 [00:01<00:00, 44264.66 exam

Saving the dataset (4/5 shards): 100%|▉| 81204/81505 [00:01<00:00, 45739.71 examSaving the dataset (5/5 shards): 100%|█| 81505/81505 [00:01<00:00, 45739.71 examSaving the dataset (5/5 shards): 100%|█| 81505/81505 [00:01<00:00, 47312.07 exam
Saving the dataset (0/1 shards):   0%|           | 0/410 [00:00<?, ? examples/s]Saving the dataset (1/1 shards): 100%|█| 410/410 [00:00<00:00, 40194.11 examplesSaving the dataset (1/1 shards): 100%|█| 410/410 [00:00<00:00, 38725.95 examples


In [6]:
# Start the foundation model training
!cd "{TRAINER_DIR}" && \
    export RWKV_WAVENET_LAYERS="{RWKV_WAVENET_LAYERS}" && \
    export WANDB_MODE="{WANDB_MODE}" && \
    python lightning_trainer.py fit \
        -c "{CONFIG_DIR}/config-enwiki-4k.yaml" \
        --trainer.logger.init_args.name="{WANDB_PREFIX} - Enwiki-4k Foundation (train-ctx=4k, {DEEPSPEED_STRAT})" \
        --trainer.strategy="{DEEPSPEED_STRAT}" \
        --trainer.devices="{GPU_DEVICES}" \
        --trainer.callbacks.init_args.dirpath="../checkpoint/{FILENAME_PREFIX}-enwiki-4k/" \
        --model.load_model="../model/{FILENAME_PREFIX}-neox-init.pth" \
        --model.ctx_len=4096 \
        --model.bptt_learning_range=1

/usr/bin/sh: 1: cd: can't cd to {TRAINER_DIR}


In [7]:
# Lets export the model from the checkpoint
!cd "{TRAINER_DIR}" && \
    python export_checkpoint.py "../checkpoint/{FILENAME_PREFIX}-enwiki-4k/last.ckpt" "../model/{FILENAME_PREFIX}-enwiki-4k.pth" "bf16"
!cd "{TRAINER_DIR}" && ls -alh "../model/{FILENAME_PREFIX}-enwiki-4k.pth"

/usr/bin/sh: 1: python: not found


ls: cannot access '../model/v5r3-L12-D2048-E0_1-enwiki-4k.pth': No such file or directory


In [8]:
# # Lets do a quick dragon prompt validation
!cd "{INFERENCE_DIR}" && \
    export RWKV_WAVENET_LAYERS="{RWKV_WAVENET_LAYERS}" && \
    python3 dragon_test.py "../model/{FILENAME_PREFIX}-enwiki-4k.pth" "cuda fp32"

/usr/bin/sh: 1: cd: can't cd to {INFERENCE_DIR}


In [9]:
# Lets do a quick memory test
!export RWKV_WAVENET_LAYERS="{RWKV_WAVENET_LAYERS}" && \
        python3 ../memory_script/eval_v5_memory_guided.py "{PROJECT_DIR}/model/{FILENAME_PREFIX}-enwiki-4k.pth"

python3: can't open file '/actions-runner/_work/RWKV-infctx-trainer/RWKV-infctx-trainer/notebook/experiment/rwkv-x-exp/v5-r3-memory/L12-D2048-E1e-1-ctx4k/../memory_script/eval_v5_memory_guided.py': [Errno 2] No such file or directory
