commit 88bdf263d1
5 changed files with 2049 additions and 0 deletions
@@ -0,0 +1,10 @@
# Python-generated files
__pycache__/
*.py[oc]
build/
dist/
wheels/
*.egg-info

# Virtual environments
.venv
@@ -0,0 +1 @@
3.11
@@ -0,0 +1,18 @@
[project]
name = "diffusion-points"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
    "distrax>=0.1.5",
    "einops>=0.8.1",
    "flax>=0.10.6",
    "jax[cuda12]>=0.6.0",
    "numpy>=2.2.5",
    "orbax>=0.1.9",
    "seaborn>=0.13.2",
    "tqdm>=4.67.1",
    "tyro>=0.9.19",
    "wandb>=0.19.10",
]
@@ -0,0 +1,258 @@
import tyro
from functools import partial
from dataclasses import dataclass
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

import jax
import jax.numpy as jnp
import flax.nnx as nnx
import optax


@dataclass
class Config:
    """Flow/DDM training of a simple distribution."""

    space_dimensions: int = 2
    """The dimensionality of the distribution's space."""

    num_hidden_layers: int = 4
    """Number of hidden layers in the MLP."""

    hidden_size: int = 64
    """The size of the hidden layers of the MLP."""

    mlp_bias: bool = True
    """Enable the bias on every layer of the MLP."""

    fourier_dim: int = 6
    """Fourier dimensions. Will be concatenated to the input of the MLP."""

    fourier_max_period: float = 10_000.0
    """Maximum period of the Fourier time features."""

    num_steps: int = 100_000
    """How many steps of gradient descent to perform."""

    batch_size: int = 512
    """How many samples per mini-batch."""

    r1: float = 0.3
    """Inner radius of the donut for p_data."""

    r2: float = 0.8
    """Outer radius of the donut for p_data."""

    sample_steps: int = 100
    """The number of steps taken during sampling."""

    seed: int = 42
    """The seed used for randomness."""

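# Example invocation (tyro turns the Config fields above into CLI flags; the
# script name here is a placeholder, not from the original commit):
#   python train.py --num-steps 50000 --hidden-size 128 --r1 0.2 --r2 0.9
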
# --- Data generation process ----------------------------
@partial(jax.jit, static_argnums=(1,))
def sample_p_data(
    key: jax.random.PRNGKey, num_samples: int, r1: float, r2: float
) -> jax.Array:
    key_r, key_t = jax.random.split(key)

    u = jax.random.uniform(key_r, (num_samples,), minval=0.0, maxval=1.0)
    # Radius CDF on the annulus: F(r) = (r^2 - r1^2) / (r2^2 - r1^2); invert it
    # (inverse-transform sampling) so points are uniform over the donut's area:
    r = jnp.sqrt(u * (r2**2 - r1**2) + r1**2)

    # Sample angle uniformly in [0, 2pi]
    theta = jax.random.uniform(key_t, (num_samples,), minval=0.0, maxval=2 * jnp.pi)

    # Convert to cartesian
    x = r * jnp.cos(theta)
    y = r * jnp.sin(theta)

    return jnp.stack((x, y), axis=-1)

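# Quick sanity check (illustrative, not part of the original commit): samples
# should land inside the annulus r1 <= ||p|| <= r2:
#   pts = sample_p_data(jax.random.PRNGKey(0), 1024, 0.3, 0.8)
#   radii = jnp.linalg.norm(pts, axis=-1)
#   assert bool(jnp.all((radii >= 0.3) & (radii <= 0.8)))

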
# --- Model definition -----------------------------------
class MLP(nnx.Module):
    def __init__(
        self,
        in_features: int,
        out_features: int,
        fourier_features: int,
        num_hidden_layers: int,
        hidden_size: int,
        use_bias: bool,
        fourier_max_period: float,
        rngs: nnx.Rngs,
    ) -> None:
        self.fourier_dim = fourier_features
        self.fourier_max_period = fourier_max_period

        # Input layer takes the state concatenated with the Fourier time embedding.
        network = [
            nnx.Linear(
                in_features=in_features + fourier_features,
                out_features=hidden_size,
                use_bias=use_bias,
                rngs=rngs,
            ),
            nnx.silu,
        ]
        for _ in range(num_hidden_layers):
            network.append(
                nnx.Linear(
                    in_features=hidden_size,
                    out_features=hidden_size,
                    use_bias=use_bias,
                    rngs=rngs,
                )
            )
            network.append(nnx.silu)

        network.append(
            nnx.Linear(
                in_features=hidden_size,
                out_features=out_features,
                use_bias=use_bias,
                rngs=rngs,
            )
        )

        self.network = nnx.Sequential(*network)

    def time_embed(
        self, t: jax.Array, embed_dim: int, max_period: float = 10_000.0
    ) -> jax.Array:
        assert embed_dim % 2 == 0, "embed_dim must be even"

        # Promote (batch,) -> (batch, 1) so t broadcasts against the
        # frequencies. (The previous `t.shape[-1] != 1` check broke for a
        # batch of size one.)
        if t.ndim == 1:
            t = t[..., None]

        half_dim = embed_dim // 2
        freqs = jnp.exp(
            -jnp.log(max_period) * jnp.arange(half_dim, dtype=jnp.float32) / half_dim
        )
        args = t * freqs
        time_features = jnp.concatenate([jnp.sin(args), jnp.cos(args)], axis=-1)
        return time_features
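    # The frequencies decay geometrically from 1 down to
    # max_period ** (-(half_dim - 1) / half_dim), approaching 1 / max_period
    # for large embed_dim. Illustrative shape check (an assumption, not in the
    # original commit): time_embed(jnp.zeros((4,)), embed_dim=6) -> (4, 6).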

    def __call__(self, x: jax.Array, t: jax.Array) -> jax.Array:
        t_encoded = self.time_embed(t, self.fourier_dim, self.fourier_max_period)
        x = jnp.concatenate((x, t_encoded), axis=-1)
        return self.network(x)


# --- Diffusion functions -----------------------------
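# The pair (alpha, beta) defines the linear interpolant
#   x_t = alpha(t) * z + beta(t) * eps,  z ~ p_data,  eps ~ N(0, I),
# which runs from pure noise at t = 0 (alpha = 0, beta = 1) to data at t = 1.
# train_step below regresses the model onto the time derivative
#   d x_t / dt = alpha'(t) * z + beta'(t) * eps,
# which is why vmapped gradients of alpha and beta are set up here.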
def alpha(t: jax.Array) -> jax.Array:
    return jnp.clip(t, 0.0, 1.0)


alpha_scalar_grad = jax.grad(alpha)
alpha_grad = jax.jit(jax.vmap(alpha_scalar_grad))


def beta(t: jax.Array) -> jax.Array:
    return 1.0 - jnp.clip(t, 0.0, 1.0)


beta_scalar_grad = jax.grad(beta)
beta_grad = jax.jit(jax.vmap(beta_scalar_grad))


def ode_step(model: MLP, x_t: jax.Array, t: jax.Array, h: float) -> jax.Array:
    # One explicit Euler step of dx/dt = model(x, t).
    return x_t + h * model(x_t, t)


def ode_trajectory(
    key: jax.random.PRNGKey, model: MLP, num_samples: int, config: Config
) -> jax.Array:
    t = jnp.zeros((num_samples,))
    h = 1.0 / config.sample_steps
    x = jax.random.normal(key=key, shape=(num_samples, config.space_dimensions))

    # Integrate from t = 0 (noise) to t = 1 (data) in config.sample_steps steps.
    for _ in range(config.sample_steps):
        x = ode_step(model, x, t, h)
        t = t + h

    return x


def sde_step(model: MLP, x_t: jax.Array, t: jax.Array, sigma_t: jax.Array) -> jax.Array:
    raise NotImplementedError  # TODO: single step of a stochastic (SDE) sampler.


def sde_trajectory(model: MLP) -> jax.Array:
    raise NotImplementedError  # TODO: stochastic sampling loop.
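

# A possible way to fill in the stubs above -- a sketch, not part of the
# original commit. For the linear schedule alpha(t) = t, beta(t) = 1 - t, the
# marginal score can be recovered from the learned velocity v:
#   E[eps | x_t] = x_t - t * v(x_t, t),   score(x_t, t) = -E[eps | x_t] / (1 - t),
# and an Euler-Maruyama discretization of
#   dx = [v + (sigma^2 / 2) * score] dt + sigma dW
# keeps the same marginals as the ODE above. The `_sketch` names and the
# constant noise scale `sigma` are illustrative assumptions.
def sde_step_sketch(
    model: MLP,
    key: jax.random.PRNGKey,
    x_t: jax.Array,
    t: jax.Array,
    h: float,
    sigma: float,
) -> jax.Array:
    v = model(x_t, t)
    # Score from velocity; the floor on (1 - t) guards the division near t = 1.
    score = -(x_t - t[:, None] * v) / jnp.maximum(1.0 - t[:, None], 1e-3)
    noise = jax.random.normal(key, x_t.shape)
    return x_t + h * (v + 0.5 * sigma**2 * score) + sigma * jnp.sqrt(h) * noise


def sde_trajectory_sketch(
    key: jax.random.PRNGKey,
    model: MLP,
    num_samples: int,
    config: Config,
    sigma: float = 0.5,
) -> jax.Array:
    key, sub = jax.random.split(key)
    t = jnp.zeros((num_samples,))
    h = 1.0 / config.sample_steps
    x = jax.random.normal(key=sub, shape=(num_samples, config.space_dimensions))

    for _ in range(config.sample_steps):
        key, sub = jax.random.split(key)
        x = sde_step_sketch(model, sub, x, t, h, sigma)
        t = t + h

    return x

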
# --- Training ----------------------------------------


def main(config: Config):
    rngs = nnx.Rngs(config.seed)

    model = MLP(
        in_features=config.space_dimensions,
        out_features=config.space_dimensions,
        num_hidden_layers=config.num_hidden_layers,
        hidden_size=config.hidden_size,
        use_bias=config.mlp_bias,
        fourier_features=config.fourier_dim,
        fourier_max_period=config.fourier_max_period,
        rngs=rngs,
    )
    optim = nnx.Optimizer(
        model,
        tx=optax.chain(optax.clip_by_global_norm(1.0), optax.adamw(learning_rate=3e-4)),
    )

    @nnx.jit
    def train_step(
        model: MLP, optim: nnx.Optimizer, z: jax.Array, key: jax.random.PRNGKey
    ):
        key_e, key_t = jax.random.split(key)
        eps = jax.random.normal(key=key_e, shape=z.shape)
        t = jax.random.uniform(key=key_t, shape=[z.shape[0]])
        # Noisy interpolant between noise eps (t = 0) and data z (t = 1).
        x = alpha(t)[:, None] * z + beta(t)[:, None] * eps

        def loss_fn(model, z, t, eps):
            # Squared error against the conditional velocity
            # d x_t / dt = alpha'(t) * z + beta'(t) * eps,
            # summed over space and averaged over the batch.
            loss = jnp.sum(
                (
                    model(x, t)
                    - (alpha_grad(t)[:, None] * z + beta_grad(t)[:, None] * eps)
                )
                ** 2,
                axis=-1,
            )
            return jnp.mean(loss)

        value_grad_fn = nnx.value_and_grad(loss_fn)
        loss, grads = value_grad_fn(model, z, t, eps)

        optim.update(grads)
        return loss

    # Cache the graph traversal of (model, optim) so repeated jitted calls are cheap.
    cached_train_step = nnx.cached_partial(train_step, model, optim)

    for _ in tqdm(range(config.num_steps)):
        z = sample_p_data(
            rngs.params(), num_samples=config.batch_size, r1=config.r1, r2=config.r2
        )

        _ = cached_train_step(z, rngs.params())

    # Generate samples
    print("sampling...", end="", flush=True)
    samples = ode_trajectory(
        key=rngs.params(), model=model, num_samples=1024, config=config
    )
    print(" done!")
    sns.scatterplot(x=samples[:, 0], y=samples[:, 1])
    plt.savefig("scatter.png", dpi=300, bbox_inches="tight")


if __name__ == "__main__":
    config = tyro.cli(Config)
    main(config)
File diff suppressed because it is too large