Browse Source

generate circle dataset

master
CALVO GONZALEZ Ramon 9 months ago
parent
commit
fd2da9193b
  1. 90
      src/ddpm/generate_circle_dataset.py

90
src/ddpm/generate_circle_dataset.py

@ -0,0 +1,90 @@
import numpy as np
from tqdm import tqdm
import os
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
# RGB palette used when rendering samples (one 3-element array per color).
RED = np.array((0xCC, 0x24, 0x1D))
GREEN = np.array((0x98, 0x97, 0x1A))
BLUE = np.array((0x45, 0x85, 0x88))
BACKGROUND = np.array((0x50, 0x49, 0x45))


def create_sample(id: int, image_size: int, distance: int, radius: int, delta: int):
    """
    Render one image containing a GREEN and a BLUE filled circle.

    The two centers are drawn uniformly (by rejection sampling) from the
    integer positions that keep each circle fully inside the image, and are
    accepted once their Euclidean distance lies in
    ``[distance - delta, distance + delta]``.

    Args:
        id (int): Identifier of the sample; returned unchanged so callers can
            place the image at the right index.
        image_size (int): Height and width of the square image, in pixels.
        distance (int): Target distance between the two circle centers.
        radius (int): Radius of both circles, in pixels.
        delta (int): Maximum allowed deviation from ``distance``.

    Returns:
        tuple: ``(id, img)`` where ``img`` is a ``uint8`` array of shape
        ``(image_size, image_size, 3)``.

    Raises:
        ValueError: If the circles cannot fit in the image or no pair of
            admissible centers satisfies the requested distance range
            (the sampling loop would otherwise never terminate).
    """
    # Number of admissible integer center coordinates along each axis.
    span = image_size - 2 * radius
    if radius < 0 or span <= 0:
        raise ValueError(
            f"radius={radius} does not fit inside an image of size {image_size}"
        )
    if delta < 0:
        raise ValueError(f"delta must be non-negative, got {delta}")

    lower, upper = distance - delta, distance + delta

    # Exact feasibility check: enumerate every center offset that can occur
    # and make sure at least one distance falls inside the accepted range.
    offsets = np.arange(span)
    reachable = np.sqrt(offsets[:, None] ** 2 + offsets[None, :] ** 2)
    if not np.any((reachable >= lower) & (reachable <= upper)):
        raise ValueError(
            f"no circle centers at distance {distance} +/- {delta} fit in an "
            f"image of size {image_size} with radius {radius}"
        )

    # A fresh, entropy-seeded generator per call: forked worker processes
    # share a copy of NumPy's global RNG state and would otherwise replay
    # identical random sequences, producing duplicated samples.
    rng = np.random.default_rng()

    # Create a blank image
    img = np.full(
        shape=(image_size, image_size, 3), fill_value=BACKGROUND, dtype=np.uint8
    )

    # Compute random centers until they are inside the distance range
    dist = float("inf")
    while (dist < lower) or (dist > upper):
        x0, y0 = rng.integers(low=radius, high=image_size - radius, size=2)
        x1, y1 = rng.integers(low=radius, high=image_size - radius, size=2)
        dist = np.sqrt((x0 - x1) ** 2 + (y0 - y1) ** 2)

    # Draw the circles: a pixel belongs to a circle when its squared distance
    # to the center does not exceed radius**2. The second circle is painted
    # last, so it wins wherever the two overlap.
    xx, yy = np.mgrid[:image_size, :image_size]
    inside0 = (xx - x0) ** 2 + (yy - y0) ** 2 <= radius**2
    inside1 = (xx - x1) ** 2 + (yy - y1) ** 2 <= radius**2
    img[inside0] = GREEN
    img[inside1] = BLUE

    return id, img
def _reseed_worker_rng():
    """Reseed NumPy's global RNG from OS entropy inside a pool worker."""
    np.random.seed()


def generate_circle_dataset(
    num_samples=1_000_000,
    image_size=64,
    radius=5,
    distance=20,
    delta=5,
):
    """
    Lazily generate images with two circles (drawn with the GREEN and BLUE
    colors) using a pool of worker processes.

    This function does not write anything to disk; it is a generator and the
    caller decides how to store the samples.

    Args:
        num_samples (int): Number of images to generate.
        image_size (int): Size of the square image (height and width).
        radius (int): Radius of the circles.
        distance (int): Base distance between the centers of the two circles.
        delta (int): Maximum variation in the distance between the circles.

    Yields:
        tuple: ``(i, sample)`` pairs as returned by ``create_sample``, in
        increasing order of ``i`` from ``0`` to ``num_samples - 1``.
    """
    # Nothing to do: avoid spinning up worker processes for an empty dataset.
    if num_samples <= 0:
        return

    # Never start more workers than there are CPUs (32 is the upper bound).
    max_workers = min(32, os.cpu_count() or 1)

    # Each worker reseeds NumPy's global RNG on start-up so that forked
    # workers do not share the parent's RNG state and emit duplicate draws.
    with ProcessPoolExecutor(
        max_workers=max_workers, initializer=_reseed_worker_rng
    ) as executor:
        # ``map`` preserves input order; chunking amortizes the IPC overhead.
        yield from executor.map(
            create_sample,
            range(num_samples),
            repeat(image_size),
            repeat(distance),
            repeat(radius),
            repeat(delta),
            chunksize=100,
        )
if __name__ == "__main__":
    # Dataset configuration.
    total_samples = 1_000_000
    image_size = 64
    output_dir = "data/circle_dataset"

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    # Stream the samples straight into a memory-mapped .npy file instead of
    # first materializing the whole (total_samples, H, W, 3) uint8 array in
    # RAM; the resulting file is a regular .npy readable with np.load.
    dataset = np.lib.format.open_memmap(
        os.path.join(output_dir, "data_map.npy"),
        mode="w+",
        dtype=np.uint8,
        shape=(total_samples, image_size, image_size, 3),
    )

    # Pass image_size explicitly so the generated images always match the
    # shape of the output buffer rather than relying on matching defaults.
    iterator = generate_circle_dataset(
        num_samples=total_samples, image_size=image_size
    )
    for i, sample in tqdm(iterator, total=total_samples):
        dataset[i] = sample

    # Make sure every page has been written to disk before exiting.
    dataset.flush()
Loading…
Cancel
Save