"""Generate a synthetic dataset of images containing two coloured discs.

Module path: ``src/ddpm/generate_circle_dataset.py``.

Every image has a uniform background with one green and one blue filled
circle whose centres are ``distance +/- delta`` pixels apart.  Run as a script
to write the whole dataset to ``data/circle_dataset/data_map.npy``.
"""
import math
import os
from concurrent.futures import ProcessPoolExecutor
from itertools import repeat
from typing import Iterator, Optional, Tuple

import numpy as np

# RGB palette.  RED is not used by the generator below; it is kept so the
# module's public names stay unchanged.
RED = np.array((0xCC, 0x24, 0x1D))
GREEN = np.array((0x98, 0x97, 0x1A))
BLUE = np.array((0x45, 0x85, 0x88))
BACKGROUND = np.array((0x50, 0x49, 0x45))


def create_sample(
    id: int,
    image_size: int,
    distance: int,
    radius: int,
    delta: int,
    seed: Optional[int] = None,
) -> Tuple[int, np.ndarray]:
    """Render one image with a green and a blue filled circle.

    Args:
        id: Index of the sample; returned unchanged so that callers can place
            the image in an output array.  (The name shadows the builtin but
            is kept for backward compatibility.)
        image_size: Height and width of the square image in pixels.
        distance: Target distance between the two circle centres.
        radius: Radius of both circles in pixels.
        delta: Allowed deviation from ``distance`` (must be >= 0).
        seed: Optional non-negative base seed.  When given, the sample is a
            deterministic function of ``(seed, id)``.  When ``None``, fresh OS
            entropy is used for every call.

    Returns:
        ``(id, img)`` where ``img`` is a ``uint8`` array of shape
        ``(image_size, image_size, 3)``.

    Raises:
        ValueError: If the circles cannot fit in the image or the requested
            distance band is unreachable.
    """
    # Largest offset between two valid centre coordinates along one axis.
    span = image_size - 2 * radius - 1
    if radius < 0 or span < 0:
        raise ValueError(
            f"radius={radius} does not fit inside image_size={image_size}"
        )
    # Necessary condition for the rejection loop below to terminate; without
    # it an unreachable band would spin forever.
    if delta < 0 or distance + delta < 0 or distance - delta > math.hypot(span, span):
        raise ValueError(
            f"no centre pair can be {distance} +/- {delta} pixels apart "
            f"with image_size={image_size} and radius={radius}"
        )

    # A private generator per sample: forked worker processes inherit the
    # parent's global np.random state and would otherwise all emit the same
    # "random" centres.
    rng = np.random.default_rng() if seed is None else np.random.default_rng([seed, id])

    # Rejection-sample two centres, each far enough from the border for the
    # whole disc to fit, until their separation falls inside the band.
    low, high = radius, image_size - radius
    while True:
        r0, c0 = (int(v) for v in rng.integers(low, high, size=2))
        r1, c1 = (int(v) for v in rng.integers(low, high, size=2))
        if distance - delta <= math.hypot(r0 - r1, c0 - c1) <= distance + delta:
            break

    img = np.full(
        shape=(image_size, image_size, 3), fill_value=BACKGROUND, dtype=np.uint8
    )

    # Boolean disc masks: a pixel belongs to a circle when its squared
    # distance to the centre is at most radius**2.
    rows, cols = np.ogrid[:image_size, :image_size]
    green_mask = (rows - r0) ** 2 + (cols - c0) ** 2 <= radius**2
    blue_mask = (rows - r1) ** 2 + (cols - c1) ** 2 <= radius**2

    img[green_mask] = GREEN
    img[blue_mask] = BLUE  # drawn last, so blue wins where the discs overlap

    return id, img


def generate_circle_dataset(
    num_samples=1_000_000,
    image_size=64,
    radius=5,
    distance=20,
    delta=5,
    max_workers=32,
    chunksize=100,
    seed=None,
) -> Iterator[Tuple[int, np.ndarray]]:
    """
    Lazily generate images with two circles (green and blue) in parallel.

    This is a generator: nothing is written to disk here; the caller decides
    how to store the yielded images.

    Args:
        num_samples (int): Number of images to generate.
        image_size (int): Size of the square image (height and width).
        radius (int): Radius of the circles.
        distance (int): Base distance between the centers of the two circles.
        delta (int): Maximum variation in the distance between the circles.
        max_workers (int): Number of worker processes.
        chunksize (int): Number of samples handed to a worker at a time.
        seed (int | None): Optional base seed; when set, sample ``i`` is
            reproducible regardless of how work is scheduled.

    Yields:
        tuple[int, np.ndarray]: ``(index, image)`` pairs in increasing index
        order, ``image`` being ``uint8`` of shape
        ``(image_size, image_size, 3)``.
    """
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        # Argument order mirrors create_sample's signature:
        # (id, image_size, distance, radius, delta, seed).
        yield from executor.map(
            create_sample,
            range(num_samples),
            repeat(image_size),
            repeat(distance),
            repeat(radius),
            repeat(delta),
            repeat(seed),
            chunksize=chunksize,
        )


def main() -> None:
    """Generate the full dataset and store it as ``data_map.npy``."""
    # Imported here so that importing this module for its generator functions
    # does not require the progress-bar dependency.
    from tqdm import tqdm

    total_samples = 1_000_000
    image_size = 64

    # Create output directory if it doesn't exist
    output_dir = "data/circle_dataset"
    os.makedirs(output_dir, exist_ok=True)

    # Stream straight into a memory-mapped .npy file instead of holding the
    # ~12 GB array in RAM before saving.
    dataset = np.lib.format.open_memmap(
        os.path.join(output_dir, "data_map.npy"),
        mode="w+",
        dtype=np.uint8,
        shape=(total_samples, image_size, image_size, 3),
    )
    iterator = generate_circle_dataset(
        num_samples=total_samples, image_size=image_size
    )
    for i, sample in tqdm(iterator, total=total_samples):
        dataset[i] = sample

    dataset.flush()


if __name__ == "__main__":
    main()