Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions labcore/data/datagen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@

import numpy as np

from dataclasses import dataclass, asdict
from abc import ABC, abstractmethod

"""

Implementation would work like this given x (n-dimensional array):

sine = SineDataGen()
model = sine.generate(x, noise_std = 0.5, A = 2, f = 3)

-------------
These two lines would create a synthesizer representing a sine wave and generate models
for each set of data stored in x.

Std for Gaussian distribution passed to noise() is 0.5 --> maybe add way to have multiple
distributions for different data sets?

A, f are passed to SineDataGen's model() as kwargs

DataGen's generate() then applies the noise to the model and returns an array with
the same dimension that was passed

"""

"""
generate should now work such that the following can be done:

x = [np array of coordinates]
sine = SineDataGen(A = 2, f = 3)

coords = sine.generate(x) --> uses A = 2, f = 3
coords = sine.generate(x, A = 5) --> uses A = 5, f = 3

"""

@dataclass
class DataGen(ABC):
    """Abstract base for synthetic data generators: a model plus Gaussian noise.

    Subclasses declare their model parameters as dataclass fields and
    implement ``model`` as a static method that accepts those fields as
    keyword arguments. ``generate`` evaluates the model and adds noise.
    """

    # Standard deviation of the zero-mean Gaussian noise added by ``generate``.
    noise_std: float = 1.0

    @staticmethod
    @abstractmethod
    def model(coordinates, *args, **kwargs):
        """Return the noiseless model values evaluated at ``coordinates``.

        Declared static to match how ``generate`` calls it
        (``self.model(row, **params)``): the first argument is the
        coordinate array, not the instance.
        """

    def generate(self, coordinates, **kwargs):
        """Evaluate the model at ``coordinates`` and add Gaussian noise.

        Parameters
        ----------
        coordinates : array_like
            Points at which to evaluate the model. Inputs with fewer than
            two dimensions are promoted to 2-D internally; the model is
            then evaluated once per entry along the first axis.
        **kwargs
            Per-call overrides for any dataclass field (including
            ``noise_std``). The instance itself is not modified.

        Returns
        -------
        numpy.ndarray
            Model values plus noise. A 1-D input yields a 1-D output of
            the same length.
        """
        # Accept lists/tuples as well as arrays; ``.ndim`` needs an ndarray.
        coordinates = np.asarray(coordinates)

        # Start from the instance's fields, then apply per-call overrides.
        params = asdict(self)
        params.update(kwargs)
        noise_std = params.pop('noise_std')

        was_one_d = coordinates.ndim == 1
        rows = np.atleast_2d(coordinates)
        model_outputs = np.array([
            self.model(row, **params) + self.noise(row, noise_std)
            for row in rows
        ])

        if was_one_d:
            # Drop only the axis added by atleast_2d. A blanket squeeze()
            # would also collapse a length-1 input to a 0-d array.
            return model_outputs[0]

        return model_outputs

    @staticmethod
    def noise(coordinates, std):
        """Draw zero-mean Gaussian noise shaped like ``coordinates``.

        Uses the full shape rather than ``len`` so that multi-dimensional
        coordinate blocks receive one independent sample per element.
        """
        return np.random.normal(scale=std, size=np.shape(coordinates))


@dataclass
class ExponentialDataGen(DataGen):
    """Data generator whose model is ``base ** coordinates``."""

    # Base of the exponential; defaults to Euler's number.
    base: float = np.e

    @staticmethod
    def model(coordinates, base):
        """Raise ``base`` to the power of each coordinate."""
        values = base ** coordinates
        return values



@dataclass
class SineDataGen(DataGen):
    """Data generator whose model is ``A * sin(2*pi*f*x + phi) + of``."""

    # Multiplier applied to the sine term.
    A: float = 1
    # Multiplies the coordinates inside the 2*pi factor.
    f: float = 1
    # Constant added to the sine argument.
    phi: float = 0
    # Constant added to the final result.
    of: float = 0

    @staticmethod
    def model(coordinates, A, f, phi, of):
        """Evaluate the sine model at ``coordinates``."""
        angle = 2 * np.pi * coordinates * f
        wave = np.sin(angle + phi)
        return A * wave + of


@dataclass
class GaussianDataGen(DataGen):
    """Data generator whose model is
    ``A * exp(-(x - x0)**2 / (2 * sigma**2)) + of``.
    """

    # Value subtracted from the coordinates before squaring.
    x0: float = 0
    # Width parameter; its square (times 2) divides the exponent.
    sigma: float = 1
    # Multiplier applied to the exponential term.
    A: float = 1
    # Constant added to the final result.
    of: float = 0

    @staticmethod
    def model(coordinates, x0, sigma, A, of):
        """Evaluate the Gaussian model at ``coordinates``."""
        squared_distance = (coordinates - x0) ** 2
        exponent = -squared_distance / (2 * sigma ** 2)
        return A * np.exp(exponent) + of