Custom feeders
While the basic feeders DataFeeder and SpectralDataFeeder were designed to work with data that were already pre-processed (resampled, filtered, and segmented using the Prepare training inputs interface) and stored in compressed numpy format, certain applications may require data to be loaded and fed into the training pipeline via other mechanisms, files, or formats (for example, feeding directly from audio files or from a database object). Koogu facilitates these by allowing users to define custom feeders that implement their desired logic.
Custom feeders can be defined by extending the abstract class koogu.data.feeder.BaseFeeder.
Note
This requires writing code to use the TensorFlow API directly.
The example below shows an implementation that extends the BaseFeeder class to feed clips by loading them directly from audio files.
import os
import tensorflow as tf

from koogu.data.feeder import BaseFeeder


# Assuming that you have saved 1 second long audio clips, each containing
# sounds from one of three species of frogs, and that the audio clips
# are available as .wav files organized under species-specific directories.

fs = 24000    # sampling frequency of the audio files

directories_as_class_names = [
    'Lithobates sylvaticus',
    'Lithobates catesbeianus',
    'Dryophytes versicolor'
]


def read_files(filelist, sp_idx):
    # Utility function (a generator) to read a list of audio files one by one

    # One-hot encoded label for the current species
    label = tf.one_hot(sp_idx, 3)

    for fname in filelist:
        # Read in the audio samples from
        #   directories_as_class_names[sp_idx] + '/' + fname.decode()
        # using one of SoundFile, AudioRead, scipy.io.wavfile, etc.
        clip = ...

        # Yield the clip and the label
        yield clip, label
class MyFeeder(BaseFeeder):

    def __init__(self):
        """
        Register the number of samples available, and decide how to split
        the train vs eval subsets.
        """

        # Get the list of files available in each directory/class
        self.sp0_files = os.listdir(directories_as_class_names[0])
        self.sp1_files = os.listdir(directories_as_class_names[1])
        self.sp2_files = os.listdir(directories_as_class_names[2])

        # Shuffle the lists' contents as desired
        # ...

        # File/sample counts per species
        file_counts = [
            len(self.sp0_files),
            len(self.sp1_files),
            len(self.sp2_files)
        ]

        # Earmark 15% for validation; the remainder will be used as
        # training samples
        per_class_training_samples = [0, 0, 0]
        per_class_eval_samples = [0, 0, 0]
        for class_idx, fc in enumerate(file_counts):
            per_class_training_samples[class_idx] = int(round(fc * 0.85))
            per_class_eval_samples[class_idx] = \
                fc - per_class_training_samples[class_idx]

        # Invoke the parent constructor
        super(MyFeeder, self).__init__(
            (fs, ),
            per_class_training_samples, per_class_eval_samples,
            directories_as_class_names)
    def make_dataset(self, is_training, batch_size, **kwargs):
        """
        Build a TensorFlow Dataset comprising all training or eval clips.
        """

        # Make class-specific datasets
        sp_ds = [None, None, None]
        for sp_idx, sp_files in enumerate(
                [self.sp0_files, self.sp1_files, self.sp2_files]):

            # Restrict which files to read based on train/eval mode
            split_idx = self.training_samples_per_class[sp_idx]
            if is_training:
                filelist = sp_files[:split_idx]
            else:
                filelist = sp_files[split_idx:]

            sp_ds[sp_idx] = tf.data.Dataset.from_generator(
                read_files,
                args=(filelist, sp_idx),
                output_signature=(
                    tf.TensorSpec(shape=(fs, ), dtype=tf.float32),  # clip
                    tf.TensorSpec(shape=(3, ), dtype=tf.float32)    # label
                )
            )

        # Concatenate all class-specific datasets
        dataset = sp_ds[0].concatenate(sp_ds[1]).concatenate(sp_ds[2])

        # Invoke the base class functionality to shuffle & batch, or implement
        # the logic yourself as needed.
        return self._queue_and_batch(dataset, is_training, batch_size, **kwargs)
    def transform(self, sample, label, is_training, **kwargs):
        # Pass as-is; not doing any transformation in this example
        return sample, label

    def pre_transform(self, sample, label, is_training, **kwargs):
        # Pass as-is; not applying any augmentations in this example
        return sample, label

    def post_transform(self, sample, label, is_training, **kwargs):
        # Pass as-is; not applying any augmentations in this example
        return sample, label
Converting waveforms to spectrograms
In the above example, the loaded audio clips will be presented as-is (as waveforms) to the model during training/validation. You can convert the clips into power spectral density spectrograms before they are presented to the model by implementing the functionality in the transform() method and overriding the get_shape_transformation_info() method, as shown below.
# --- update the constructor from the above example ---
def __init__(self):

    ...
    ...

    # Invoke the parent constructor
    super(MyFeeder, ...
    ...
    ...

    # Define the settings for the transformation
    spec_settings = {
        'win_len': ...,
        # ...
        # see koogu.data.tf_transformations.Audio2Spectral for list of keys
    }

    # Instantiate the transformation object
    self._transform = koogu.data.tf_transformations.Audio2Spectral(
        fs, spec_settings)

    self._in_shape = (fs, )

    # Update parent's member variable to reflect the transformed output shape
    self._shape = self._transform.compute_output_shape(
        [1] + list(self._in_shape))[1:]

def transform(self, clip, label, is_training, **kwargs):
    # Apply the transformation
    output = self._transform(clip)
    return output, label

def get_shape_transformation_info(self):
    return self._in_shape, self._transform
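With these changes, the model will receive spectrograms instead of waveforms during training/validation. As a quick check, the transform can be applied outside the pipeline, mirroring how it is invoked in transform() above. The sketch below is illustrative only; the dummy clip and label are placeholders, and valid spec_settings values must be filled in first.

import numpy as np

my_feeder = MyFeeder()

# Apply the feeder's transform to a random 1 s clip and confirm that the
# output shape matches what the feeder advertises via self._shape
dummy_clip = tf.constant(np.random.randn(fs), dtype=tf.float32)
dummy_label = tf.one_hot(0, 3)
spec, _ = my_feeder.transform(dummy_clip, dummy_label, is_training=False)
print(spec.shape)        # 2D spectrogram output
print(my_feeder._shape)  # should match the shape printed above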