Data Logging

Labbench provides several data logging capabilities oriented toward experiments that involve complex sweeps or test conditions. The general idea is to automatically log small details (device parameters, test conditions, git commit hashes, etc.) so that automation code can stay focused on the test procedure. The resulting logging system makes many implicit decisions, but this guide attempts to describe the resulting structure clearly:

Example: Logging Device objects

To get started, consider a simple loop:

import numpy as np
import labbench as lb
from labbench.testing.pyvisa_sim import SpectrumAnalyzer, PowerSensor

# use simulated pyvisa instruments so the example runs without hardware
lb.visa_default_resource_manager('@sim')
lb.show_messages('info')

sensor = PowerSensor()
analyzer = SpectrumAnalyzer()

# log into a fresh directory with a random hex name, and record
# paramattr activity on both devices alongside each logged row
db = lb.CSVLogger(path=f"./{np.random.bytes(8).hex()}")
db.observe_paramattr([sensor, analyzer])

with sensor, analyzer, db:
    for freq in (5.8e9, 5.85e9, 5.9e9):
        # tune both instruments to the same test frequency
        analyzer.center_frequency = freq
        sensor.frequency = freq

        sensor.trigger()
        analyzer.trigger()

        # explicit results for this row; the analyzer trace is non-scalar
        # and ends up in a relational file rather than the root table
        data = {
            'analyzer_trace': analyzer.fetch(),
            'sensor_reading': sensor.fetch()[0],
        }

        # append one row: `data` plus the observed device parameters
        db.new_row(data)
 INFO   2024-01-22 14:36:44,260.260aggregator: PowerSensor() named 'sensor' by introspection
 INFO   2024-01-22 14:36:44,260.260aggregator: SpectrumAnalyzer() named 'analyzer' by introspection
 INFO   2024-01-22 14:36:46,172.172aggregator: CSVLogger('d908ee6979c1792a') named 'db' by introspection

Output data structure

Experimental results are populated as follows in a directory at the given path:

image

The root table in outputs.csv gives the high level test conditions and results:

import pandas as pd

# the root table has one row per db.new_row() call from the sweep above
root = pd.read_csv(f'{db.path}/outputs.csv')
root
analyzer_trace sensor_reading analyzer_center_frequency sensor_frequency sensor_trigger_count db_host_time db_host_log
0 0/analyzer_trace.csv -52.617 5.800000e+09 5.800000e+09 200 2024-01-22 14:36:46.285057 0/db_host_log.json
1 1/analyzer_trace.csv -52.617 5.850000e+09 5.850000e+09 200 2024-01-22 14:36:46.396226 1/db_host_log.json
2 2/analyzer_trace.csv -52.617 5.900000e+09 5.900000e+09 200 2024-01-22 14:36:46.512491 2/db_host_log.json

This points us at scalar test conditions and results, together with paths to files containing non-scalar data (arrays, tables, etc.) and long text strings. Examples here include the measurement trace from the spectrum analyzer (column 'analyzer_trace') and the host log JSON file (column 'db_host_log'). For example:

pd.read_csv(f"{db.path}/{root['analyzer_trace'][0]}")
frequency power_spectral_density
0 5.795000e+09 -52.617
1 5.795050e+09 -52.373
2 5.795101e+09 -52.724
3 5.795151e+09 -51.893
4 5.795201e+09 -52.270
... ... ...
195 5.804799e+09 -51.752
196 5.804849e+09 -53.065
197 5.804899e+09 -52.585
198 5.804950e+09 -51.861
199 5.805000e+09 -51.596

200 rows × 2 columns

import json

# metadata.json records, among other things, which object and paramattr
# each logged column name came from
with open(f"{db.path}/metadata.json", 'r') as stream:
    metadata = json.load(stream)

# metadata['device_objects']
metadata['field_name_sources']
{'sensor_isopen': {'object': 'sensor.isopen',
  'paramattr': '<labbench.paramattr.property.bool() as isopen>',
  'type': 'bool',
  'help': '\n\n`True` if the backend is ready for use',
  'label': ''},
 'sensor_resource': {'object': 'sensor.resource',
  'paramattr': '<labbench.paramattr.value.str(None) as resource>',
  'type': 'str',
  'help': 'device address or URI',
  'label': ''},
 'analyzer_isopen': {'object': 'analyzer.isopen',
  'paramattr': '<labbench.paramattr.property.bool() as isopen>',
  'type': 'bool',
  'help': '\n\n`True` if the backend is ready for use',
  'label': ''},
 'analyzer_resource': {'object': 'analyzer.resource',
  'paramattr': '<labbench.paramattr.value.str(None) as resource>',
  'type': 'str',
  'help': 'device address or URI',
  'label': ''},
 'db_host_log': {'object': 'db.host.log',
  'paramattr': '<labbench.paramattr.property.list() as log>',
  'type': 'list',
  'help': '\n\nGet the current host log contents.',
  'label': ''},
 'db_munge_force_relational': {'object': 'db.munge.force_relational',
  'paramattr': '<labbench.paramattr.value.list(allow_none=False, default=[]) as force_relational>',
  'type': 'list',
  'help': 'list of column names to always save as relational data',
  'label': ''},
 'analyzer_center_frequency': {'object': 'analyzer.center_frequency',
  'paramattr': '<labbench.paramattr.property.float(None) as center_frequency>',
  'type': 'float',
  'help': 'input signal center frequency',
  'label': 'Hz'},
 'sensor_frequency': {'object': 'sensor.frequency',
  'paramattr': '<labbench.paramattr.property.float(None) as frequency>',
  'type': 'float',
  'help': 'calibration frequency',
  'label': 'Hz'},
 'sensor_trigger_count': {'object': 'sensor.trigger_count',
  'paramattr': '<labbench.paramattr.property.int(None) as trigger_count>',
  'type': 'int',
  'help': 'acquisition count',
  'label': 'samples'},
 'db_host_time': {'object': 'db.host.time',
  'paramattr': '<labbench.paramattr.property.str(None) as time>',
  'type': 'str',
  'help': '\n\nGet a timestamp of the current time',
  'label': ''},
 'db_host_isopen': {'object': 'db.host.isopen',
  'paramattr': '<labbench.paramattr.property.bool() as isopen>',
  'type': 'bool',
  'help': '\n\n`True` if the backend is ready for use',
  'label': ''},
 'db_munge_resource': {'object': 'db.munge.resource',
  'paramattr': '<labbench.paramattr.value.Path(None) as resource>',
  'type': 'pathlib.Path',
  'help': 'base directory for all data',
  'label': ''},
 'db_munge_isopen': {'object': 'db.munge.isopen',
  'paramattr': '<labbench.paramattr.property.bool() as isopen>',
  'type': 'bool',
  'help': '\n\n`True` if the backend is ready for use',
  'label': ''},
 'db_munge_text_relational_min': {'object': 'db.munge.text_relational_min',
  'paramattr': '<labbench.paramattr.value.int(None) as text_relational_min>',
  'type': 'int',
  'help': 'minimum size threshold that triggers storing text in a relational file',
  'label': ''},
 'db_munge_relational_name_fmt': {'object': 'db.munge.relational_name_fmt',
  'paramattr': '<labbench.paramattr.value.str(None) as relational_name_fmt>',
  'type': 'str',
  'help': 'directory name format for data in each row keyed on column',
  'label': ''},
 'db_munge_nonscalar_file_type': {'object': 'db.munge.nonscalar_file_type',
  'paramattr': '<labbench.paramattr.value.str(None) as nonscalar_file_type>',
  'type': 'str',
  'help': 'file format for non-scalar numerical data',
  'label': ''},
 'sensor_read_termination': {'object': 'sensor.read_termination',
  'paramattr': '<labbench.paramattr.value.str(None) as read_termination>',
  'type': 'str',
  'help': 'end of line string to expect in query replies',
  'label': ''},
 'sensor_write_termination': {'object': 'sensor.write_termination',
  'paramattr': '<labbench.paramattr.value.str(None) as write_termination>',
  'type': 'str',
  'help': 'end-of-line string to send after writes',
  'label': ''},
 'sensor_open_timeout': {'object': 'sensor.open_timeout',
  'paramattr': '<labbench.paramattr.value.float(None) as open_timeout>',
  'type': 'float',
  'help': 'timeout for opening a connection to the instrument',
  'label': 's'},
 'sensor_timeout': {'object': 'sensor.timeout',
  'paramattr': '<labbench.paramattr.value.float(None) as timeout>',
  'type': 'float',
  'help': 'message response timeout',
  'label': 's'},
 'sensor_make': {'object': 'sensor.make',
  'paramattr': '<labbench.paramattr.value.str(None) as make>',
  'type': 'str',
  'help': 'device manufacturer name used to autodetect resource string',
  'label': ''},
 'sensor_model': {'object': 'sensor.model',
  'paramattr': '<labbench.paramattr.value.str(None) as model>',
  'type': 'str',
  'help': 'device model used to autodetect resource string',
  'label': ''},
 'sensor_serial': {'object': 'sensor.serial',
  'paramattr': '<labbench.paramattr.property.str(None) as serial>',
  'type': 'str',
  'help': '\n\ndevice-reported serial number',
  'label': ''},
 'sensor__revision': {'object': 'sensor._revision',
  'paramattr': '<labbench.paramattr.property.str(None) as _revision>',
  'type': 'str',
  'help': 'device revision information\n\ndevice-reported revision',
  'label': ''},
 'sensor__identity': {'object': 'sensor._identity',
  'paramattr': '<labbench.paramattr.property.str(None) as _identity>',
  'type': 'str',
  'help': 'identity string reported by the instrument',
  'label': ''},
 'analyzer_read_termination': {'object': 'analyzer.read_termination',
  'paramattr': '<labbench.paramattr.value.str(None) as read_termination>',
  'type': 'str',
  'help': 'end of line string to expect in query replies',
  'label': ''},
 'analyzer_write_termination': {'object': 'analyzer.write_termination',
  'paramattr': '<labbench.paramattr.value.str(None) as write_termination>',
  'type': 'str',
  'help': 'end-of-line string to send after writes',
  'label': ''},
 'analyzer_open_timeout': {'object': 'analyzer.open_timeout',
  'paramattr': '<labbench.paramattr.value.float(None) as open_timeout>',
  'type': 'float',
  'help': 'timeout for opening a connection to the instrument',
  'label': 's'},
 'analyzer_timeout': {'object': 'analyzer.timeout',
  'paramattr': '<labbench.paramattr.value.float(None) as timeout>',
  'type': 'float',
  'help': 'message response timeout',
  'label': 's'},
 'analyzer_make': {'object': 'analyzer.make',
  'paramattr': '<labbench.paramattr.value.str(None) as make>',
  'type': 'str',
  'help': 'device manufacturer name used to autodetect resource string',
  'label': ''},
 'analyzer_model': {'object': 'analyzer.model',
  'paramattr': '<labbench.paramattr.value.str(None) as model>',
  'type': 'str',
  'help': 'device model used to autodetect resource string',
  'label': ''},
 'analyzer_serial': {'object': 'analyzer.serial',
  'paramattr': '<labbench.paramattr.property.str(None) as serial>',
  'type': 'str',
  'help': '\n\ndevice-reported serial number',
  'label': ''},
 'analyzer__revision': {'object': 'analyzer._revision',
  'paramattr': '<labbench.paramattr.property.str(None) as _revision>',
  'type': 'str',
  'help': 'device revision information\n\ndevice-reported revision',
  'label': ''},
 'analyzer__identity': {'object': 'analyzer._identity',
  'paramattr': '<labbench.paramattr.property.str(None) as _identity>',
  'type': 'str',
  'help': 'identity string reported by the instrument',
  'label': ''},
 'db_host_resource': {'object': 'db.host.resource',
  'paramattr': '<labbench.paramattr.value.str(None) as resource>',
  'type': 'str',
  'help': 'device address or URI',
  'label': ''},
 'db_host_git_commit_in': {'object': 'db.host.git_commit_in',
  'paramattr': '<labbench.paramattr.value.str(None) as git_commit_in>',
  'type': 'str',
  'help': 'git commit on open() if run inside a git repo with this branch name',
  'label': ''},
 'db_host_git_commit_id': {'object': 'db.host.git_commit_id',
  'paramattr': '<labbench.paramattr.property.str(None) as git_commit_id>',
  'type': 'str',
  'help': '\n\nTry to determine the current commit hash of the current git repo',
  'label': ''},
 'db_host_git_remote_url': {'object': 'db.host.git_remote_url',
  'paramattr': '<labbench.paramattr.property.str(None) as git_remote_url>',
  'type': 'str',
  'help': '\n\nTry to identify the remote URL of the repository of the current git repo',
  'label': ''},
 'db_host_hostname': {'object': 'db.host.hostname',
  'paramattr': '<labbench.paramattr.property.str(None) as hostname>',
  'type': 'str',
  'help': '\n\nGet the name of the current host',
  'label': ''},
 'db_host_git_browse_url': {'object': 'db.host.git_browse_url',
  'paramattr': '<labbench.paramattr.property.str(None) as git_browse_url>',
  'type': 'str',
  'help': '\n\nURL for browsing the current git repository',
  'label': ''},
 'db_host_git_pending_changes': {'object': 'db.host.git_pending_changes',
  'paramattr': '<labbench.paramattr.property.str(None) as git_pending_changes>',
  'type': 'str',
  'help': '',
  'label': ''}}

For more systematic analysis of the data, we may want to expand the root table based on the relational data files in one of these columns. A shortcut for this is provided by labbench.read_relational():

lb.read_relational(
    f'{db.path}/outputs.csv',

    # the column containing paths to relational data tables.
    # each root row is expanded with the contents of the file at this path
    'analyzer_trace',

    # copy fixed values of these columns across as columns in each relational data table
    ['sensor_frequency', 'sensor_reading']
)
analyzer_trace analyzer_trace_frequency analyzer_trace_id analyzer_trace_power_spectral_density root_index sensor_frequency sensor_reading
0 0/analyzer_trace.csv 5.795000e+09 0 -52.617 0 5.800000e+09 -52.617
1 0/analyzer_trace.csv 5.795050e+09 1 -52.373 0 5.800000e+09 -52.617
2 0/analyzer_trace.csv 5.795101e+09 2 -52.724 0 5.800000e+09 -52.617
3 0/analyzer_trace.csv 5.795151e+09 3 -51.893 0 5.800000e+09 -52.617
4 0/analyzer_trace.csv 5.795201e+09 4 -52.270 0 5.800000e+09 -52.617
... ... ... ... ... ... ... ...
595 2/analyzer_trace.csv 5.904799e+09 195 -51.752 2 5.900000e+09 -52.617
596 2/analyzer_trace.csv 5.904849e+09 196 -53.065 2 5.900000e+09 -52.617
597 2/analyzer_trace.csv 5.904899e+09 197 -52.585 2 5.900000e+09 -52.617
598 2/analyzer_trace.csv 5.904950e+09 198 -51.861 2 5.900000e+09 -52.617
599 2/analyzer_trace.csv 5.905000e+09 199 -51.596 2 5.900000e+09 -52.617

600 rows × 7 columns

For each row in the root table, the returned table is expanded with a copy of the contents of the relational data table at the file path ending in 'analyzer_trace.csv'.

import labbench as lb
from labbench.testing.pyvisa_sim import SpectrumAnalyzer, PowerSensor, SignalGenerator
import numpy as np
from shutil import rmtree

# sweep parameters and output location for the Rack-based example below
FREQ_COUNT = 3
DUT_NAME = "DUT 63"
DATA_PATH = './data'


# the labbench.testing devices support simulated pyvisa operations
lb.visa_default_resource_manager('@sim')

class Testbed(lb.Rack):
    # a Rack bundling the two instruments with a CSV logger for the sweep
    sensor: PowerSensor = PowerSensor()
    analyzer: SpectrumAnalyzer = SpectrumAnalyzer()
    db: lb.CSVLogger = lb.CSVLogger(path=DATA_PATH)

    def open(self):
        """Hook logging of paramattr activity on both instruments.

        NOTE(review): `always=['sweep_aperture']` presumably logs that
        parameter in every row even when unchanged — confirm against the
        CSVLogger.observe_paramattr documentation.
        """
        self.db.observe_paramattr(self.analyzer)
        self.db.observe_paramattr(self.sensor, always=['sweep_aperture'])

    def single(self, frequency: float) -> dict:
        """Measure at one frequency, returning scalar and trace results."""
        # tune both instruments to the same frequency
        self.analyzer.center_frequency = frequency
        self.sensor.frequency = frequency

        self.sensor.trigger()
        self.analyzer.trigger()

        return dict(
            analyzer_trace=self.analyzer.fetch(),
            sensor_reading=self.sensor.fetch()[0]
        )

# remove prior data so the sweep starts with an empty output directory
rmtree(Testbed.db.path, True)

with Testbed() as rack:
    for freq in np.linspace(5.8e9, 5.9e9, FREQ_COUNT):
        rack.single(freq)

        # this could also go in single()
        # extra keyword arguments appear as columns in the root table
        rack.db.new_row(
            comments='try 1.21 GW for time-travel',
            dut = DUT_NAME,
        )