Time duplication analysis for the data from the Wintab diagnostics tool¶
File reading¶
In [424]:
import os
from datetime import datetime, timezone
import pandas as pd

# Location and name of the dump produced by the Wintab diagnostics tool.
files_prefix = '../../data'
filename = '2024-04-09_wtcapt-pc-bamboo.txt'
# BUG FIX: the path previously contained a literal placeholder and never used
# `filename`; build the full path from the configured name instead.
filepath = f'{files_prefix}/{filename}'
separator = ','
rows_to_skip = 50  # the tool writes a preamble before the CSV header

df = pd.read_csv(filepath, sep=separator, encoding='utf-16', skiprows=rows_to_skip, on_bad_lines='warn')
# Column headers are padded with spaces in the dump; normalize them.
df = df.rename(columns=lambda h: h.strip())
# drop single-string rows, i.e. prox
df = df.dropna(axis=0, how='any')

# File creation time approximates the capture start.
# `datetime.utcfromtimestamp` is deprecated since Python 3.12; use an explicit
# UTC conversion and strip tzinfo to keep the previous naive-datetime behavior.
creation_time = datetime.fromtimestamp(os.path.getctime(filepath), tz=timezone.utc).replace(tzinfo=None)
start_time = pd.to_datetime(creation_time)
Displaying the initial data¶
In [425]:
# Show when the recording started, then the parsed frame itself.
start_message = f'The start time is {start_time}'
print(start_message)
display(df)
The start time is 2024-04-12 14:12:42.280035
hCtx | status | time | changed | serial | csr | bttn | X | Y | npres | azi | alt | twist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 00000804 | 0.0 | 300360.0 | 000011b4 | 1.0 | 1.0 | 0.0 | 8686.0 | 4989.0 | 0.0 | 0.0 | 900.0 | 0 [300360] |
2 | 00000804 | 0.0 | 300367.0 | 00000194 | 2.0 | 1.0 | 0.0 | 8691.0 | 4998.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
3 | 00000804 | 0.0 | 300375.0 | 00000194 | 3.0 | 1.0 | 0.0 | 8694.0 | 5006.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
4 | 00000804 | 0.0 | 300382.0 | 00000194 | 4.0 | 1.0 | 0.0 | 8698.0 | 5014.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
5 | 00000804 | 0.0 | 300390.0 | 00000194 | 5.0 | 1.0 | 0.0 | 8704.0 | 5019.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
447 | 00000804 | 0.0 | 303699.0 | 00000454 | 447.0 | 1.0 | 0.0 | 11234.0 | 1822.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
448 | 00000804 | 0.0 | 303707.0 | 00000194 | 448.0 | 1.0 | 0.0 | 11844.0 | 2192.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
449 | 00000804 | 0.0 | 303714.0 | 00000194 | 449.0 | 1.0 | 0.0 | 12038.0 | 2379.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
450 | 00000804 | 0.0 | 303722.0 | 00000194 | 450.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
451 | 00000804 | 0.0 | 303729.0 | 00000014 | 451.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
451 rows × 13 columns
Converting time columns to pandas Timedelta¶
In [426]:
try:
    # Raw 'time' values are milliseconds since an arbitrary origin.
    df['time'] = pd.to_timedelta(df['time'], unit='ms')
    # To get an absolute time:
    # df['time'] = pd.to_timedelta(df['time'], unit='ms') + start_time
except ValueError as err:  # i.e. negative values
    # BUG FIX: the previous handler (`df['time'] = df['time']`) silently
    # swallowed the error; keep the raw column but report the failure.
    print(f'Could not convert the "time" column to timedelta, keeping raw values: {err}')
display(df)
hCtx | status | time | changed | serial | csr | bttn | X | Y | npres | azi | alt | twist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 00000804 | 0.0 | 0 days 00:05:00.360000 | 000011b4 | 1.0 | 1.0 | 0.0 | 8686.0 | 4989.0 | 0.0 | 0.0 | 900.0 | 0 [300360] |
2 | 00000804 | 0.0 | 0 days 00:05:00.367000 | 00000194 | 2.0 | 1.0 | 0.0 | 8691.0 | 4998.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
3 | 00000804 | 0.0 | 0 days 00:05:00.375000 | 00000194 | 3.0 | 1.0 | 0.0 | 8694.0 | 5006.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
4 | 00000804 | 0.0 | 0 days 00:05:00.382000 | 00000194 | 4.0 | 1.0 | 0.0 | 8698.0 | 5014.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
5 | 00000804 | 0.0 | 0 days 00:05:00.390000 | 00000194 | 5.0 | 1.0 | 0.0 | 8704.0 | 5019.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
447 | 00000804 | 0.0 | 0 days 00:05:03.699000 | 00000454 | 447.0 | 1.0 | 0.0 | 11234.0 | 1822.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
448 | 00000804 | 0.0 | 0 days 00:05:03.707000 | 00000194 | 448.0 | 1.0 | 0.0 | 11844.0 | 2192.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
449 | 00000804 | 0.0 | 0 days 00:05:03.714000 | 00000194 | 449.0 | 1.0 | 0.0 | 12038.0 | 2379.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
450 | 00000804 | 0.0 | 0 days 00:05:03.722000 | 00000194 | 450.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
451 | 00000804 | 0.0 | 0 days 00:05:03.729000 | 00000014 | 451.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
451 rows × 13 columns
Data statistics¶
In [427]:
# Summary statistics for every numeric/timedelta column.
summary_stats = df.describe()
display(summary_stats)
status | time | serial | csr | bttn | X | Y | npres | azi | alt | |
---|---|---|---|---|---|---|---|---|---|---|
count | 451.0 | 451 | 451.000000 | 451.0 | 451.000000 | 451.000000 | 451.000000 | 451.000000 | 451.0 | 451.0 |
mean | 0.0 | 0 days 00:05:02.041873614 | 226.000000 | 1.0 | 0.880266 | 8552.549889 | 4925.585366 | 458.607539 | 0.0 | 900.0 |
std | 0.0 | 0 days 00:00:00.976796061 | 130.336743 | 0.0 | 0.325011 | 2865.100936 | 2459.830061 | 177.278548 | 0.0 | 0.0 |
min | 0.0 | 0 days 00:05:00.360000 | 1.000000 | 1.0 | 0.000000 | 1651.000000 | 440.000000 | 0.000000 | 0.0 | 900.0 |
25% | 0.0 | 0 days 00:05:01.197000 | 113.500000 | 1.0 | 1.000000 | 6541.000000 | 2909.500000 | 457.000000 | 0.0 | 900.0 |
50% | 0.0 | 0 days 00:05:02.041000 | 226.000000 | 1.0 | 1.000000 | 8766.000000 | 5026.000000 | 529.000000 | 0.0 | 900.0 |
75% | 0.0 | 0 days 00:05:02.885000 | 338.500000 | 1.0 | 1.000000 | 10640.000000 | 6775.500000 | 553.000000 | 0.0 | 900.0 |
max | 0.0 | 0 days 00:05:03.729000 | 451.000000 | 1.0 | 1.000000 | 14473.000000 | 9198.000000 | 624.000000 | 0.0 | 900.0 |
Grouping by time¶
In [428]:
# Rows per timestamp — any count above 1 would indicate a duplicated time.
counted_by_time = df['time'].value_counts()
time_count_stats = counted_by_time.describe()
display(time_count_stats)
count 451.0 mean 1.0 std 0.0 min 1.0 25% 1.0 50% 1.0 75% 1.0 max 1.0 Name: count, dtype: float64
Counting the number of elements for each group¶
In [429]:
# Distribution of group sizes: how many timestamps occur once, twice, etc.
group_size_counts = counted_by_time.value_counts()
unique_counts = group_size_counts.sort_index()
display(unique_counts)
count 1 451 Name: count, dtype: int64
Calculating the frequency¶
In [430]:
def calculate_frequency(without_duplicates: bool, times: pd.Series = None) -> float:
    """Return the mean sampling period in seconds.

    Note: despite the name, the value returned is a period (seconds between
    samples), not a rate in Hz; callers invert it to get the frequency.

    Parameters
    ----------
    without_duplicates : bool
        If True, duplicated timestamps are collapsed (keeping the last)
        before averaging.
    times : pd.Series, optional
        Timedelta series to analyze; defaults to the notebook-global
        ``df['time']`` for backward compatibility.

    Raises
    ------
    ValueError
        If fewer than two timestamps remain (the original code raised
        ZeroDivisionError in that case).
    """
    timestamps = df['time'] if times is None else times
    if without_duplicates:
        # Equivalent to df.drop_duplicates(subset='time', keep='last')['time'],
        # but operates on the series directly.
        timestamps = timestamps.drop_duplicates(keep='last')
    if len(timestamps) < 2:
        raise ValueError('at least two timestamps are required to compute a period')
    first_time = timestamps.iloc[0]
    last_time = timestamps.iloc[-1]
    duration = (last_time - first_time).total_seconds()
    # Average gap between consecutive samples.
    return duration / (len(timestamps) - 1)
# Compute all period/frequency figures first, then report them together.
rt_frequency_s = round(calculate_frequency(without_duplicates=False), 4)
rt_frequency_hz = 1 / rt_frequency_s
rt_frequency_without_duplicates_s = round(calculate_frequency(without_duplicates=True), 4)
rt_frequency_without_duplicates_hz = 1 / rt_frequency_without_duplicates_s
# Largest gap between two consecutive samples, in seconds.
rt_max_period = df['time'].diff().max().total_seconds()
print(f"Frequency: {rt_frequency_s} s or {rt_frequency_hz} Hz")
print(f"Without duplicates: {rt_frequency_without_duplicates_s} s or {rt_frequency_without_duplicates_hz} Hz")
print(f"Max period: {rt_max_period} s")
Frequency: 0.0075 s or 133.33333333333334 Hz Without duplicates: 0.0075 s or 133.33333333333334 Hz Max period: 0.008 s
Displaying the time series for this data¶
In [431]:
import plotly.graph_objects as go
def format_time(td: pd.Timedelta) -> str:
    """Format a timedelta as 'M:SS.mmm' for use as a plot axis label.

    BUG FIX: the minutes digit was hard-coded to '0', so e.g. 0:05:00.360
    rendered as '0:00.360'; compute whole minutes from the total seconds.
    """
    total_seconds = td.total_seconds()
    return '{}:{:06.3f}'.format(int(total_seconds // 60), total_seconds % 60)
# X/Y pen coordinates over time; axis labels come from the formatted column.
time_column = df['time'].map(format_time)
fig = go.Figure()
for trace_name, values in (('x', df.X), ('y', df.Y)):
    fig.add_trace(go.Scatter(x=time_column, y=values, mode='lines', name=trace_name))
fig.update_layout(title='Coordinates by time', xaxis_title='time', yaxis_title='coordinates')
# Show one tick label per 20 points to keep the axis readable.
fig.update_xaxes(dtick=20)
fig.show(renderer='notebook_connected')