Time duplication analysis for the data from Wintab diagnostics tool¶
File reading¶
In [432]:
import os
from datetime import datetime
import pandas as pd
files_prefix = f'../../data'
filename = '2024-04-08_wtcapt-pc-huion.txt'
filepath = f'{files_prefix}/{filename}'
separator = ','
rows_to_skip = 50
df = pd.read_csv(filepath, sep=separator, encoding='utf-16', skiprows=rows_to_skip, on_bad_lines='warn')
df = df.rename(columns=lambda h: h.strip())
# drop single-string rows, i.e. prox
df = df.dropna(axis=0, how='any')
creation_time = datetime.utcfromtimestamp(os.path.getctime(filepath))
start_time = pd.to_datetime(creation_time)
Initial data displaying¶
In [433]:
print(f'The start time is {start_time}')
display(df)
The start time is 2024-04-12 08:14:18.223469
hCtx | status | time | serial | csr | bttn | X | Y | npres | azi | alt | twist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 00000009 | 0.0 | 98670953.0 | 5686.0 | 1.0 | 0.0 | 54169.0 | 29444.0 | 0.0 | 930.0 | 700.0 | 0 [98670953] |
2 | 00000009 | 0.0 | 98670953.0 | 5687.0 | 1.0 | 0.0 | 54169.0 | 29444.0 | 0.0 | 931.0 | 710.0 | 0 [ 0] |
3 | 00000009 | 0.0 | 98670968.0 | 5688.0 | 1.0 | 0.0 | 54168.0 | 29433.0 | 0.0 | 899.0 | 720.0 | 0 [ 15] |
4 | 00000009 | 0.0 | 98670968.0 | 5689.0 | 1.0 | 0.0 | 54168.0 | 29420.0 | 0.0 | 899.0 | 730.0 | 0 [ 0] |
5 | 00000009 | 0.0 | 98670968.0 | 5690.0 | 1.0 | 0.0 | 54168.0 | 29406.0 | 0.0 | 899.0 | 730.0 | 0 [ 0] |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
781 | 00000009 | 0.0 | 98674781.0 | 6464.0 | 1.0 | 0.0 | 64046.0 | 30704.0 | 0.0 | 1209.0 | 780.0 | 0 [ 0] |
782 | 00000009 | 0.0 | 98674781.0 | 6465.0 | 1.0 | 0.0 | 64072.0 | 30859.0 | 0.0 | 1249.0 | 770.0 | 0 [ 0] |
783 | 00000009 | 0.0 | 98674796.0 | 6466.0 | 1.0 | 0.0 | 64072.0 | 30859.0 | 0.0 | 1249.0 | 770.0 | 0 [ 15] |
784 | 00000009 | 0.0 | 98674796.0 | 6467.0 | 1.0 | 0.0 | 64072.0 | 30859.0 | 0.0 | 1249.0 | 770.0 | 0 [ 0] |
785 | 00000009 | 0.0 | 98674796.0 | 6468.0 | 1.0 | 0.0 | 0.0 | 38720.0 | 0.0 | 0.0 | 900.0 | 0 [ 0] |
783 rows × 12 columns
Converting time columns to the Pandas time¶
In [434]:
try:
df['time'] = pd.to_timedelta(df['time'], unit='ms')
# To get an absolute time
# df[column] = pd.to_timedelta(df[column]) + start_time
except ValueError: # i.e. negative values
df['time'] = df['time']
display(df)
hCtx | status | time | serial | csr | bttn | X | Y | npres | azi | alt | twist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 00000009 | 0.0 | 1 days 03:24:30.953000 | 5686.0 | 1.0 | 0.0 | 54169.0 | 29444.0 | 0.0 | 930.0 | 700.0 | 0 [98670953] |
2 | 00000009 | 0.0 | 1 days 03:24:30.953000 | 5687.0 | 1.0 | 0.0 | 54169.0 | 29444.0 | 0.0 | 931.0 | 710.0 | 0 [ 0] |
3 | 00000009 | 0.0 | 1 days 03:24:30.968000 | 5688.0 | 1.0 | 0.0 | 54168.0 | 29433.0 | 0.0 | 899.0 | 720.0 | 0 [ 15] |
4 | 00000009 | 0.0 | 1 days 03:24:30.968000 | 5689.0 | 1.0 | 0.0 | 54168.0 | 29420.0 | 0.0 | 899.0 | 730.0 | 0 [ 0] |
5 | 00000009 | 0.0 | 1 days 03:24:30.968000 | 5690.0 | 1.0 | 0.0 | 54168.0 | 29406.0 | 0.0 | 899.0 | 730.0 | 0 [ 0] |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
781 | 00000009 | 0.0 | 1 days 03:24:34.781000 | 6464.0 | 1.0 | 0.0 | 64046.0 | 30704.0 | 0.0 | 1209.0 | 780.0 | 0 [ 0] |
782 | 00000009 | 0.0 | 1 days 03:24:34.781000 | 6465.0 | 1.0 | 0.0 | 64072.0 | 30859.0 | 0.0 | 1249.0 | 770.0 | 0 [ 0] |
783 | 00000009 | 0.0 | 1 days 03:24:34.796000 | 6466.0 | 1.0 | 0.0 | 64072.0 | 30859.0 | 0.0 | 1249.0 | 770.0 | 0 [ 15] |
784 | 00000009 | 0.0 | 1 days 03:24:34.796000 | 6467.0 | 1.0 | 0.0 | 64072.0 | 30859.0 | 0.0 | 1249.0 | 770.0 | 0 [ 0] |
785 | 00000009 | 0.0 | 1 days 03:24:34.796000 | 6468.0 | 1.0 | 0.0 | 0.0 | 38720.0 | 0.0 | 0.0 | 900.0 | 0 [ 0] |
783 rows × 12 columns
Data statistics¶
In [435]:
display(df.describe())
status | time | serial | csr | bttn | X | Y | npres | azi | alt | |
---|---|---|---|---|---|---|---|---|---|---|
count | 783.0 | 783 | 783.000000 | 783.0 | 783.000000 | 783.000000 | 783.000000 | 783.000000 | 783.000000 | 783.000000 |
mean | 0.0 | 1 days 03:24:32.951441890 | 6077.000000 | 1.0 | 0.795658 | 58930.427842 | 24809.664112 | 2994.487867 | 1073.888889 | 723.754789 |
std | 0.0 | 0 days 00:00:01.125225663 | 226.176922 | 0.0 | 0.403478 | 4412.276862 | 4447.595799 | 1989.689931 | 199.286994 | 67.486486 |
min | 0.0 | 1 days 03:24:30.953000 | 5686.000000 | 1.0 | 0.000000 | 0.000000 | 17364.000000 | 0.000000 | 0.000000 | 560.000000 |
25% | 0.0 | 1 days 03:24:32.117000 | 5881.500000 | 1.0 | 1.000000 | 56018.500000 | 20501.000000 | 2058.000000 | 935.000000 | 690.000000 |
50% | 0.0 | 1 days 03:24:33.015000 | 6077.000000 | 1.0 | 1.000000 | 58939.000000 | 25025.000000 | 2823.000000 | 1018.000000 | 730.000000 |
75% | 0.0 | 1 days 03:24:33.906000 | 6272.500000 | 1.0 | 1.000000 | 62187.000000 | 29320.000000 | 4436.000000 | 1144.000000 | 765.000000 |
max | 0.0 | 1 days 03:24:34.796000 | 6468.000000 | 1.0 | 1.000000 | 64627.000000 | 38720.000000 | 6747.000000 | 1705.000000 | 900.000000 |
Grouping by time¶
In [436]:
counted_by_time = df['time'].value_counts()
display(counted_by_time.describe())
count 221.000000 mean 3.542986 std 0.649621 min 1.000000 25% 3.000000 50% 4.000000 75% 4.000000 max 4.000000 Name: count, dtype: float64
Counting the number of elements for each group¶
In [437]:
unique_counts = counted_by_time.value_counts().sort_index()
display(unique_counts)
count 1 6 2 1 3 81 4 133 Name: count, dtype: int64
Calculating the frequency¶
In [438]:
def calculate_frequency(without_duplicates: bool) -> float:
timestamps = df['time']
if without_duplicates:
timestamps = df.drop_duplicates(subset='time', keep='last')['time']
first_time = timestamps.iloc[0]
last_time = timestamps.iloc[-1]
duration = (last_time - first_time).total_seconds()
return duration / (len(timestamps) - 1)
rt_frequency_s = round(calculate_frequency(without_duplicates=False), 4)
rt_frequency_hz = 1 / rt_frequency_s
print(f"Frequency: {rt_frequency_s} s or {rt_frequency_hz} Hz")
rt_frequency_without_duplicates_s = round(calculate_frequency(without_duplicates=True), 4)
rt_frequency_without_duplicates_hz = 1 / rt_frequency_without_duplicates_s
print(f"Without duplicates: {rt_frequency_without_duplicates_s} s or {rt_frequency_without_duplicates_hz} Hz")
rt_max_period = df['time'].diff().max().total_seconds()
print(f"Max period: {rt_max_period} s")
Frequency: 0.0049 s or 204.08163265306123 Hz Without duplicates: 0.0175 s or 57.14285714285714 Hz Max period: 0.281 s
Displaying of time series with this data¶
In [439]:
import plotly.graph_objects as go
def format_time(td: pd.Timedelta):
return '0:{:06.3f}'.format(td.total_seconds() % 60)
time_column = df['time'].map(format_time)
fig = go.Figure()
fig.add_trace(go.Scatter(x=time_column, y=df.X, mode='lines', name='x'))
fig.add_trace(go.Scatter(x=time_column, y=df.Y, mode='lines', name='y'))
fig.update_layout(
title='Coordinates by time',
xaxis_title='time',
yaxis_title='coordinates'
)
fig.update_xaxes(dtick=20)
fig.show(renderer='notebook_connected')