Time duplication analysis for the data from the Wintab diagnostics tool¶
File reading¶
In [424]:
import os
from datetime import datetime, timezone
import pandas as pd

# Location and name of the dump produced by the Wintab diagnostics tool.
files_prefix = '../../data'
filename = '2024-04-09_wtcapt-pc-bamboo.txt'
# BUG FIX: the path previously contained a literal placeholder and never used
# `filename`; build the full path from the configured name instead.
filepath = f'{files_prefix}/{filename}'
separator = ','
rows_to_skip = 50  # the tool writes a preamble before the CSV header

df = pd.read_csv(filepath, sep=separator, encoding='utf-16', skiprows=rows_to_skip, on_bad_lines='warn')
# Column headers are padded with spaces in the dump; normalize them.
df = df.rename(columns=lambda h: h.strip())
# drop single-string rows, i.e. prox
df = df.dropna(axis=0, how='any')

# File creation time approximates the capture start.
# `datetime.utcfromtimestamp` is deprecated since Python 3.12; use an explicit
# UTC conversion and strip tzinfo to keep the previous naive-datetime behavior.
creation_time = datetime.fromtimestamp(os.path.getctime(filepath), tz=timezone.utc).replace(tzinfo=None)
start_time = pd.to_datetime(creation_time)
Displaying the initial data¶
In [425]:
# Show when the recording started, then the parsed frame itself.
start_message = f'The start time is {start_time}'
print(start_message)
display(df)
The start time is 2024-04-12 14:12:42.280035
hCtx | status | time | changed | serial | csr | bttn | X | Y | npres | azi | alt | twist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 00000804 | 0.0 | 300360.0 | 000011b4 | 1.0 | 1.0 | 0.0 | 8686.0 | 4989.0 | 0.0 | 0.0 | 900.0 | 0 [300360] |
2 | 00000804 | 0.0 | 300367.0 | 00000194 | 2.0 | 1.0 | 0.0 | 8691.0 | 4998.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
3 | 00000804 | 0.0 | 300375.0 | 00000194 | 3.0 | 1.0 | 0.0 | 8694.0 | 5006.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
4 | 00000804 | 0.0 | 300382.0 | 00000194 | 4.0 | 1.0 | 0.0 | 8698.0 | 5014.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
5 | 00000804 | 0.0 | 300390.0 | 00000194 | 5.0 | 1.0 | 0.0 | 8704.0 | 5019.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
447 | 00000804 | 0.0 | 303699.0 | 00000454 | 447.0 | 1.0 | 0.0 | 11234.0 | 1822.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
448 | 00000804 | 0.0 | 303707.0 | 00000194 | 448.0 | 1.0 | 0.0 | 11844.0 | 2192.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
449 | 00000804 | 0.0 | 303714.0 | 00000194 | 449.0 | 1.0 | 0.0 | 12038.0 | 2379.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
450 | 00000804 | 0.0 | 303722.0 | 00000194 | 450.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
451 | 00000804 | 0.0 | 303729.0 | 00000014 | 451.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
451 rows × 13 columns
Converting time columns to pandas Timedelta¶
In [426]:
try:
    # Raw 'time' values are milliseconds since an arbitrary origin.
    df['time'] = pd.to_timedelta(df['time'], unit='ms')
    # To get an absolute time:
    # df['time'] = pd.to_timedelta(df['time'], unit='ms') + start_time
except ValueError as err:  # i.e. negative values
    # BUG FIX: the previous handler (`df['time'] = df['time']`) silently
    # swallowed the error; keep the raw column but report the failure.
    print(f'Could not convert the "time" column to timedelta, keeping raw values: {err}')
display(df)
hCtx | status | time | changed | serial | csr | bttn | X | Y | npres | azi | alt | twist | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 00000804 | 0.0 | 0 days 00:05:00.360000 | 000011b4 | 1.0 | 1.0 | 0.0 | 8686.0 | 4989.0 | 0.0 | 0.0 | 900.0 | 0 [300360] |
2 | 00000804 | 0.0 | 0 days 00:05:00.367000 | 00000194 | 2.0 | 1.0 | 0.0 | 8691.0 | 4998.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
3 | 00000804 | 0.0 | 0 days 00:05:00.375000 | 00000194 | 3.0 | 1.0 | 0.0 | 8694.0 | 5006.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
4 | 00000804 | 0.0 | 0 days 00:05:00.382000 | 00000194 | 4.0 | 1.0 | 0.0 | 8698.0 | 5014.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
5 | 00000804 | 0.0 | 0 days 00:05:00.390000 | 00000194 | 5.0 | 1.0 | 0.0 | 8704.0 | 5019.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
447 | 00000804 | 0.0 | 0 days 00:05:03.699000 | 00000454 | 447.0 | 1.0 | 0.0 | 11234.0 | 1822.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
448 | 00000804 | 0.0 | 0 days 00:05:03.707000 | 00000194 | 448.0 | 1.0 | 0.0 | 11844.0 | 2192.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
449 | 00000804 | 0.0 | 0 days 00:05:03.714000 | 00000194 | 449.0 | 1.0 | 0.0 | 12038.0 | 2379.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
450 | 00000804 | 0.0 | 0 days 00:05:03.722000 | 00000194 | 450.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 8] |
451 | 00000804 | 0.0 | 0 days 00:05:03.729000 | 00000014 | 451.0 | 1.0 | 0.0 | 12155.0 | 2571.0 | 0.0 | 0.0 | 900.0 | 0 [ 7] |
451 rows × 13 columns
Data statistics¶
In [427]:
# Summary statistics for every numeric/timedelta column.
summary_stats = df.describe()
display(summary_stats)
status | time | serial | csr | bttn | X | Y | npres | azi | alt | |
---|---|---|---|---|---|---|---|---|---|---|
count | 451.0 | 451 | 451.000000 | 451.0 | 451.000000 | 451.000000 | 451.000000 | 451.000000 | 451.0 | 451.0 |
mean | 0.0 | 0 days 00:05:02.041873614 | 226.000000 | 1.0 | 0.880266 | 8552.549889 | 4925.585366 | 458.607539 | 0.0 | 900.0 |
std | 0.0 | 0 days 00:00:00.976796061 | 130.336743 | 0.0 | 0.325011 | 2865.100936 | 2459.830061 | 177.278548 | 0.0 | 0.0 |
min | 0.0 | 0 days 00:05:00.360000 | 1.000000 | 1.0 | 0.000000 | 1651.000000 | 440.000000 | 0.000000 | 0.0 | 900.0 |
25% | 0.0 | 0 days 00:05:01.197000 | 113.500000 | 1.0 | 1.000000 | 6541.000000 | 2909.500000 | 457.000000 | 0.0 | 900.0 |
50% | 0.0 | 0 days 00:05:02.041000 | 226.000000 | 1.0 | 1.000000 | 8766.000000 | 5026.000000 | 529.000000 | 0.0 | 900.0 |
75% | 0.0 | 0 days 00:05:02.885000 | 338.500000 | 1.0 | 1.000000 | 10640.000000 | 6775.500000 | 553.000000 | 0.0 | 900.0 |
max | 0.0 | 0 days 00:05:03.729000 | 451.000000 | 1.0 | 1.000000 | 14473.000000 | 9198.000000 | 624.000000 | 0.0 | 900.0 |
Grouping by time¶
In [428]:
# Rows per timestamp — any count above 1 would indicate a duplicated time.
counted_by_time = df['time'].value_counts()
time_count_stats = counted_by_time.describe()
display(time_count_stats)
count 451.0 mean 1.0 std 0.0 min 1.0 25% 1.0 50% 1.0 75% 1.0 max 1.0 Name: count, dtype: float64
Counting the number of elements for each group¶
In [429]:
# Distribution of group sizes: how many timestamps occur once, twice, etc.
group_size_counts = counted_by_time.value_counts()
unique_counts = group_size_counts.sort_index()
display(unique_counts)
count 1 451 Name: count, dtype: int64
Calculating the frequency¶
In [430]:
def calculate_frequency(without_duplicates: bool, times: pd.Series = None) -> float:
    """Return the mean sampling period in seconds.

    Note: despite the name, the value returned is a period (seconds between
    samples), not a rate in Hz; callers invert it to get the frequency.

    Parameters
    ----------
    without_duplicates : bool
        If True, duplicated timestamps are collapsed (keeping the last)
        before averaging.
    times : pd.Series, optional
        Timedelta series to analyze; defaults to the notebook-global
        ``df['time']`` for backward compatibility.

    Raises
    ------
    ValueError
        If fewer than two timestamps remain (the original code raised
        ZeroDivisionError in that case).
    """
    timestamps = df['time'] if times is None else times
    if without_duplicates:
        # Equivalent to df.drop_duplicates(subset='time', keep='last')['time'],
        # but operates on the series directly.
        timestamps = timestamps.drop_duplicates(keep='last')
    if len(timestamps) < 2:
        raise ValueError('at least two timestamps are required to compute a period')
    first_time = timestamps.iloc[0]
    last_time = timestamps.iloc[-1]
    duration = (last_time - first_time).total_seconds()
    # Average gap between consecutive samples.
    return duration / (len(timestamps) - 1)
# Compute all period/frequency figures first, then report them together.
rt_frequency_s = round(calculate_frequency(without_duplicates=False), 4)
rt_frequency_hz = 1 / rt_frequency_s
rt_frequency_without_duplicates_s = round(calculate_frequency(without_duplicates=True), 4)
rt_frequency_without_duplicates_hz = 1 / rt_frequency_without_duplicates_s
# Largest gap between two consecutive samples, in seconds.
rt_max_period = df['time'].diff().max().total_seconds()
print(f"Frequency: {rt_frequency_s} s or {rt_frequency_hz} Hz")
print(f"Without duplicates: {rt_frequency_without_duplicates_s} s or {rt_frequency_without_duplicates_hz} Hz")
print(f"Max period: {rt_max_period} s")
Frequency: 0.0075 s or 133.33333333333334 Hz Without duplicates: 0.0075 s or 133.33333333333334 Hz Max period: 0.008 s
Displaying the time series for this data¶
In [431]:
import plotly.graph_objects as go
def format_time(td: pd.Timedelta) -> str:
    """Format a timedelta as 'M:SS.mmm' for use as a plot axis label.

    BUG FIX: the minutes digit was hard-coded to '0', so e.g. 0:05:00.360
    rendered as '0:00.360'; compute whole minutes from the total seconds.
    """
    total_seconds = td.total_seconds()
    return '{}:{:06.3f}'.format(int(total_seconds // 60), total_seconds % 60)
# X/Y pen coordinates over time; axis labels come from the formatted column.
time_column = df['time'].map(format_time)
fig = go.Figure()
for trace_name, values in (('x', df.X), ('y', df.Y)):
    fig.add_trace(go.Scatter(x=time_column, y=values, mode='lines', name=trace_name))
fig.update_layout(title='Coordinates by time', xaxis_title='time', yaxis_title='coordinates')
# Show one tick label per 20 points to keep the axis readable.
fig.update_xaxes(dtick=20)
fig.show(renderer='notebook_connected')