import os
from datetime import datetime, timezone

import pandas as pd

# Location of the capture file to analyse.
files_prefix = '../../data'
filename = '2024-04-08_wtcapt-pc-huion.txt'
filepath = f'{files_prefix}/{filename}'

separator = ','
# Number of lines at the start of the file that are skipped before parsing.
rows_to_skip = 50

# Malformed lines are reported as warnings instead of aborting the load.
df = pd.read_csv(filepath, sep=separator, encoding='utf-16', skiprows=rows_to_skip, on_bad_lines='warn')
# Strip whitespace around the header names so that columns can be addressed
# by their bare names (e.g. df['time']).
df = df.rename(columns=lambda h: h.strip())

# drop single-string rows, i.e. prox
# (how='any' removes every row that has at least one missing value)
df = df.dropna(axis=0, how='any')

# datetime.utcfromtimestamp() is deprecated since Python 3.12. Build the same
# naive UTC datetime from an aware one so the printed value keeps its format.
# NOTE: os.path.getctime() is the creation time on Windows but the time of the
# last metadata change on Unix, so this is only an approximation of the moment
# the capture started.
creation_time = datetime.fromtimestamp(os.path.getctime(filepath), tz=timezone.utc).replace(tzinfo=None)
start_time = pd.to_datetime(creation_time)

print(f'The start time is {start_time}')

display(df)

The start time is 2024-04-12 08:14:18.223469

# Interpret the raw 'time' values as millisecond counts and convert them to
# pandas Timedeltas so that durations can be computed from them later on.
try:
    df['time'] = pd.to_timedelta(df['time'], unit='ms')

    # To get an absolute time
    # df[column] = pd.to_timedelta(df[column]) + start_time
except ValueError as error:  # i.e. negative values
    # Best effort: the column is left untouched, but the failure is reported
    # instead of being swallowed, because the later cells call
    # Timedelta-only methods on this column.
    print(f"Could not convert the 'time' column to timedelta, keeping raw values: {error}")

display(df)

display(df.describe())

# Number of rows recorded for each distinct timestamp.
counted_by_time = df['time'].value_counts()
display(counted_by_time.describe())

count    221.000000
mean       3.542986
std        0.649621
min        1.000000
25%        3.000000
50%        4.000000
75%        4.000000
max        4.000000
Name: count, dtype: float64

# Histogram of group sizes: for every "rows per timestamp" value, how many
# distinct timestamps have that many rows, ordered by group size.
unique_counts = (
    counted_by_time
    .value_counts()
    .sort_index()
)
display(unique_counts)

count
1      6
2      1
3     81
4    133
Name: count, dtype: int64

def calculate_frequency(without_duplicates: bool, data: "pd.DataFrame | None" = None) -> float:
    """Return the mean interval between consecutive samples, in seconds.

    Despite its name, the function returns a period (seconds per sample),
    not a rate; invert the result to obtain a frequency in Hz.

    Args:
        without_duplicates: When true, rows sharing the same ``time`` value
            are collapsed to a single one before averaging.
        data: Frame with a timedelta ``time`` column. When omitted, the
            module-level ``df`` is used.

    Returns:
        ``(last - first) / (n - 1)`` in seconds, where ``n`` is the number
        of timestamps taken into account.

    Raises:
        ValueError: If fewer than two timestamps are available, because no
            interval can be computed from them.
    """
    frame = df if data is None else data

    timestamps = frame['time']
    if without_duplicates:
        timestamps = timestamps.drop_duplicates(keep='last')

    intervals = len(timestamps) - 1
    if intervals < 1:
        raise ValueError('At least two timestamps are required to compute a sampling period')

    # Only the first and the last timestamps matter for the mean interval.
    duration = (timestamps.iloc[-1] - timestamps.iloc[0]).total_seconds()

    return duration / intervals


# calculate_frequency() yields the mean sampling period in seconds. The rate
# in Hz is derived from the unrounded period: inverting a value that has
# already been rounded to four decimals distorts the reported frequency.
rt_period_s = calculate_frequency(without_duplicates=False)
rt_frequency_s = round(rt_period_s, 4)
rt_frequency_hz = 1 / rt_period_s
print(f"Frequency: {rt_frequency_s} s or {rt_frequency_hz} Hz")

# Same figures with rows sharing a timestamp counted only once.
rt_period_without_duplicates_s = calculate_frequency(without_duplicates=True)
rt_frequency_without_duplicates_s = round(rt_period_without_duplicates_s, 4)
rt_frequency_without_duplicates_hz = 1 / rt_period_without_duplicates_s
print(f"Without duplicates: {rt_frequency_without_duplicates_s} s or {rt_frequency_without_duplicates_hz} Hz")

# Longest gap between two consecutive rows.
rt_max_period = df['time'].diff().max().total_seconds()
print(f"Max period: {rt_max_period} s")

Frequency: 0.0049 s or 204.08163265306123 Hz
Without duplicates: 0.0175 s or 57.14285714285714 Hz
Max period: 0.281 s

import plotly.graph_objects as go


def format_time(td: pd.Timedelta):
    """Format a timedelta as a ``0:SS.mmm`` label.

    Only the seconds within the current minute are kept (total seconds
    modulo 60); the minute field is always the literal ``0``.
    """
    seconds_within_minute = td.total_seconds() % 60
    return '0:{:06.3f}'.format(seconds_within_minute)


# Text labels used as x values for both traces.
time_column = df['time'].map(format_time)

fig = go.Figure()

# One line per coordinate, both drawn against the same time labels.
for axis_label, axis_values in (('x', df.X), ('y', df.Y)):
    fig.add_trace(go.Scatter(x=time_column, y=axis_values, mode='lines', name=axis_label))

fig.update_layout(title='Coordinates by time', xaxis_title='time', yaxis_title='coordinates')

# NOTE(review): x holds string labels, so dtick presumably means "one tick
# every 20 samples" here - confirm against the rendered figure.
fig.update_xaxes(dtick=20)

fig.show(renderer='notebook_connected')

	hCtx	status	time	serial	csr	bttn	X	Y	npres	azi	alt	twist
1	00000009	0.0	98670953.0	5686.0	1.0	0.0	54169.0	29444.0	0.0	930.0	700.0	0 [98670953]
2	00000009	0.0	98670953.0	5687.0	1.0	0.0	54169.0	29444.0	0.0	931.0	710.0	0 [ 0]
3	00000009	0.0	98670968.0	5688.0	1.0	0.0	54168.0	29433.0	0.0	899.0	720.0	0 [ 15]
4	00000009	0.0	98670968.0	5689.0	1.0	0.0	54168.0	29420.0	0.0	899.0	730.0	0 [ 0]
5	00000009	0.0	98670968.0	5690.0	1.0	0.0	54168.0	29406.0	0.0	899.0	730.0	0 [ 0]
...	...	...	...	...	...	...	...	...	...	...	...	...
781	00000009	0.0	98674781.0	6464.0	1.0	0.0	64046.0	30704.0	0.0	1209.0	780.0	0 [ 0]
782	00000009	0.0	98674781.0	6465.0	1.0	0.0	64072.0	30859.0	0.0	1249.0	770.0	0 [ 0]
783	00000009	0.0	98674796.0	6466.0	1.0	0.0	64072.0	30859.0	0.0	1249.0	770.0	0 [ 15]
784	00000009	0.0	98674796.0	6467.0	1.0	0.0	64072.0	30859.0	0.0	1249.0	770.0	0 [ 0]
785	00000009	0.0	98674796.0	6468.0	1.0	0.0	0.0	38720.0	0.0	0.0	900.0	0 [ 0]

	hCtx	status	time	serial	csr	bttn	X	Y	npres	azi	alt	twist
1	00000009	0.0	1 days 03:24:30.953000	5686.0	1.0	0.0	54169.0	29444.0	0.0	930.0	700.0	0 [98670953]
2	00000009	0.0	1 days 03:24:30.953000	5687.0	1.0	0.0	54169.0	29444.0	0.0	931.0	710.0	0 [ 0]
3	00000009	0.0	1 days 03:24:30.968000	5688.0	1.0	0.0	54168.0	29433.0	0.0	899.0	720.0	0 [ 15]
4	00000009	0.0	1 days 03:24:30.968000	5689.0	1.0	0.0	54168.0	29420.0	0.0	899.0	730.0	0 [ 0]
5	00000009	0.0	1 days 03:24:30.968000	5690.0	1.0	0.0	54168.0	29406.0	0.0	899.0	730.0	0 [ 0]
...	...	...	...	...	...	...	...	...	...	...	...	...
781	00000009	0.0	1 days 03:24:34.781000	6464.0	1.0	0.0	64046.0	30704.0	0.0	1209.0	780.0	0 [ 0]
782	00000009	0.0	1 days 03:24:34.781000	6465.0	1.0	0.0	64072.0	30859.0	0.0	1249.0	770.0	0 [ 0]
783	00000009	0.0	1 days 03:24:34.796000	6466.0	1.0	0.0	64072.0	30859.0	0.0	1249.0	770.0	0 [ 15]
784	00000009	0.0	1 days 03:24:34.796000	6467.0	1.0	0.0	64072.0	30859.0	0.0	1249.0	770.0	0 [ 0]
785	00000009	0.0	1 days 03:24:34.796000	6468.0	1.0	0.0	0.0	38720.0	0.0	0.0	900.0	0 [ 0]

	status	time	serial	csr	bttn	X	Y	npres	azi	alt
count	783.0	783	783.000000	783.0	783.000000	783.000000	783.000000	783.000000	783.000000	783.000000
mean	0.0	1 days 03:24:32.951441890	6077.000000	1.0	0.795658	58930.427842	24809.664112	2994.487867	1073.888889	723.754789
std	0.0	0 days 00:00:01.125225663	226.176922	0.0	0.403478	4412.276862	4447.595799	1989.689931	199.286994	67.486486
min	0.0	1 days 03:24:30.953000	5686.000000	1.0	0.000000	0.000000	17364.000000	0.000000	0.000000	560.000000
25%	0.0	1 days 03:24:32.117000	5881.500000	1.0	1.000000	56018.500000	20501.000000	2058.000000	935.000000	690.000000
50%	0.0	1 days 03:24:33.015000	6077.000000	1.0	1.000000	58939.000000	25025.000000	2823.000000	1018.000000	730.000000
75%	0.0	1 days 03:24:33.906000	6272.500000	1.0	1.000000	62187.000000	29320.000000	4436.000000	1144.000000	765.000000
max	0.0	1 days 03:24:34.796000	6468.000000	1.0	1.000000	64627.000000	38720.000000	6747.000000	1705.000000	900.000000

Time duplication analysis for the data from Wintab diagnostics tool¶

File reading¶

Displaying the initial data¶

Converting the time column to pandas timedeltas¶

Data statistics¶

Grouping by time¶

Counting the number of elements for each group¶

Calculating the frequency¶

Plotting the coordinates as a time series¶