import logging

import pandas as pd

# Location of the recording to analyse, given relative to the working directory.
files_prefix = '../../data'
filename = '2024-04-08_22-46-16_huion-java.txt'
filepath = '/'.join((files_prefix, filename))

# Columns are separated by runs of whitespace; text after '#' is ignored by the parser.
separator = r'\s+'
comment_mark = '#'

df = pd.read_csv(filepath, comment=comment_mark, sep=separator)

import os
from datetime import datetime as dt

from datetime import timezone

start_time = None
time_header = 'Start time'

# Echo the file header (every line up to the first blank one) and pick up the
# recording start time from it when a line mentions `time_header`.
print('Data header:')
with open(filepath, 'r') as file:
    for row in file:
        if time_header in row:
            # The timestamp is taken to be the last space-separated token of the line;
            # the trailing newline is stripped before the token is parsed.
            start_time = pd.to_datetime(row.split(' ')[-1].strip())

        if row.strip() == '':
            break
        print(row.strip(comment_mark + '\n'))

if start_time is None:
    # Fallback: use the file's ctime, expressed as a naive UTC datetime.
    # NOTE: os.path.getctime is the creation time on Windows but the last
    # metadata change time on Unix, so this is only an approximation.
    creation_time = dt.fromtimestamp(
        os.path.getctime(filepath), tz=timezone.utc
    ).replace(tzinfo=None)
    start_time = pd.to_datetime(creation_time)

    logging.warning("The start time value is not present in the header. The file creation date is selected")

WARNING:root:The start time value is not present in the header. The file creation date is selected

Data header:
 Tablet rate 250
 Screen dimension width=1920 height=1080
 Tablet size width=344.16 height=193.59
 Period 25

# Report the start time that the rest of the analysis uses.
print(f'The start time is {start_time}')

# Show the table as it currently stands.
display(df)

The start time is 2024-04-13 08:31:42.816981

time_from_start_columns = ['registeredTime', 'scheduledTime']
delta_columns = ['registeredDelta', 'scheduledDelta']

for column in time_from_start_columns:
    try:
        # Offsets stay relative; add `start_time` to the converted column to get
        # absolute timestamps instead.
        df[column] = pd.to_timedelta(df[column])
    except ValueError:  # i.e. negative values
        # The column cannot be parsed by pandas and is deliberately left as the
        # original strings.
        pass

for column in delta_columns:
    # Deltas are stored as plain floats in milliseconds.
    df[column] = pd.to_timedelta(df[column]).dt.total_seconds() * 1000  # ms

import datetime


def calculate_register_start_time():
    """
    Computes the reference instant used to re-base raw registered-time strings.

    Reads the module-level ``start_time`` and the row labelled ``0`` of ``df``:
    the raw ``registeredTime`` string of that row is parsed with
    ``convert_to_timedelta`` and added to ``start_time``, then the row's
    ``scheduledTime`` offset is subtracted.
    """
    first_row_registered = convert_to_timedelta(df.loc[0, 'registeredTime'])
    first_row_scheduled = df.loc[0, 'scheduledTime'].to_pytimedelta()

    return start_time.to_pydatetime() + first_row_registered - first_row_scheduled


def recalculate_registered_time(registered_time: str, tablet_start_time: pd.Timestamp) -> pd.Timedelta:
    """
    Re-bases one raw registered-time string onto the tablet start instant.

    :param registered_time: raw value in the H:M:S.ms format understood by ``convert_to_timedelta``
    :param tablet_start_time: reference instant to subtract; it is a timestamp, not a
        duration (the caller passes the result of ``pd.to_datetime``)
    :return: ``(start_time + parsed offset) - tablet_start_time``, where ``start_time``
        is the module-level start time
    """
    delta = pd.to_timedelta(convert_to_timedelta(registered_time))

    return (start_time + delta) - tablet_start_time


def convert_to_timedelta(delta: str) -> datetime.timedelta:
    """
    Builds a ``datetime.timedelta`` from a string in the H:M:S.ms format.

    Each of the four fields is converted with ``int`` on its own, so a minus sign
    only affects the field it is written in front of.
    """

    hours_part, minutes_part, seconds_part = delta.split(':')
    whole_seconds, millis = seconds_part.split('.')

    return datetime.timedelta(
        hours=int(hours_part),
        minutes=int(minutes_part),
        seconds=int(whole_seconds),
        milliseconds=int(millis),
    )


# Probe whether pandas can parse registeredTime; the converted result is discarded,
# only the success or failure of the conversion matters here.
try:
    pd.to_timedelta(df['registeredTime'])
except ValueError:
    # Parsing failed, so every raw string is re-based against a start instant
    # derived from the first row.
    tablet_start_time = pd.to_datetime(calculate_register_start_time())
    df['registeredTime'] = df['registeredTime'].map(lambda t: recalculate_registered_time(t, tablet_start_time))

display(df)

display(df.describe())

# Number of rows sharing each distinct registeredTime value.
counted_by_registered = df['registeredTime'].value_counts()
display(counted_by_registered.describe())

# Distribution of those group sizes: how many timestamps occur once, twice, and so on.
display(counted_by_registered.value_counts().sort_index())

count    269.000000
mean       3.442379
std        0.554348
min        1.000000
25%        3.000000
50%        3.000000
75%        4.000000
max        6.000000
Name: count, dtype: float64

count
1      1
2      1
3    148
4    117
5      1
6      1
Name: count, dtype: int64

# Number of rows sharing each distinct scheduledTime value.
counted_by_scheduled = df['scheduledTime'].value_counts()
display(counted_by_scheduled.describe())

# Distribution of those group sizes: how many timestamps occur once, twice, and so on.
display(counted_by_scheduled.value_counts().sort_index())

count    389.000000
mean       2.380463
std        0.608555
min        1.000000
25%        2.000000
50%        2.000000
75%        3.000000
max        5.000000
Name: count, dtype: float64

count
1     12
2    229
3    138
4      8
5      2
Name: count, dtype: int64

def calculate_frequency(time_column: str, without_duplicates: bool) -> float:
    """
    Returns the mean interval, in seconds, between consecutive samples of a time column.

    Despite the name, the returned value is a period (seconds per sample); take its
    reciprocal to obtain a frequency in Hz.

    :param time_column: name of the ``df`` column to measure; its values must support
        subtraction yielding an object with ``total_seconds()``
    :param without_duplicates: when true, repeated timestamps are collapsed into a
        single sample before the interval is computed
    :return: ``(last - first) / (number of samples - 1)`` in seconds
    :raises ValueError: if fewer than two samples remain, since no interval exists
    """
    timestamps = df[time_column]
    if without_duplicates:
        timestamps = timestamps.drop_duplicates(keep='last')

    sample_count = len(timestamps)
    if sample_count < 2:
        raise ValueError(
            f"At least two samples are required in '{time_column}' to compute an interval, "
            f"got {sample_count}"
        )

    duration = (timestamps.iloc[-1] - timestamps.iloc[0]).total_seconds()

    return duration / (sample_count - 1)

# NOTE: despite the `_frequency_s` names, calculate_frequency returns seconds per
# sample (a period); the `_hz` values are its reciprocal.
rt_frequency_s = calculate_frequency('registeredTime', without_duplicates=False)
rt_frequency_hz = 1 / rt_frequency_s
print(f"Register frequency: {rt_frequency_s} s or {rt_frequency_hz} Hz")

# Same measurement with repeated registeredTime values counted once.
rt_frequency_without_duplicates_s = calculate_frequency('registeredTime', without_duplicates=True)
rt_frequency_without_duplicates_hz = 1 / rt_frequency_without_duplicates_s
print(f"Without duplicates: {rt_frequency_without_duplicates_s} s or {rt_frequency_without_duplicates_hz} Hz")

# Largest registeredDelta from the row labelled 1 onwards; the column is in
# milliseconds, hence the division by 1000 when printing seconds.
rt_max_period = df.loc[1:, 'registeredDelta'].max()
print(f"Register max period: {rt_max_period / 1000} s")

Register frequency: 0.004526486486486487 s or 220.92190112252206 Hz
Without duplicates: 0.01562313432835821 s or 64.0076427036064 Hz
Register max period: 0.016 s

# NOTE: despite the `_frequency_s` names, calculate_frequency returns seconds per
# sample (a period); the `_hz` values are its reciprocal.
st_frequency_s = calculate_frequency('scheduledTime', without_duplicates=False)
st_frequency_hz = 1 / st_frequency_s
print(f"Schedule frequency: {st_frequency_s} s or {st_frequency_hz} Hz")

# Same measurement with repeated scheduledTime values counted once.
st_frequency_without_duplicates_s = calculate_frequency('scheduledTime', without_duplicates=True)
st_frequency_without_duplicates_hz = 1 / st_frequency_without_duplicates_s
print(f"Without duplicates: {st_frequency_without_duplicates_s} s or {st_frequency_without_duplicates_hz} Hz")

# Largest scheduledDelta from the row labelled 1 onwards; the column is in
# milliseconds, hence the division by 1000 when printing seconds.
st_max_period = df.loc[1:, 'scheduledDelta'].max()
print(f"Schedule max period: {st_max_period / 1000} s")

Schedule frequency: 0.004524324324324324 s or 221.02747909199525 Hz
Without duplicates: 0.010786082474226803 s or 92.7120669056153 Hz
Schedule max period: 0.023 s

import plotly.graph_objects as go


def display_time_series(selected_time_column: str):
    """
    Plots the ``x`` and ``y`` columns of ``df`` as two line traces against a time column.

    :param selected_time_column: name of the ``df`` column used for the horizontal
        axis; its values are turned into labels with ``format_time``
    """
    time_labels = df[selected_time_column].map(format_time)

    fig = go.Figure()
    for coordinate in ('x', 'y'):
        fig.add_trace(
            go.Scatter(x=time_labels, y=df[coordinate], mode='lines', name=coordinate)
        )

    fig.update_layout(
        title='Coordinates by time',
        xaxis_title=f'{selected_time_column} (m:s.ms)',
        yaxis_title='coordinates',
    )
    fig.update_xaxes(tickangle=30, dtick=20)

    fig.show(renderer='notebook_connected')


def format_time(td: pd.Timedelta):
    """
    Renders a time delta as ``MM:SS.mmm``: whole minutes (at least two digits),
    then seconds with three decimals.
    """
    minutes, seconds = divmod(td.total_seconds(), 60)

    return f'{int(minutes):02d}:{seconds:06.3f}'

display_time_series('registeredTime')

display_time_series('scheduledTime')

	registeredTime	scheduledTime	registeredDelta	scheduledDelta	availableMs	x	y	tiltX	tiltY	pressure
0	-475696:-28:-39.741	0:0:0.819	0:0:0.000	0:0:0.000	25	12.90600	-19.717503	0.147280	0.055411	0.124405
1	-475696:-28:-39.741	0:0:0.819	0:0:0.000	0:0:0.000	25	12.54750	-19.896755	0.147280	0.055411	0.183128
2	-475696:-28:-39.741	0:0:0.835	0:0:0.000	0:0:0.016	25	12.18900	-20.075992	0.148434	0.093169	0.226590
3	-475696:-28:-39.756	0:0:0.835	0:0:0.015	0:0:0.000	25	12.00975	-20.255245	0.168399	0.093960	0.251740
4	-475696:-28:-39.756	0:0:0.835	0:0:0.000	0:0:0.000	25	11.65125	-20.613750	0.160511	0.107156	0.269198
...	...	...	...	...	...	...	...	...	...	...
921	-475696:-28:-35.912	0:0:4.993	0:0:0.000	0:0:0.011	25	144.47550	-79.407750	0.291711	0.166940	0.364058
922	-475696:-28:-35.912	0:0:4.993	0:0:0.000	0:0:0.000	25	146.44725	-79.407750	0.282978	0.143975	0.248077
923	-475696:-28:-35.928	0:0:4.993	0:0:0.016	0:0:0.000	25	148.23975	-79.228500	0.263660	0.142558	0.143938
924	-475696:-28:-35.928	0:0:5.004	0:0:0.000	0:0:0.011	25	149.67375	-79.049245	0.270305	0.128495	0.059822
925	-475696:-28:-35.928	0:0:5.004	0:0:0.000	0:0:0.000	25	150.92850	-78.869995	0.251198	0.127254	0.007081

	registeredTime	scheduledTime	registeredDelta	scheduledDelta	availableMs	x	y	tiltX	tiltY	pressure
0	0 days 00:00:00.819000	0 days 00:00:00.819000	0.0	0.0	25	12.90600	-19.717503	0.147280	0.055411	0.124405
1	0 days 00:00:00.819000	0 days 00:00:00.819000	0.0	0.0	25	12.54750	-19.896755	0.147280	0.055411	0.183128
2	0 days 00:00:00.819000	0 days 00:00:00.835000	0.0	16.0	25	12.18900	-20.075992	0.148434	0.093169	0.226590
3	0 days 00:00:00.834000	0 days 00:00:00.835000	15.0	0.0	25	12.00975	-20.255245	0.168399	0.093960	0.251740
4	0 days 00:00:00.834000	0 days 00:00:00.835000	0.0	0.0	25	11.65125	-20.613750	0.160511	0.107156	0.269198
...	...	...	...	...	...	...	...	...	...	...
921	0 days 00:00:04.990000	0 days 00:00:04.993000	0.0	11.0	25	144.47550	-79.407750	0.291711	0.166940	0.364058
922	0 days 00:00:04.990000	0 days 00:00:04.993000	0.0	0.0	25	146.44725	-79.407750	0.282978	0.143975	0.248077
923	0 days 00:00:05.006000	0 days 00:00:04.993000	16.0	0.0	25	148.23975	-79.228500	0.263660	0.142558	0.143938
924	0 days 00:00:05.006000	0 days 00:00:05.004000	0.0	11.0	25	149.67375	-79.049245	0.270305	0.128495	0.059822
925	0 days 00:00:05.006000	0 days 00:00:05.004000	0.0	0.0	25	150.92850	-78.869995	0.251198	0.127254	0.007081

	registeredTime	scheduledTime	registeredDelta	scheduledDelta	x	y	tiltX	tiltY	pressure
count	926	926	926.000000	926.000000	926.000000	926.000000	926.000000	926.000000	926.000000
mean	0 days 00:00:02.913481641	0 days 00:00:02.913210583	4.521598	4.519438	89.091508	-45.652613	0.279436	0.209649	0.618491
std	0 days 00:00:01.210993335	0 days 00:00:01.210882965	7.093588	5.513928	47.337068	19.867742	0.072402	0.060057	0.145330
min	0 days 00:00:00.819000	0 days 00:00:00.819000	0.000000	0.000000	9.500250	-86.040000	0.127694	0.055411	0.007081
25%	0 days 00:00:01.865000	0 days 00:00:01.862750	0.000000	0.000000	48.980063	-60.945000	0.231788	0.169633	0.487761
50%	0 days 00:00:02.912000	0 days 00:00:02.912000	0.000000	0.000000	88.728750	-46.604995	0.284107	0.219967	0.684959
75%	0 days 00:00:03.959000	0 days 00:00:03.962000	15.000000	11.000000	129.149625	-28.142242	0.342257	0.257720	0.739684
max	0 days 00:00:05.006000	0 days 00:00:05.004000	16.000000	23.000000	174.051750	-13.622999	0.404596	0.316523	0.791845

Time duplication analysis for the Java-format data¶

File reading¶

Reading start time¶

Displaying the initial data¶

Converting time columns to pandas timedeltas¶

Fixing negative time shift if necessary¶

Data statistics¶

Grouping by time¶

Registered time¶

Scheduled time¶

Calculating frequency¶

By registered time¶

By scheduled time¶

Displaying the time series¶

By registered time¶

By scheduled time¶