Motion analysis¶
The processing algorithm is taken from the study http://elartu.tntu.edu.ua/handle/lib/35810
and https://github.com/TNTU-121-SE-research/ANM_Data-Preprocess
Data reading¶
In [1403]:
import logging
import pandas as pd
files_prefix = f'../../data'
filename = '2024-04-17_19-53-14-mouse-for-analysis.txt'
filepath = f'{files_prefix}/{filename}'
comment_mark = '#'
separator = '\\s+'
df = pd.read_csv(filepath, sep=separator, comment=comment_mark)
Reading start time¶
Start time is expected in the format without spaces
In [1404]:
import os
from datetime import datetime
start_time = None
time_header = 'Start time'
print('Data header:')
with open(filepath, 'r') as file:
for row in file:
values = row.split(' ')
if time_header in row:
start_time = pd.to_datetime(row.split(' ')[-1])
if row.strip() == '':
break
print(row.strip(comment_mark + '\n'))
if start_time is None:
creation_time = datetime.utcfromtimestamp(os.path.getctime(filepath))
start_time = pd.to_datetime(creation_time)
logging.warning("The start time value is not present in the header. The file creation date is selected")
Data header: Tablet rate 250 Screen dimension width=1920 height=1080 Tablet size width=344.16 height=193.59 Event period 0 Start time 2024-04-17T16:52:56.912Z Fields description absoluteTime The absolute time when the event occurred (hardware-depends) registeredTime The time when the event occurred (elapsed from the start time) scheduledTime The time when the event is provided by the library (elapsed from the start time) registered-scheduled deltas The time difference between adjacent events for both timestamps x-y The coordinates at this time tiltX-tiltY The tilts in the respective axes pressure The pen pressure (z-coordinate)
The data output¶
In [1405]:
display(df)
absoluteTime | registeredTime | scheduledTime | registeredDelta | scheduledDelta | x | y | tiltX | tiltY | pressure | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1713372778423 | 0:0:1.511 | 0:0:1.511 | 0:0:0.000 | 0:0:0.000 | 63.09600 | -53.95425 | 0.0 | 0.0 | 0.0 |
1 | 1713372778431 | 0:0:1.519 | 0:0:1.520 | 0:0:0.008 | 0:0:0.009 | 63.27525 | -53.95425 | 0.0 | 0.0 | 0.0 |
2 | 1713372778439 | 0:0:1.527 | 0:0:1.528 | 0:0:0.008 | 0:0:0.008 | 63.27525 | -53.77500 | 0.0 | 0.0 | 0.0 |
3 | 1713372778443 | 0:0:1.531 | 0:0:1.531 | 0:0:0.004 | 0:0:0.003 | 63.45450 | -53.77500 | 0.0 | 0.0 | 0.0 |
4 | 1713372778459 | 0:0:1.547 | 0:0:1.547 | 0:0:0.016 | 0:0:0.016 | 63.63375 | -53.77500 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2333 | 1713372793954 | 0:0:17.042 | 0:0:17.042 | 0:0:0.003 | 0:0:0.003 | 43.02000 | -21.15150 | 0.0 | 0.0 | 0.0 |
2334 | 1713372793954 | 0:0:17.042 | 0:0:17.044 | 0:0:0.000 | 0:0:0.002 | 42.84075 | -21.15150 | 0.0 | 0.0 | 0.0 |
2335 | 1713372793958 | 0:0:17.046 | 0:0:17.047 | 0:0:0.004 | 0:0:0.003 | 42.66150 | -21.33075 | 0.0 | 0.0 | 0.0 |
2336 | 1713372793960 | 0:0:17.048 | 0:0:17.048 | 0:0:0.002 | 0:0:0.001 | 42.30300 | -21.33075 | 0.0 | 0.0 | 0.0 |
2337 | 1713372793963 | 0:0:17.051 | 0:0:17.051 | 0:0:0.003 | 0:0:0.003 | 42.12375 | -21.51000 | 0.0 | 0.0 | 0.0 |
2338 rows × 10 columns
Converting the time columns to the Pandas time¶
In [1406]:
time_from_start_columns = ['registeredTime', 'scheduledTime']
delta_columns = ['registeredDelta', 'scheduledDelta']
for column in time_from_start_columns:
try:
df[column] = pd.to_timedelta(df[column])
# To get an absolute time
# df[column] = pd.to_timedelta(df[column]) + start_time
except ValueError: # i.e. negative values
df[column] = df[column]
for column in delta_columns:
df[column] = pd.to_timedelta(df[column]).dt.total_seconds() * 1000 # ms
Selecting preferred time column¶
Registered time is preferred by default. But, in case of negative values scheduled time can be selected
In [1407]:
preferred_time_column = 'registeredTime'
preferred_delta_column = 'registeredDelta'
try:
pd.to_timedelta(df[preferred_time_column])
except ValueError:
preferred_time_column = 'scheduledTime'
preferred_delta_column = 'scheduledDelta'
3D visualization¶
In [1408]:
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot(111, projection="3d")
time = df[preferred_time_column].values.astype('float64')
x_column = df['x'].to_numpy()
y_column = df['y'].to_numpy()
ax.scatter(x_column, y_column, time)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel(preferred_time_column)
plt.title('Motion data visualization')
ax.view_init(elev=15, azim=-60)
fig.tight_layout()
plt.show()
In [1409]:
import numpy as np
def to_polar(df_coordinates):
df.insert(5, 'theta', 0)
df.insert(6, 'rho', 0)
x0, y0 = df_coordinates['x'].iloc[0], df_coordinates['y'].iloc[0]
df_coordinates['theta'] = np.arctan2(df_coordinates['y'] - y0, df_coordinates['x'] - x0)
df_coordinates['rho'] = np.sqrt((df_coordinates['x'] - x0) ** 2 + (df_coordinates['y'] - y0) ** 2)
return df_coordinates
df = to_polar(df)
df = df.reset_index(drop=True)
df['theta'] = np.unwrap(df['theta'])
The trajectory in polar coordinates¶
In [1410]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(subplot_kw={'projection': 'polar'})
ax.plot(df['theta'], df['rho'], marker='o')
plt.show()
Fitting¶
In [1411]:
from scipy.optimize import curve_fit
def fitting_function(x, a, b, c, d):
return a + b * x + c * np.cos(x) + d * np.sin(x)
df.insert(7, 'fitted_rho', 0)
theta = df['theta']
rho = df['rho']
fitting_popt, _ = curve_fit(fitting_function, theta, rho)
df['fitted_rho'] = fitting_function(theta, *fitting_popt)
fitted_rho = df['fitted_rho']
display(df)
absoluteTime | registeredTime | scheduledTime | registeredDelta | scheduledDelta | theta | rho | fitted_rho | x | y | tiltX | tiltY | pressure | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1713372778423 | 0 days 00:00:01.511000 | 0 days 00:00:01.511000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.225253 | 63.09600 | -53.95425 | 0.0 | 0.0 | 0.0 |
1 | 1713372778431 | 0 days 00:00:01.519000 | 0 days 00:00:01.520000 | 8.0 | 9.0 | 0.000000 | 0.179250 | 0.225253 | 63.27525 | -53.95425 | 0.0 | 0.0 | 0.0 |
2 | 1713372778439 | 0 days 00:00:01.527000 | 0 days 00:00:01.528000 | 8.0 | 8.0 | 0.785398 | 0.253498 | 2.014936 | 63.27525 | -53.77500 | 0.0 | 0.0 | 0.0 |
3 | 1713372778443 | 0 days 00:00:01.531000 | 0 days 00:00:01.531000 | 4.0 | 3.0 | 0.463648 | 0.400815 | 1.293664 | 63.45450 | -53.77500 | 0.0 | 0.0 | 0.0 |
4 | 1713372778459 | 0 days 00:00:01.547000 | 0 days 00:00:01.547000 | 16.0 | 16.0 | 0.321751 | 0.566838 | 0.968509 | 63.63375 | -53.77500 | 0.0 | 0.0 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2333 | 1713372793954 | 0 days 00:00:17.042000 | 0 days 00:00:17.042000 | 3.0 | 3.0 | 20.969564 | 38.458630 | 37.964670 | 43.02000 | -21.15150 | 0.0 | 0.0 | 0.0 |
2334 | 1713372793954 | 0 days 00:00:17.042000 | 0 days 00:00:17.044000 | 0.0 | 2.0 | 20.973530 | 38.552504 | 37.970794 | 42.84075 | -21.15150 | 0.0 | 0.0 | 0.0 |
2335 | 1713372793958 | 0 days 00:00:17.046000 | 0 days 00:00:17.047000 | 4.0 | 3.0 | 20.979939 | 38.494955 | 37.980674 | 42.66150 | -21.33075 | 0.0 | 0.0 | 0.0 |
2336 | 1713372793960 | 0 days 00:00:17.048000 | 0 days 00:00:17.048000 | 2.0 | 1.0 | 20.987792 | 38.686452 | 37.992754 | 42.30300 | -21.33075 | 0.0 | 0.0 | 0.0 |
2337 | 1713372793963 | 0 days 00:00:17.051000 | 0 days 00:00:17.051000 | 3.0 | 3.0 | 20.994199 | 38.632430 | 38.002587 | 42.12375 | -21.51000 | 0.0 | 0.0 | 0.0 |
2338 rows × 13 columns
Visualization of the unraveled spiral¶
In [1412]:
plt.figure(figsize=(10, 6))
plt.plot(theta, rho, label='Original')
plt.plot(theta, fitted_rho, label='Fitted', linewidth=2)
plt.xlabel('theta (radian)')
plt.ylabel('rho (mm)')
plt.title('Unraveled spiral')
plt.legend()
plt.grid(True)
plt.show()
In [1413]:
first_time = df[preferred_time_column].iloc[0]
last_time = df[preferred_time_column].iloc[-1]
duration = (last_time - first_time).total_seconds()
frequency_s = duration / (len(df) - 1)
frequency_hz = 1 / frequency_s
print(f"Frequency: {frequency_s} s or {frequency_hz} Hz")
max_period = df.loc[1:, 'registeredDelta'].max()
print(f"Max period: {max_period / 1000} s")
Frequency: 0.006649550706033376 s or 150.3861003861004 Hz Max period: 0.08 s
In [1414]:
from scipy.fftpack import fft
def gaussian_function(x, a, w, xc):
return a / (w * np.sqrt(np.pi / 2)) * np.exp(-2 * ((x - xc) / w) ** 2)
fluctuations = rho - fitting_function(theta, *fitting_popt)
URS = np.std(fluctuations)
NFFT = 2 ** (int(np.log2(len(fluctuations))) + 1)
Y = fft(fluctuations.values, NFFT) / len(fluctuations)
f = frequency_hz / 2 * np.linspace(0, 1, NFFT // 2 + 1)
frequencies = 2 * np.abs(Y[:NFFT // 2 + 1])
gaussian_popt, _ = curve_fit(gaussian_function, f, frequencies)
URD = gaussian_popt[0]
The fluctuation chart¶
In [1415]:
plt.figure(figsize=(10, 6))
time_s = df[preferred_time_column].dt.total_seconds()
plt.plot(time_s, fluctuations)
plt.title('Residual fluctuation')
plt.xlabel('time (s)')
plt.ylabel('rho (mm)')
plt.grid(True)
plt.show()
The frequency chart¶
In [1416]:
plt.figure(figsize=(10, 6))
plt.plot(f, frequencies)
plt.title('FFT')
plt.xlabel('frequency (hz)')
plt.ylabel('amplitude (mm/hz)')
plt.grid(True)
plt.show()
Evaluation scores¶
In [1417]:
import math
URS_score = 4.52694 + 10.2136 * math.log10(URS)
URD_Score = 9.20773 + 3.7515 * math.log10(URD)
print(f'URS score: {URS_score}\nURD score: {URD_Score}')
URS score: 3.082894722986693 URD score: 7.34284758728647