path: root/theremin.m
blob: 3a1ebfec6a59c94fb9eb1346d65d104ad3924754
clear;

show_graphs = false; % set to true to plot the recorded hand positions afterwards

%%%%%%%%%%%%%%%
% "constants" %
%%%%%%%%%%%%%%%

% number of updates per second; this determines the length of each audio snippet
const_frames_per_second = 100;

const_Fs = 44100;  % sample rate in Hz
const_te = 1/const_frames_per_second; % signal duration in seconds
const_samples_per_frame = ceil(const_Fs * const_te);
const_sample_range = 0:const_samples_per_frame-1;

% the smaller the value, the quicker the fade out
const_fade_speed = 0.975;

% signal "generator" - only gives the input for sin()
%  it does not apply the sin function yet, see below for reasoning
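%  for sample index n it yields the phase 2*pi*freq*n/const_Fs, so sin() of it is a freq-Hz tone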
signal_f = @(freq) (freq ./ const_Fs .* 2 .* pi .* const_sample_range);

% dimensions to use for detecting number of hands in frame
zero_hands = size(NaN(0,0));
one_hand   = size(NaN(1,1));
two_hands  = size(NaN(1,2));
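%  (size(frame.hands) is [1 1] for one hand and [1 2] for two)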

deviceWriter = audioDeviceWriter('SampleRate', const_Fs, ...
    'SupportVariableSizeInput', true, 'BufferSize', 3 * const_samples_per_frame);
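% the buffer holds three frames' worth of samples, which should give the device some headroom against underruns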

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% init matleap by calling for first frame %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
matleap_frame;

% runtime variables
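% P records the first hand's palm position each frame (preallocated)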
P = NaN(1000000,3);
count = 1;
done = false;

frequency_pos = 0;
height_pos = 0;
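% frequency_pos (palm x) controls the pitch, height_pos (palm height) controls the volume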

offset = 0;
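% offset carries the sine phase over between frames, see get_theremin_sound_bit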

signal = sin(signal_f(0));

complete_signal = signal;

% count consecutive frames that contain a gesture; one full second of them ends the program
gesture_count = 0;

% main loop
while gesture_count < const_frames_per_second
    frame = matleap_frame;
    handCount = size(frame.hands);
    
    % slowly decrease volume
    height_pos = height_pos * const_fade_speed;
    
    if isequal(one_hand, handCount)        
        %pos = frame.hands(1).palm.position;
        pos = frame.hands(1).palm.stabilized_position;
        
        frequency_pos = pos(1);
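        % keep at least a baseline volume while only one hand is tracked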
        height_pos = max(150, height_pos);
        
        P(count, 1:3) = pos;
        count = count + 1;
    
        if frame.gesture > 0
            gesture_count = gesture_count + 1;
        else
            gesture_count = 0;
        end
    elseif isequal(two_hands, handCount)
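        % with two hands, the first controls the pitch and the second the volume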
        gesture_count = 0;
        
        %pos = frame.hands(1).palm.position;
        pos = frame.hands(1).palm.stabilized_position;
        
        frequency_pos = pos(1);
        %y_one = pos(2);
        
        P(count, 1:3) = pos;
        count = count + 1;
        
        %pos = frame.hands(2).palm.position;
        pos = frame.hands(2).palm.stabilized_position;

        %x_two = pos(1);
        height_pos = pos(2);
    else
        % no hands, do nothing (or more than 2 (how?! :D))
        gesture_count = 0;
    end
    
    % play current sound
    [signal, offset] = get_theremin_sound_bit(frequency_pos, height_pos, offset, signal_f);
    
    buffer_under_flow = deviceWriter(signal(:)); 
    
    if buffer_under_flow ~= 0
        disp("Buffer ran empty!");
    end
    
    complete_signal = [complete_signal signal]; % keep the full recording for plotting / saving afterwards
end

release(deviceWriter)

%theremin_player = audioplayer(complete_signal, const_Fs);
%play(theremin_player);

if show_graphs == true
    % extract values
    x = P(:,1); % left (-) / right (+) (LED facing us)
    y = P(:,2); % up / down
    z = P(:,3); % front (+) / back (-) (LED facing us)

    % plot
    figure("Position",[0,0, 1200, 2400]);
    t = tiledlayout(4,1);

    nexttile;
    plot(x);
    ylabel('left right');

    nexttile;
    plot(y);
    ylabel('height');

    nexttile;
    plot(z);
    ylabel('depth');

    nexttile;
    plot3(z,x,y);
    xlabel('depth');
    ylabel('left right');
    zlabel('height');

    %{
    % Plot the audio signal (mono)
    N = size(complete_signal,2); % Determine total number of samples in audio file
    figure;
    subplot(1,1,1);
    stem(1:N, complete_signal(1,:));
    title('Audio Channel');
    % Plot the spectrum
    df = const_Fs / N;
    w = (-(N/2):(N/2)-1)*df;
    y = fft(complete_signal(1,:), N) / N; % For normalizing, but not needed for our analysis
    y2 = fftshift(y);
    figure;
    plot(w,abs(y2));
    %}
end

disp('If you want to save your audio, run `audiowrite(<.wav filename>, complete_signal, const_Fs)`');

function [sound,offset] = get_theremin_sound_bit(x, y, offset, generator)
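    % x: palm x position (drives the pitch), y: palm height (drives the volume)
    % offset: sine phase carried over from the previous frame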
    % the values used here are mostly empirical
    volume = y / 1300;
    % How it works:
    %  https://web.physics.ucsb.edu/~lecturedemonstrations/Composer/Pages/60.17.html
    frequency = max(0.003, (x - 50) / 220) * 1000; % have at least 3Hz
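    %  e.g. a palm at x = 270 gives (270 - 50) / 220 = 1, i.e. a 1000 Hz tone
    %  (assuming matleap reports positions in millimetres, as the Leap Motion API does)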

    % here we generate the array to put into sin(), but offset it by
    %  the previous frame's offset..
    base = generator(frequency) + offset;
    
    % ..which we take from here - carrying the phase over prevents audible jumps between frames
    offset = base(end);
    
    % lastly, apply the volume
    sound = sin(base) .* volume;
end