UTORG Intermediate MATLAB Tutorial

Justin J. Boutilier

February 18, 2016

Contents

Numeric data

% Define Num to be 21
Num = 21;
% Check if Num is a real number
isreal(Num)
ans =

     1

% Check if Num is finite number
isfinite(Num)
ans =

     1

Character arrays (i.e., strings)

% Create a string
Str = 'MATLAB rocks!';
% Check if Str is a character array
ischar(Str)
ans =

     1

% Display the first character
Str(1)
ans =

M

% Display the first 6 characters
Str(1:6)
ans =

MATLAB

% Check which entries are spaces
isspace(Str)
ans =

     0     0     0     0     0     0     1     0     0     0     0     0     0

% Check which entries are letters
isletter(Str)
ans =

     1     1     1     1     1     1     0     1     1     1     1     1     0

Checking variable type

% Use the class function
class(Str)
class(Num)
ans =

char


ans =

double

Converting between strings and numbers

% Convert a number to a string
NumString = num2str(Num);
class(NumString)
ans =

char

% Convert a string to a number
Num = str2num(NumString);
class(Num)
ans =

double

Eval function

% Example: Addition
eval('1 + 2')
ans =

     3

% More powerful usage: Assigning variables
str = 'MyVar = 5';
eval(str)
MyVar =

     5

Numeric arrays

Numberic arrays are used to store numbers and they can be defined for (almost) any number of dimensions.

% Create a custom array
Num_Array = [1 1 1; 2 2 2]
Num_Array =

     1     1     1
     2     2     2

% Create a 5x4 array of ones
Num_Array = ones(5,4)
Num_Array =

     1     1     1     1
     1     1     1     1
     1     1     1     1
     1     1     1     1
     1     1     1     1

% Create a 5x4x2 array of zeros
Num_Array = zeros(5,4,2)
Num_Array(:,:,1) =

     0     0     0     0
     0     0     0     0
     0     0     0     0
     0     0     0     0
     0     0     0     0


Num_Array(:,:,2) =

     0     0     0     0
     0     0     0     0
     0     0     0     0
     0     0     0     0
     0     0     0     0

% Repeat the number 21 for 2 rows and 3 columns
Num_Array = repmat(21,2,3)
Num_Array =

    21    21    21
    21    21    21

% Repeat the column vector [21;21] for 2 rows and 3 columns
Num_Array = repmat([21; 21], 2,3)
Num_Array =

    21    21    21
    21    21    21
    21    21    21
    21    21    21

Tables

Tables are used to store mixed-type data organized into columns.

% Read table from comma-delimited text file
T = readtable('patients.dat');
% Determine the size of the table
size(T)
ans =

   100    10

% Summarize table
summary(T)
Variables:

    LastName: 100x1 cell string

    Gender: 100x1 cell string

    Age: 100x1 double
        Values:

            min       25   
            median    39   
            max       50   

    Location: 100x1 cell string

    Height: 100x1 double
        Values:

            min       60      
            median    67      
            max       72      

    Weight: 100x1 double
        Values:

            min         111   
            median    142.5   
            max         202   

    Smoker: 100x1 double
        Values:

            min       0       
            median    0       
            max       1       

    Systolic: 100x1 double
        Values:

            min       109       
            median    122       
            max       138       

    Diastolic: 100x1 double
        Values:

            min         68       
            median    81.5       
            max         99       

    SelfAssessedHealthStatus: 100x1 cell string

% Display the first 2 rows
T(1:2,:)
ans = 

    LastName     Gender    Age            Location             Height    Weight    Smoker    Systolic    Diastolic    SelfAssessedHealthStatus
    _________    ______    ___    _________________________    ______    ______    ______    ________    _________    ________________________

    'Smith'      'Male'    38     'County General Hospital'    71        176       1         124         93           'Excellent'             
    'Johnson'    'Male'    43     'VA Hospital'                69        163       0         109         77           'Fair'                  

% Display the 3rd and 17th rows
T([3 17],:)
ans = 

     LastName      Gender     Age             Location              Height    Weight    Smoker    Systolic    Diastolic    SelfAssessedHealthStatus
    __________    ________    ___    ___________________________    ______    ______    ______    ________    _________    ________________________

    'Williams'    'Female'    38     'St. Mary's Medical Center'    64        131       0         125         83           'Good'                  
    'Thompson'    'Male'      32     'St. Mary's Medical Center'    69        191       1         124         95           'Excellent'             

% Create a new column (body mass index)
T.BMI = (T.Weight*0.453592)./(T.Height*0.0254).^2;
% Sort rows (by height)
Sorted_T = sortrows(T,'Height','descend');
Sorted_T(1:5,:)
ans = 

    LastName    Gender    Age             Location              Height    Weight    Smoker    Systolic    Diastolic    SelfAssessedHealthStatus     BMI  
    ________    ______    ___    ___________________________    ______    ______    ______    ________    _________    ________________________    ______

    'White'     'Male'    39     'VA Hospital'                  72        202       1         130         95           'Excellent'                 27.396
    'Reed'      'Male'    50     'VA Hospital'                  72        186       1         129         89           'Excellent'                 25.226
    'Brooks'    'Male'    39     'St. Mary's Medical Center'    72        176       0         120         74           'Excellent'                  23.87
    'Price'     'Male'    31     'VA Hospital'                  72        178       1         124         84           'Fair'                      24.141
    'Smith'     'Male'    38     'County General Hospital'      71        176       1         124         93           'Excellent'                 24.547

% Replace the last name Wilson by an empty cell
T.LastName{strcmp(T.LastName, 'Wilson')} = '';
% Remove rows with missing data
Missing_ids = any(ismissing(T),2); % get indices for rows with missing data
Clean_T = T(~Missing_ids,:); % remove rows
size(Clean_T)
ans =

    99    11

Cell arrays

Cell arrays can be used to store data of all types. Individual cells can store numeric arrays, tables, or other cell arrays.

% Create an empty cell array
C = cell(5,4,2)
C(:,:,1) = 

    []    []    []    []
    []    []    []    []
    []    []    []    []
    []    []    []    []
    []    []    []    []


C(:,:,2) = 

    []    []    []    []
    []    []    []    []
    []    []    []    []
    []    []    []    []
    []    []    []    []

% Define a custom cell array
C = {'one', 'two', 'three'; 1, 2, 3}
C = 

    'one'    'two'    'three'
    [  1]    [  2]    [    3]

% Smooth parentheses refer to sets of cells
C(:,1:2)
ans = 

    'one'    'two'
    [  1]    [  2]

% Curly parentheses refer to data within individual cells
C{1,1}
ans =

one

% Store an 5x5 array in cell (2,1)
C{2,1} = ones(5,5)
C = 

    'one'           'two'    'three'
    [5x5 double]    [  2]    [    3]

% Store a cell array in cell (2,2)
C{2,2} = {'another one',1,1}
C = 

    'one'           'two'         'three'
    [5x5 double]    {1x3 cell}    [    3]

% Access data in cell (2,2)
C{2,2}
ans = 

    'another one'    [1]    [1]

Learning assignment I

  1. Load the file 'patients.dat' to a table.
  2. Create a new column named 'BPRatio', defined as the ratio of systolic to diastolic blood pressure
  3. Find the median and mean for BPRatio
T = readtable('patients.dat');
T.BPRatio = T.Systolic./T.Diastolic;
median(T.BPRatio)
mean(T.BPRatio)
ans =

    1.4789


ans =

    1.4868

Logicals

Logicals can be used to quickly filter and access data

% Determine which rows correspond to a smoker
T.Smoker == 1; % Returns a binary array
find(T.Smoker == 1); % Returns row indices
% Create a new table with only data from smokers
Smokers = T(T.Smoker == 1,:);
% Find the median diastolic BP for smokers vs. non smokers
median(T.Diastolic(T.Smoker == 1))
median(T.Diastolic(T.Smoker == 0))
ans =

    91


ans =

    79

% Display data for smokers over 70 inches
T(T.Smoker == 1 & T.Height > 70,:)
ans = 

     LastName     Gender    Age            Location             Height    Weight    Smoker    Systolic    Diastolic    SelfAssessedHealthStatus    BPRatio
    __________    ______    ___    _________________________    ______    ______    ______    ________    _________    ________________________    _______

    'Smith'       'Male'    38     'County General Hospital'    71        176       1         124         93           'Excellent'                 1.3333 
    'White'       'Male'    39     'VA Hospital'                72        202       1         130         95           'Excellent'                 1.3684 
    'Martin'      'Male'    48     'VA Hospital'                71        181       1         130         92           'Good'                       1.413 
    'Baker'       'Male'    44     'VA Hospital'                71        192       1         136         90           'Good'                      1.5111 
    'Mitchell'    'Male'    39     'County General Hospital'    71        164       1         128         92           'Fair'                      1.3913 
    'Reed'        'Male'    50     'VA Hospital'                72        186       1         129         89           'Excellent'                 1.4494 
    'Price'       'Male'    31     'VA Hospital'                72        178       1         124         84           'Fair'                      1.4762 

% Display patients with the last names Carter or Sanchez
T(strcmp(T.LastName,'Carter') | strcmp(T.LastName,'Sanchez'),:)
ans = 

    LastName      Gender     Age             Location              Height    Weight    Smoker    Systolic    Diastolic    SelfAssessedHealthStatus    BPRatio
    _________    ________    ___    ___________________________    ______    ______    ______    ________    _________    ________________________    _______

    'Carter'     'Female'    38     'St. Mary's Medical Center'    63        128       0         120         74           'Good'                      1.6216 
    'Sanchez'    'Female'    44     'St. Mary's Medical Center'    62        136       1         130         91           'Good'                      1.4286 

Learning assignment II

  1. Find the number of smokers over 45 and their median BMI
  2. Find the number of non smokers under 30 and their median BMI
  3. Find the average height and weight for females vs. males
% Number 1
T.BMI = (T.Weight*0.453592)./(T.Height*0.0254).^2;
sum(T.Smoker == 1 & T.Age > 45)
median(T.BMI(T.Smoker == 1 & T.Age > 45))
ans =

     5


ans =

   25.2259

% Number 2
T.BMI = (T.Weight*0.453592)./(T.Height*0.0254).^2;
sum(T.Smoker == 0 & T.Age < 45)
median(T.BMI(T.Smoker == 0 & T.Age < 45))
ans =

    50


ans =

   22.6722

% Number 3
[mean(T.Height(strcmp(T.Gender,'Male')))
mean(T.Height(strcmp(T.Gender,'Female')))]

[mean(T.Weight(strcmp(T.Gender,'Male')))
mean(T.Weight(strcmp(T.Gender,'Female')))]
ans =

   69.2340
   65.1509


ans =

  180.5319
  130.4717

Reading and writing data tables

% readtable
Wine = readtable('wine.csv', 'Delimiter', ','); % CSV file
Wine = readtable('wine.xlsx'); % Excel file
Wine = readtable('wine.txt','Delimiter','\t'); % Text file
Warning: Variable names were
modified to make them valid MATLAB
identifiers. 
% writetable
writetable(Wine, 'wine.csv', 'Delimiter', ','); % CSV file
%writetable(Wine, 'wine.xlsx'); % Excel file
writetable(Wine, 'wine.txt','Delimiter','\t'); % Text file

Reading and writing numeric data

% dlmread
Wine = dlmread('wine.csv',',',1,0); % CSV file
Wine = dlmread('wine.txt','\t',1,0); % Text file
% dlmwrite
dlmwrite('wine_nohead.txt',Wine,'delimiter','\t')
% csvread
Wine = csvread('wine.csv',1);
% csvwrite
csvwrite('wine_nohead.csv',Wine);

Reading and writing MAT files

% Save
save Wine.mat Wine
% Load
load Wine.mat
% The native file type can be used to save MATLAB object(s)
[m,n] = size(Wine);
save Wine_wSize.mat Wine m n

MATLAB import tool

Creating figures

% Scatter plot
T = readtable('patients.dat');
scatter(T.Height,T.Weight)
% Determining the trendline
Myfit = polyfit(T.Height,T.Weight,1); % Fit a line to our data
X = min(T.Height):0.1:max(T.Height); % Height data range
Y = polyval(Myfit,X);
% Adding trendline to plot
scatter(T.Height,T.Weight)
hold on
plot(X,Y) % plot trendline
hold off
% Axis labels
scatter(T.Height,T.Weight)
xlabel('Height (inches)')
ylabel('Weight (pounds)')
title('Comparison of height vs. weight')
% Adding color indicators and a legend
IsFem = strcmp(T.Gender,'Female'); % rows corresponding to females

% Scatter plot with filled circles (size=36)
scatter(T.Height(IsFem),T.Weight(IsFem),36,'r','filled')  % red
hold on
scatter(T.Height(~IsFem),T.Weight(~IsFem),36,'b','filled') % blue
hold off

% Axes labels
xlabel('Height (inches)')
ylabel('Weight (pounds)')

% Legend
[h,~] = legend({'Female','Male'},'Location','northwest');

Learning assignment III

  1. Load the 'patients.txt' file as a table (note the file is tab delimited)
  2. Create a scatter plot of systolic vs. diastolic blood pressure
  3. Add a linear trendline
  4. Label smokers as red and non-smokers as green
  5. Add a legend and axis labels
% Read data
T = readtable('patients.txt','Delimiter','\t');

% Color-coded scatter plots
scatter(T.Systolic(T.Smoker == 1),T.Diastolic(T.Smoker == 1),36,'r','filled')
hold on
scatter(T.Systolic(T.Smoker == 0),T.Diastolic(T.Smoker == 0),36,'g','filled')

% Add trendline
Myfit = polyfit(T.Systolic,T.Diastolic,1);
X = min(T.Systolic):0.1:max(T.Systolic);
Y = polyval(Myfit,X);
plot(X,Y)
hold off

% Labels and legend
xlabel('Systolic BP (mmHg)')
ylabel('Diastolic BP (mmHg)')
[h,~] = legend({'Smokers','Non-smokers'},'Location','northwest');

Optimization Toolbox - linprog

The linprog command is used to solve linear optimization problems of the form:

$$
\begin{array}{cl}
 \min & c'x \\
 \mbox{s.t.} & A\;x \leq b, \\
& Aeq\;x = beq, \\
& lb \leq x \leq ub.
\end{array}
$$

The correspinding syntax for linprog is:

[x, opt] = linprog(c,A,b,Aeq,beq,lb,ub);

Simple LP

Consider the following simple linear optimization problem:

$$
\begin{array}{cl}
 \min & x + y \\
 \mbox{s.t.} & x + y \geq 1, \\
& x,y \geq 0.
\end{array}
$$

This can be solved in MATLAB using linprog as follows:

%Define the cost function
c = [1; 1];

%Define the constraint matrix
A = [-1, -1];

%Define the RHS
b = -1;

%Define the lower/upper bounds on the variables
lb = [0; 0];

%Solve using linprog
[x,fval] = linprog(c,A,b,[],[],lb,[])
Optimization terminated.

x =

    0.5000
    0.5000


fval =

    1.0000

Optimization Toolbox - intlinprog

The intlinprog command is used to solve mixed-integer linear optimization problems of the form:

$$
\begin{array}{cl}
 \min & c'x \\
 \mbox{s.t.} & A\;x \leq b, \\
& Aeq\;x = beq, \\
& lb \leq x \leq ub, \\
& x(intcon)\;is\;integer.
\end{array}
$$

The correspinding syntax for linprog is:

[x, opt] = intlinprog(c,intcon,A,b,Aeq,beq,lb,ub);

Simple MILP

Consider the following simple mixed-integer linear optimization problem:

$$
\begin{array}{cl}
 \min & x + y \\
 \mbox{s.t.} & x + y \geq 1, \\
& x,y \in \{0,1\}.
\end{array}
$$

This can be solved in MATLAB using linprog as follows:

%Define the cost function
c = [1; 1];

%Define the constraint matrix
A = [-1, -1];

%Define the RHS
b = -1;

%Define the variables to be binary
intcon = [1 2];

%Solve using linprog
[x,fval] = intlinprog(c,intcon,A,b,[],[],[],[])
LP:                Optimal objective value is 1.000000.                                             


Optimal solution found.

Intlinprog stopped at the root node because the objective value is within a gap
tolerance of the optimal value, options.TolGapAbs = 0 (the default value). The
intcon variables are integer within tolerance, options.TolInteger = 1e-05 (the
default value).


x =

     1
     0


fval =

     1

Learning assingment IV

First, load 'wine.csv'. Next, formulate an optimization problem that seeks to maximize the sum of wine quality with the following constraints:

% Read data
T = readtable('wine.csv');
[m,~] = size(T);
% Formulate MILP with intlinprog
c = -T.quality; % cost vector (negative because it's a max problem)
intcon = 1:m; % index integer variables

A = [ones(1,m); (1/5)*T.residual_sugar'; diag(T.pH)]; % A matrix

b = [5; 2; repmat(3,m,1)]; % b matrix

lb = zeros(m,1); % lb = 0
ub = ones(m,1); % ub = 1

[x,fval] = intlinprog(c,intcon,A,b,[],[],lb,ub); % solve
LP:                Optimal objective value is -36.607143.                                           

Cut Generation:    Applied 2 cover cuts.                                                            
                   Lower bound is -36.000000.                                                       
                   Relative gap is 0.00%.                                                          


Optimal solution found.

Intlinprog stopped at the root node because the objective value is within a gap
tolerance of the optimal value, options.TolGapAbs = 0 (the default value). The
intcon variables are integer within tolerance, options.TolInteger = 1e-05 (the
default value).

% Display optimal cost and chosen wines
-fval
T(x>0,:)
ans =

    36


ans = 

     ID     fixed_acidity    volatile_acidity    citric_acid    residual_sugar    chlorides    free_sulfur_dioxide    total_sulfur_dioxide    density     pH     sulphates    alcohol    quality
    ____    _____________    ________________    ___________    ______________    _________    ___________________    ____________________    _______    ____    _________    _______    _______

     441    12.6              0.31               0.72           2.2               0.072         6                      29                      0.9987    2.88    0.82          9.8       8      
     584      12              0.28               0.49           1.9               0.074        10                      21                      0.9976    2.98    0.66          9.9       7      
     615     9.2             0.755               0.18           2.2               0.148        10                     103                      0.9969    2.87    1.36         10.2       6      
     658      12               0.5               0.59           1.4               0.073        23                      42                       0.998    2.92    0.68         10.5       7      
    1091      10              0.26               0.54           1.9               0.083        42                      74                     0.99451    2.98    0.63         11.8       8      

Optimization Packages - CVX

Consider the simple linear optimization problem from above:

$$
\begin{array}{cl}
 \min & x + y \\
 \mbox{s.t.} & x + y \geq 1, \\
& x,y \geq 0.
\end{array}
$$

This can be solved in CVX as follows:

cvx_solver Gurobi
cvx_begin
    variable x(1) binary % binary variable of length 1
    variable y(1) binary
    minimize(x+y) % objective
    subject to
        x + y >= 1; % constraints
        y>=0;
        x>=0;
cvx_end
 
Calling Gurobi 6.00: 5 variables, 3 equality constraints
------------------------------------------------------------
Gurobi optimizer, licensed to CVX for CVX
Optimize a model with 3 rows, 5 columns and 7 nonzeros
Coefficient statistics:
  Matrix range    [1e+00, 1e+00]
  Objective range [1e+00, 1e+00]
  Bounds range    [1e+00, 1e+00]
  RHS range       [1e+00, 1e+00]
Found heuristic solution: objective 1
Presolve removed 3 rows and 5 columns
Presolve time: 0.00s

Explored 0 nodes (0 simplex iterations) in 0.00 seconds
Thread count was 1 (of 4 available processors)

Optimal solution found (tolerance 1.00e-04)
Best objective 1.000000000000e+00, best bound 1.000000000000e+00, gap 0.0%
------------------------------------------------------------
Status: Solved
Optimal value (cvx_optval): +1
 

Learning assignment IV with CVX

% Formulate MILP in CVX
cvx_solver Gurobi
cvx_begin
    variable x(m) binary % define binary variable of length m
    maximize(T.quality'*x) % max wine quality
    subject to
        sum(x) <= 5; % at most 5 wines
        (1/5)*T.residual_sugar'*x <= 2; % average sugar less than 2
        x.*T.pH <= repmat(3,m,1); % no wine above 3 pH
cvx_end
 
Calling Gurobi 6.00: 3200 variables, 1601 equality constraints
------------------------------------------------------------
Gurobi optimizer, licensed to CVX for CVX
Optimize a model with 1601 rows, 3200 columns and 6398 nonzeros
Coefficient statistics:
  Matrix range    [2e-01, 4e+00]
  Objective range [3e+00, 8e+00]
  Bounds range    [1e+00, 1e+00]
  RHS range       [2e+00, 5e+00]
Found heuristic solution: objective 0
Presolve removed 1599 rows and 3178 columns
Presolve time: 0.00s
Presolved: 2 rows, 22 columns, 44 nonzeros
Variable types: 0 continuous, 22 integer (14 binary)

Root relaxation: objective -3.660714e+01, 3 iterations, 0.00 seconds

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time

     0     0  -36.60714    0    2    0.00000  -36.60714      -     -    0s
H    0     0                     -36.0000000  -36.60714  1.69%     -    0s

Explored 0 nodes (3 simplex iterations) in 0.01 seconds
Thread count was 2 (of 4 available processors)

Optimal solution found (tolerance 1.00e-04)
Best objective -3.600000000000e+01, best bound -3.600000000000e+01, gap 0.0%
------------------------------------------------------------
Status: Solved
Optimal value (cvx_optval): +36
 
% Display chosen wines
T(x>0,:)
ans = 

     ID     fixed_acidity    volatile_acidity    citric_acid    residual_sugar    chlorides    free_sulfur_dioxide    total_sulfur_dioxide    density     pH     sulphates    alcohol    quality
    ____    _____________    ________________    ___________    ______________    _________    ___________________    ____________________    _______    ____    _________    _______    _______

     441    12.6             0.31                0.72           2.2               0.072         6                      29                      0.9987    2.88    0.82          9.8       8      
     584      12             0.28                0.49           1.9               0.074        10                      21                      0.9976    2.98    0.66          9.9       7      
     658      12              0.5                0.59           1.4               0.073        23                      42                       0.998    2.92    0.68         10.5       7      
    1019       8             0.18                0.37           0.9               0.049        36                     109                     0.99007    2.89    0.44         12.7       6      
    1091      10             0.26                0.54           1.9               0.083        42                      74                     0.99451    2.98    0.63         11.8       8