PS5 Through R

Problem 1

Part b)

xx = seq(0, 1, length.out = 1000L)
pi = function(x) .1 + 2.7*x - 7.47*x^2 + 5.57*x^3
plot(xx, pi(xx), type = "l", lwd = 3L, col = "red",
     xlab = "x", ylab = "E[Y | X = x]", ylim = c(0, 1),
     main = "Conditional Mean of Y over Domain of X")

Part d), e), g)

Refer back to the PS4 supplement for more details of this approach to simulation.

library(data.table)
pdf = CJ(x = xx, y = c(TRUE, FALSE))
pdf[(y), p := pi(x)]

##                 x     y         p
##    1: 0.000000000 FALSE        NA
##    2: 0.000000000  TRUE 0.1000000
##    3: 0.001001001 FALSE        NA
##    4: 0.001001001  TRUE 0.1026952
##    5: 0.002002002 FALSE        NA
##   ---                            
## 1996: 0.997997998  TRUE 0.8910880
## 1997: 0.998998999 FALSE        NA
## 1998: 0.998998999  TRUE 0.8955348
## 1999: 1.000000000 FALSE        NA
## 2000: 1.000000000  TRUE 0.9000000

pdf[(!y), p := 1 - pi(x)]

##                 x     y         p
##    1: 0.000000000 FALSE 0.9000000
##    2: 0.000000000  TRUE 0.1000000
##    3: 0.001001001 FALSE 0.8973048
##    4: 0.001001001  TRUE 0.1026952
##    5: 0.002002002 FALSE 0.8946245
##   ---                            
## 1996: 0.997997998  TRUE 0.8910880
## 1997: 0.998998999 FALSE 0.1044652
## 1998: 0.998998999  TRUE 0.8955348
## 1999: 1.000000000 FALSE 0.1000000
## 2000: 1.000000000  TRUE 0.9000000

pdf[ , p := p/sum(p)]

##                 x     y            p
##    1: 0.000000000 FALSE 0.0009000000
##    2: 0.000000000  TRUE 0.0001000000
##    3: 0.001001001 FALSE 0.0008973048
##    4: 0.001001001  TRUE 0.0001026952
##    5: 0.002002002 FALSE 0.0008946245
##   ---                               
## 1996: 0.997997998  TRUE 0.0008910880
## 1997: 0.998998999 FALSE 0.0001044652
## 1998: 0.998998999  TRUE 0.0008955348
## 1999: 1.000000000 FALSE 0.0001000000
## 2000: 1.000000000  TRUE 0.0009000000

pdf[ , .SD[sample(.I, 1e6, replace = TRUE, prob = p), 
           .(`(d)` = mean(y), `(e)` = mean(x*y),
             #X is continuous -- in simulation, since
             #  the probability X = .27 is ZERO, we
             #  have to condition on X being _close to_ .27;
             #  we want to pick an interval small enough
             #  not to give a biased answer, but large enough
             #  to be sure we have a decent sample size in that
             #  small interval -- a classic bias-variance tradeoff!
             `(g)` = mean(y[abs(x - .27) < .005]) - 
               mean(y[abs(x - .63) < .005]))]]

##         (d)       (e)       (g)
## 1: 0.352555 0.1964942 0.1634838

Problem 2

There are two main approaches to declaring a matrix in R. The first is to “glue” the matrix together from column or row vectors – using cbind and rbind, respectively. The second is to build the matrix from scratch using matrix, which accepts ncol and nrow arguments to tell R what the dimension of the output is supposed to be. By default, matrix fills the output column-wise, meaning matrix(1:4, nrow = 2, ncol = 2) will fill 1 in the 1,1 cell, 2 in the 2,1 cell, then move to the second column and put 3 in the 1,2 cell and finally 4 in the 2,2 cell. We could fill row-wise by setting the argument byrow to be TRUE.

A = cbind(c(3, -2, 9))
B = cbind(c(8, 0, -1))
C = matrix(c(7, 0, -1, 2, 5, -4), nrow = 2L)
D = matrix(c(3, 3, 3, 1, 4, -7), nrow = 3L)
E = cbind(c(5, 2, 3), c(1, 0, -4), c(-2, 1, -6))
F = matrix(c(4, 0, 2, 1, 7, -3, -5, 7, 0), nrow = 3L)
G = cbind(c(2, -3, 1, 1), c(-8, 7, 0, 2), c(-5, -4, 3, 6))
K = cbind(c(-9, -2, -1, 0))
L = rbind(c(5, 0, 3, 1))

## a)
A + B

##      [,1]
## [1,]   11
## [2,]   -2
## [3,]    8

## b)
-G

##      [,1] [,2] [,3]
## [1,]   -2    8    5
## [2,]    3   -7    4
## [3,]   -1    0   -3
## [4,]   -1   -2   -6

## c)
t(D)

##      [,1] [,2] [,3]
## [1,]    3    3    3
## [2,]    1    4   -7

## d)
C + D

## Error in C + D: non-conformable arrays

## e)
3*C - 2*t(D)

##      [,1] [,2] [,3]
## [1,]   15   -9    9
## [2,]   -2   -2    2

## f) dot product is also known as the inner product
t(A) %*% B

##      [,1]
## [1,]   15

## g)
C %*% B

##      [,1]
## [1,]   51
## [2,]    4

## h)
B %*% C

## Error in B %*% C: non-conformable arguments

## i)
F %*% B

##      [,1]
## [1,]   37
## [2,]   -7
## [3,]   16

## j)
E %*% F

##      [,1] [,2] [,3]
## [1,]   16   18  -18
## [2,]   10   -1  -10
## [3,]    0   -7  -43

## k) #re-arranging intelligently
L %*% K

##      [,1]
## [1,]  -48

## l) see ?norm - there are many different types of matrix norm
norm(K, type = "F")

## [1] 9.273618

## m)
t(G)

##      [,1] [,2] [,3] [,4]
## [1,]    2   -3    1    1
## [2,]   -8    7    0    2
## [3,]   -5   -4    3    6

## n)
E - 5 * diag(3L)

##      [,1] [,2] [,3]
## [1,]    0    1   -2
## [2,]    2   -5    1
## [3,]    3   -4  -11

Problem 3

Q = matrix(c(1, 2, 3, 7), nrow = 2L)
solve(Q)

##      [,1] [,2]
## [1,]    7   -3
## [2,]   -2    1

Q %*% solve(Q)

##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1