# Generate a 4 by 5 array filled column-wise with 1..20, then print it.
x <- array(1:20, dim = c(4, 5))
x

# Multiplication table of the digits 0..9 (outer() multiplies by default).
d <- outer(0:9, 0:9)
d

# Each entry of outer(d, d, "-") is one digit product minus another, i.e. the
# determinant of a 2 x 2 matrix with single-digit entries; tabulate how often
# every value occurs.
fr <- table(outer(d, d, "-"))
fr
length(fr)
dim(fr)

-81 -80 -79 -78 -77 -76 -75 -74 -73 -72 -71 -70 -69 -68 -67 -66 -65 -64 -63 -62 
 19   1   2   2   3   2   4   2   4  41   4   4   8   6   6  10   7  27  49   8 
-61 -60 -59 -58 -57 -56 -55 -54 -53 -52 -51 -50 -49 -48 -47 -46 -45 -44 -43 -42 
  8  17   8  12  18  53  13  60  12  18  22  16  35  70  22  24  66  28  18  72 
-41 -40 -39 -38 -37 -36 -35 -34 -33 -32 -31 -30 -29 -28 -27 -26 -25 -24 -23 -22 
 22  75  37  34  26 111  63  36  45  84  34  94  36  93  97  50  53 156  42  60 
-21 -20 -19 -18 -17 -16 -15 -14 -13 -12 -11 -10  -9  -8  -7  -6  -5  -4  -3  -2 
103 107  50 168  51 140 112 116  59 191  65 126 156 185 115 206 117 179 153 156 
 -1   0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18 
111 570 111 156 153 179 117 206 115 185 156 126  65 191  59 116 112 140  51 168 
 19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38 
 50 107 103  60  42 156  53  50  97  93  36  94  34  84  45  36  63 111  26  34 
 39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58 
 37  75  22  72  18  28  66  24  22  70  35  16  22  18  12  60  13  53  18  12 
 59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78 
  8  17   8   8  49  27   7  10   6   6   8   4   4  41   4   2   4   2   3   2 
 79  80  81 
  2   1  19

# Plot how often each determinant value occurs in the table `fr`.
plot(
  fr,
  xlab = "Determinant",
  ylab = "Frequency"
)

# Permuting the first two dimensions of a 3-d array with aperm() must give,
# slice by slice, the ordinary matrix transpose.
# `x` and `xt` are created here because the checks need a 3-d array and its
# permuted copy (same setup as the example in ?aperm).
x <- array(1:24, dim = 2:4)
xt <- aperm(x, c(2, 1, 3))
stopifnot(
  t(xt[, , 2]) == x[, , 2],
  t(xt[, , 3]) == x[, , 3],
  t(xt[, , 4]) == x[, , 4]
)

# Swap the first two dimensions of the UCBAdmissions table.
UCB <- aperm(UCBAdmissions, perm = c(2, 1, 3))
UCB[1, , ]
summary(UCB)  # UCB is still a contingency table

          Dept
Admit        A   B   C   D   E   F
  Admitted 512 353 120 138  53  22
  Rejected 313 207 205 279 138 351

Number of cases in table: 4526 
Number of factors: 3 
Test for independence of all factors:
	Chisq = 2000.3, df = 16, p-value = 0

# A 2 x 2 array and a 2 x 2 index array.
x <- array(1:4, dim = c(2, 2))
x
j <- array(c(1:2, 2:1), dim = c(2, 2))
j
# Plain vector indexing: picks elements 1, 2, 2, 1 of x.
as.vector(x)[as.vector(j)]
# A two-column index matrix is read row by row as (row, column) pairs.
x[j]

# It’s also possible to have a column of a data frame that’s a matrix or array, as long as the number of rows matches the data frame. (This requires a slight extension to our definition of a data frame: it’s not the length() of each column that must be equal; but the NROW().) Like with list-columns, you must either add after creation, or wrap in I().

# Start from a one-column data frame ...
dfm <- data.frame(x = 10 * 1:3)
# ... then add a matrix column and a data-frame column; each has 3 rows.
dfm$y <- matrix(1:9, nrow = 3)
dfm$z <- data.frame(
  a = 3:1,
  b = letters[1:3],
  stringsAsFactors = FALSE
)

str(dfm)

'data.frame':	3 obs. of  3 variables:
 $ x: num  10 20 30
 $ y: int [1:3, 1:3] 1 2 3 4 5 6 7 8 9
 $ z:'data.frame':	3 obs. of  2 variables:
  ..$ a: int  3 2 1
  ..$ b: chr  "a" "b" "c"

# Data frames allow you to label each row with a “name”, a character vector containing only unique values: 

# Three rows labelled by row name instead of by a separate id column.
df3 <- data.frame(
  row.names = c("Bob", "Susan", "Sam"),
  age = c(35, 27, 18),
  hair = c("blond", "brown", "black")
)
df3

# Two-row data frame. stringsAsFactors = FALSE keeps Name a character column
# on every R version (before R 4.0 the default turned it into a factor), so a
# row with a new name can be appended without producing NA.
v <- data.frame(
  SN = 1:2,
  Age = c(21, 15),
  Name = c("John", "Dora"),
  stringsAsFactors = FALSE
)
v
class(v)

# In this example, v can be considered as a list of 3 components, each
# component being a two-element vector.

str(v)      # structure of v

'data.frame':	2 obs. of  3 variables:
 $ SN  : int  1 2
 $ Age : num  21 15
 $ Name: Factor w/ 2 levels "Dora","John": 2 1

# Print the whole `trees` data set.
trees

# A data frame can be examined using functions like str() and head():
# str() gives a compact per-column overview, head() shows the first n rows.

str(trees)
head(trees, n = 10)

'data.frame':	31 obs. of  3 variables:
 $ Girth : num  8.3 8.6 8.8 10.5 10.7 10.8 11 11 11.1 11.2 ...
 $ Height: num  70 65 63 72 81 83 66 75 80 75 ...
 $ Volume: num  10.3 10.3 10.2 16.4 18.8 19.7 15.6 18.2 22.6 19.9 ...

# Select the rows with Height greater than 82 (all columns kept).
trees[trees[["Height"]] > 82, ]

# Append one row to v; the unnamed list supplies the values by position.
v <- rbind(v, list(1, 16, "Paul"))
v

Warning message in `[<-.factor`(`*tmp*`, ri, value = "Paul"):
“invalid factor level, NA generated”

# Bind a character column named Age onto v.
# NOTE(review): if v already has a column called Age, the result carries two
# columns with that name and v$Age returns the first one - confirm the
# duplicate name is intended.
v <- cbind(v, Age = c("21", "15", "16"))
v

# Small data frame with an integer and a logical column. TRUE/FALSE are
# spelled out because T and F are ordinary variables that can be reassigned.
x <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE))
x
nrow(x)  # number of rows
ncol(x)  # number of columns

# Exam data: one row per id with the scores for three questions.

exam <- data.frame(
  id = 1:5,
  q1 = c(1, 5, 2, 3, 2),
  q2 = c(8, 10, 9, 8, 7),
  q3 = c(3, 7, 4, 6, 4)
)
exam

# Demographic data for the same five ids.

demographics <- data.frame(
  id = 1:5,
  sex = c("f", "m", "f", "f", "m"),
  age = c(25, 22, 24, 19, 23)
)
demographics

# Combine exam and demographics by matching rows on the shared id column.

combined <- merge(x = exam, y = demographics, by = "id")
combined

# Mean q1 score for each sex.
# The formula is passed positionally: the first argument of the formula method
# was called `formula` before R 4.2.0 and `x` from 4.2.0 on, so naming it is
# not portable across R versions.

aggregate(q1 ~ sex, data = combined, FUN = mean)

# Printing the generic shows that aggregate() dispatches with UseMethod().
aggregate

function (x, ...) 
UseMethod("aggregate")

# Many summary statistics by sex using dplyr!
# One output row per sex: the mean of each score and of age, plus the group
# size N.

library(dplyr)
summarise(
  group_by(combined, sex),
  q1.mean = mean(q1),
  q2.mean = mean(q2),
  q3.mean = mean(q3),
  age.mean = mean(age),
  N = n()
)

# Many summary statistics by sex using dplyr!
# Pipeline form: group the combined data by sex, then reduce every group to
# its means and its row count.

library(dplyr)
combined %>%
  group_by(sex) %>%
  summarise(
    q1.mean = mean(q1),
    q2.mean = mean(q2),
    q3.mean = mean(q3),
    age.mean = mean(age),
    N = n()
  )

# A factor whose declared levels include a value ("ef") that never occurs.
x <- factor(c("ab", "cd", "ab"), levels = c("ab", "cd", "ef"))
x
typeof(x)      # underlying storage type
attributes(x)  # levels and class

# Round trip: integers -> character strings -> integers.
z <- 0:9
z
digits <- as.character(z)
digits
d <- as.integer(digits)
d

# Now d and z are the same. There is a large collection of functions of the form as.something() for either coercion from one mode to another, or for investing an object with some other attribute it may not already possess. The reader should consult the different help files to become familiar with them.

# Two-level factor with levels "a" and "b".
r <- factor(c("a", "b"))
r
# Assigning a named list to levels() renames and reorders the levels: each
# list name becomes a level that collects the old level(s) listed under it.
levels(r) <- list(C = "C", A = "a", B = "b")
r

# The labels are attached to the sorted unique values, whatever their size.
factor(c(1, 2, 3), labels = c("a", "b", "c"))
factor(c(3.2, 10, 500000), labels = c("a", "b", "c"))
factor(c(0.49, 1, 5), labels = c("a", "b", "c"))

# Draw 10000 state names with replacement and report the memory used by the
# resulting character vector.
x <- sample(state.name, 10000, replace = TRUE)
format(object.size(x), units = "Kb")

# Because of the integer+metadata representation, factors are actually smaller than character strings, often notably so.

# While atomic vectors are most commonly turned into matrices, the dimension attribute can also be set on lists to
# make list-matrices or list-arrays:

# Four heterogeneous components arranged as a 2 x 2 list-matrix.
l <- list(1:3, "a", TRUE, 1.0)
dim(l) <- c(2L, 2L)
l
l[[1, 1]]  # content of the cell in row 1, column 1

# These are relatively esoteric data structures, but can be useful if you want to arrange objects into a grid-like
# structure. 
# For example, if you’re running models on a spatio-temporal grid, it might be natural to preserve the grid 
# structure by storing the models in a 3d array.

# Unlike atomic vectors, list() can contain a mix of objects. 
# Following is an example of a list having three components each of different data type. In this example, a, b and
# c are called tags which makes it easier to reference the components of the list.

# Three components of different types, tagged a, b and c.
x <- list(a = 2.5, b = TRUE, c = 1:3)
x
typeof(x)
length(x)

# A list with three named components; the third component is itself a named
# character vector.
v <- list(name = "John", age = 19, speaks = c("English", "French"))
names(v[[3]]) <- c("english", "french")
v

# Lists can be accessed in similar fashion to vectors. Integer, logical or
# character vectors can be used for indexing.

v[1:2]                    # index using integer vector
v[-2]                     # using negative integer to exclude second component
v[c(TRUE, FALSE, FALSE)]  # index using logical vector
v[c("age", "speaks")]     # index using character vector

# Indexing with [ as shown above gives a sublist, not the content inside the
# component. To retrieve the content, use [[. However, [[ can access only a
# single component at a time.

v["age"]
typeof(v["age"])
v[["age"]]
typeof(v[["age"]])
v[[2]]

# An alternative to [[, often used to access the content of a list, is the $
# operator. They are both the same except that $ can do partial matching on
# tags.

v$name    # same as v[["name"]]
v$a       # partial matching, same as v$ag or v$age
v[["a"]]  # cannot do partial match with [[

NULL

# We can delete a component by assigning NULL to it.

# Drop the age component, then show the structure of what is left.
v[["age"]] <- NULL
str(v)

List of 3
 $ name   : chr "Clair"
 $ speaks : chr [1:2] "English" "French"
 $ married: logi FALSE

# Matrices are vectors with a dimension attribute.

# A 2 x 3 matrix filled with NA (the value matrix() uses when no data is
# supplied).
m <- matrix(NA, nrow = 2, ncol = 3)
m
dim(m)
attributes(m)

# It is also possible to change the row and column names of a matrix.
# x is created as a 3 x 3 matrix first: assigning three row names and three
# column names needs an object with exactly those dimensions.

x <- matrix(1:9, nrow = 3)
colnames(x) <- c("C1", "C2", "C3")
rownames(x) <- c("R1", "R2", "R3")
x

# 3 x 3 matrix with named rows (X, Y, Z) and columns (A, B, C).
j <- matrix(
  1:9,
  nrow = 3,
  dimnames = list(c("X", "Y", "Z"), c("A", "B", "C"))
)
j

j[, "A"]               # one column selected by name
j[TRUE, c("A", "C")]   # TRUE keeps every row
j[2:3, c("A", "C")]    # rows 2 and 3 of columns A and C

# modify elements less than 5
j[j < 5] <- 0
j

# 4 x 5 matrix of 1..20; upper.tri() flags the cells strictly above the main
# diagonal.
m1 <- matrix(1:20, nrow = 4, ncol = 5)
m1
upper.tri(m1)

sex	q1.mean	q2.mean	q3.mean	age.mean	N
f	2.0	8.333333	4.333333	22.66667	3
m	3.5	8.500000	5.500000	22.50000	2

sex	q1.mean	q2.mean	q3.mean	age.mean	N
f	2.0	8.333333	4.333333	22.66667	3
m	3.5	8.500000	5.500000	22.50000	2

R Programming & Statistics Notes

Chapter 2.1.5 Exercises

0	0	0	0	0	0	0	0	0
1	2	3	4	5	6	7	8	9
2	4	6	8	10	12	14	16	18
3	6	9	12	15	18	21	24	27
4	8	12	16	20	24	28	32	36
5	10	15	20	25	30	35	40	45
6	12	18	24	30	36	42	48	54
7	14	21	28	35	42	49	56	63
8	16	24	32	40	48	56	64	72
9	18	27	36	45	54	63	72	81

Girth	Height	Volume
8.3	70	10.3
8.6	65	10.3
8.8	63	10.2
10.5	72	16.4
10.7	81	18.8
10.8	83	19.7
11.0	66	15.6
11.0	75	18.2
11.1	80	22.6
11.2	75	19.9
11.3	79	24.2
11.4	76	21.0
11.4	76	21.4
11.7	69	21.3
12.0	75	19.1
12.9	74	22.2
12.9	85	33.8
13.3	86	27.4
13.7	71	25.7
13.8	64	24.9
14.0	78	34.5
14.2	80	31.7
14.5	74	36.3
16.0	72	38.3
16.3	77	42.6
17.3	81	55.4
17.5	82	55.7
17.9	80	58.3
18.0	80	51.5
18.0	80	51.0
20.6	87	77.0

FALSE	TRUE	TRUE	TRUE	TRUE
FALSE	FALSE	TRUE	TRUE	TRUE
FALSE	FALSE	FALSE	TRUE	TRUE
FALSE	FALSE	FALSE	FALSE	TRUE

	age	hair
Bob	35	blond
Susan	27	brown
Sam	18	black

SN	Age	Name
1	21	John
2	15	Dora

SN	Name	Age
1	John	21
2	Dora	15
1	Paul	16
1	16	NA

SN	Name	Age
1	John	21
2	Dora	15
1	Paul	16

0	0	0	0	0	0	0	0	0
1	2	3	4	5	6	7	8	9
2	4	6	8	10	12	14	16	18
3	6	9	12	15	18	21	24	27
4	8	12	16	20	24	28	32	36
5	10	15	20	25	30	35	40	45
6	12	18	24	30	36	42	48	54
7	14	21	28	35	42	49	56	63
8	16	24	32	40	48	56	64	72
9	18	27	36	45	54	63	72	81

0	0	0	0	0	0	0	0	0
1	2	3	4	5	6	7	8	9
2	4	6	8	10	12	14	16	18
3	6	9	12	15	18	21	24	27
4	8	12	16	20	24	28	32	36
5	10	15	20	25	30	35	40	45
6	12	18	24	30	36	42	48	54
7	14	21	28	35	42	49	56	63
8	16	24	32	40	48	56	64	72
9	18	27	36	45	54	63	72	81