Finished all chapters and definitions. I need to add subsections and see if there are any theorems or definitions in the appendices that are worth adding to this as well.
This commit is contained in:
Binary file not shown.
@@ -2,6 +2,8 @@
|
||||
\usepackage{init}
|
||||
\usepackage{import}
|
||||
\usepackage{dsfont}
|
||||
\usepackage{color, colortbl}
|
||||
\definecolor{Gray}{gray}{0.9}
|
||||
|
||||
\newcommand{\cond}[1]{\text{cond}\left(#1\right)}
|
||||
\newcommand{\linear}[1]{\mathcal{L}\left(#1\right)}
|
||||
|
||||
+8
-8
@@ -14,12 +14,12 @@
|
||||
& \C & \text{the field of complex numbers} \\
|
||||
& \C_i & \text{the $i$th Gerschgorin disk} \\
|
||||
& \cond{A} & \text{the condition number of the matrix $A$} \\
|
||||
& C^n(\R) & \text{set of functions $f$ on $\R$ with $f^{(n)}$ continuous} \\
|
||||
& C^\infty & \text{set of functions with derivatives of every order} \\
|
||||
& C(\R) & \text{the vector space of continuous functions on $\R$} \\
|
||||
& C([0,1]) & \text{the vector space of continuous functions on $[0,1]$} \\
|
||||
& C_x & \text{the $T$-cyclic subspaces generated by $x$} \\
|
||||
& D & \text{the derivative operator on $C^\infty$} \\
|
||||
& \mathsf{C}^n(\R) & \text{set of functions $f$ on $\R$ with $f^{(n)}$ continuous} \\
|
||||
& \mathsf{C}^\infty & \text{set of functions with derivatives of every order} \\
|
||||
& \mathsf{C}(\R) & \text{the vector space of continuous functions on $\R$} \\
|
||||
& \mathsf{C}([0,1]) & \text{the vector space of continuous functions on $[0,1]$} \\
|
||||
& \mathsf{C}_x & \text{the $T$-cyclic subspaces generated by $x$} \\
|
||||
& \mathsf{D} & \text{the derivative operator on $C^\infty$} \\
|
||||
& \ldet{A} & \text{the determinant of the matrix $A$} \\
|
||||
& \delta_{ij} & \text{the Kronecker delta} \\
|
||||
& \ldim{V} & \text{the dimension of $V$} \\
|
||||
@@ -33,11 +33,11 @@
|
||||
& F^n & \text{the set of $n$-tuples with entries in a field $\F$} \\
|
||||
& f(T) & \text{the polynomial $f(x)$ evaluated at the operator $T$} \\
|
||||
& \mathcal{F}(S,\F) & \text{the set of functions from $S$ to a field $\F$} \\
|
||||
& H & \text{space of continuous complex functions on $[0, 2\pi]$} \\
|
||||
& \mathsf{H} & \text{space of continuous complex functions on $[0, 2\pi]$} \\
|
||||
& I_n \text{ or } I & \text{the $n \times n$ identity matrix} \\
|
||||
& \Id_V \text{ or } \Id & \text{the identity operator on $V$} \\
|
||||
& K_\lambda & \text{generalized eigenspace of $T$ corresponding to $\lambda$} \\
|
||||
& K_\phi & \{x\ |\ (\phi(T))^p(x) = 0 \text{, for some positive integer $p$}\} \\
|
||||
& K_\phi & \{x : (\phi(T))^p(x) = 0 \text{, for some positive integer $p$}\} \\
|
||||
& L_A & \text{left-multiplication transformation by matrix $A$} \\
|
||||
& \lim_{m \to \infty}A_m & \text{the limit of a sequence of matrices} \\
|
||||
& \linear{V} & \text{the space of linear transformations from $V$ to $V$} \\
|
||||
|
||||
@@ -8,9 +8,9 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space and $\beta = \{v_1, v_2, \dots, v_n\}$ be a subset of $V$. Then $\beta$ is a basis for $V$ if and only if each $v \in V$ can be uniquely expressed as a linear combination of vectors of $\beta$, that is, can be expressed in the form
|
||||
|
||||
|
||||
\[v = a_1v_1 + a_2v_2 + \dots + a_nv_n\]
|
||||
|
||||
|
||||
for unique scalars $a_1, a_2, \dots, a_n$.
|
||||
\end{theorem}
|
||||
|
||||
@@ -38,9 +38,9 @@
|
||||
Let $V$ be a vector space with dimension $n$.
|
||||
\begin{enumerate}
|
||||
\item Any finite generating set for $V$ contains at least $n$ vectors, and a generating set for $V$ that contains exactly $n$ vectors is a basis for $V$.
|
||||
|
||||
|
||||
\item Any linearly independent subset of $V$ that contains exactly $n$ vectors is a basis for $V$.
|
||||
|
||||
|
||||
\item Every linearly independent subset of $V$ can be extended to a basis for $V$.
|
||||
\end{enumerate}
|
||||
\end{corollary}
|
||||
@@ -59,49 +59,49 @@
|
||||
\begin{definition}[\textbf{The Lagrange Interpolation Formula}]
|
||||
\hfill\\
|
||||
Corollary 2 of the replacement theorem can be applied to obtain a useful formula. Let $c_0, c_1, \dots, c_n$ be distinct scalars in an infinite field $\F$. The polynomials $f_0(x), f_1(x), \dots, f_n(x)$ defined by
|
||||
|
||||
|
||||
\[f_i(x) = \frac{(x-c_0)\dots(x-c_{i-1})(x-c_{i+1})\dots(x-c_n)}{(c_i - c_0)\dots(c_i-c_{i-1})(c_i-c_{i+1})\dots(c_i-c_n)} = \prod_{\substack{k=0 \\ k \neq i}}^{n} \frac{x-c_k}{c_i - c_k}\]
|
||||
|
||||
are called the \textbf{Lagrange polynomials} (associated with $c_0, c_1, \dots, c_n$). Note that each $f_i(x)$ is a polynomial of degree $n$ and hence is in $P_n(\F)$. By regarding $f_i(x)$ as a polynomial function $f_i: \F \to \F$, we see that
|
||||
|
||||
|
||||
are called the \textbf{Lagrange polynomials} (associated with $c_0, c_1, \dots, c_n$). Note that each $f_i(x)$ is a polynomial of degree $n$ and hence is in $P_n(\F)$. By regarding $f_i(x)$ as a polynomial function $f_i: \F \to \F$, we see that
|
||||
|
||||
\begin{equation}
|
||||
f_i(c_j) = \begin{cases}
|
||||
0 &\text{if}\ i \neq j,\\
|
||||
1 &\text{if}\ i = j.
|
||||
0 & \text{if}\ i \neq j, \\
|
||||
1 & \text{if}\ i = j.
|
||||
\end{cases}
|
||||
\end{equation}
|
||||
|
||||
This property of Lagrange polynomials can be used to show that $\beta = \{f_0, f_1, \dots, f_n\}$ is a linearly independent subset of $P_n(\F)$. Suppose that
|
||||
|
||||
|
||||
This property of Lagrange polynomials can be used to show that $\beta = \{f_0, f_1, \dots, f_n\}$ is a linearly independent subset of $P_n(\F)$. Suppose that
|
||||
|
||||
\[\sum_{i=0}^{n}a_if_i = 0\ \ \text{for some scalars}\ a_0, a_1, \dots, a_n,\]
|
||||
|
||||
|
||||
where $0$ denotes the zero function. Then
|
||||
|
||||
|
||||
\[\sum_{i=0}^{n}a_if_i(c_j)=0\ \ \text{for}\ j=0, 1, \dots, n.\]
|
||||
|
||||
|
||||
But also
|
||||
|
||||
|
||||
\[\sum_{i=0}^{n}a_if_i(c_j)=a_j\]
|
||||
|
||||
|
||||
by (1.1). Hence $a_j = 0$ for $j = 0, 1, \dots, n$; so $\beta$ is linearly independent. Since the dimension of $P_n(\F)$ is $n + 1$, it follows from Corollary 2 of the replacement theorem that $\beta$ is a basis for $P_n(\F)$.
|
||||
|
||||
|
||||
Because $\beta$ is a basis for $P_n(\F)$, every polynomial function $g$ in $P_n(\F)$ is a linear combination of polynomial functions of $\beta$, say,
|
||||
|
||||
|
||||
\[g = \sum_{i=0}^{n}b_if_i.\]
|
||||
|
||||
It follows that
|
||||
|
||||
|
||||
It follows that
|
||||
|
||||
\[g(c_j)=\sum_{i=0}^{n}b_if_i(c_j)=b_j;\]
|
||||
|
||||
|
||||
so
|
||||
|
||||
|
||||
\[g=\sum_{i=0}^{n}g(c_i)f_i\]
|
||||
|
||||
|
||||
is the unique representation of $g$ as a linear combination of elements of $\beta$. This representation is called the \textbf{Lagrange interpolation formula}. Notice that the preceding argument shows that if $b_0, b_1, \dots, b_n$ are any $n+1$ scalars in $\F$ (not necessarily distinct), then the polynomial function
|
||||
|
||||
|
||||
\[g = \sum_{i=0}^{n}b_if_i\]
|
||||
|
||||
|
||||
is the unique polynomial in $P_n(\F)$ such that $g(c_j) = b_j$. Thus we have found the unique polynomial of degree not exceeding $n$ that has specified values $b_j$ at given points $c_j$ in its domain ($j = 0, 1, \dots, n$).\\
|
||||
|
||||
|
||||
An important consequence of the Lagrange interpolation formula is the following result: If $f \in P_n(\F)$ and $f(c_i) = 0$, for $n+1$ distinct scalars $c_0, c_1, \dots, c_n$ in $\F$, then $f$ is the zero function.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -8,4 +8,4 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Two nonzero vectors $x$ and $y$ are called \textbf{parallel} if $y=tx$ for some nonzero real number $t$. (Thus nonzero vectors having the same or opposite directions are parallel.)
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $\mathcal{F}$ be the family of all subsets of a nonempty set $S$. This family $\mathcal{F}$ is called the \textbf{power set} of $S$.
|
||||
Let $\mathcal{F}$ be the family of all subsets of a nonempty set $S$. This family $\mathcal{F}$ is called the \textbf{power set} of $S$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
@@ -18,14 +18,14 @@
|
||||
\begin{definition}[\textbf{Maximal Principle}]
|
||||
\hfill\\
|
||||
Let $\mathcal{F}$ be a family of sets. If, for each chain $\mathcal{C} \subseteq \mathcal{F}$, there exists a member of $\mathcal{F}$ that contains each member of $\mathcal{C}$, then $\mathcal{F}$ contains a maximal member.\\
|
||||
|
||||
|
||||
\textbf{Note:} The \textit{Maximal Principle} is logically equivalent to the \textit{Axiom of Choice}, which is an assumption in most axiomatic developments of set theory.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $S$ be a subset of a vector space $V$. A \textbf{maximal linearly independent subset} of $S$ is a subset $B$ of $S$ satisfying both of the following conditions
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $B$ is linearly independent.
|
||||
\item The only linearly independent subset of $S$ that contains $B$ is $B$ itself.
|
||||
@@ -35,4 +35,4 @@
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Every vector space has a basis.
|
||||
\end{corollary}
|
||||
\end{corollary}
|
||||
|
||||
@@ -3,11 +3,11 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A subset $W$ of a vector space $V$ over a field $\F$ is called a \textbf{subspace} of $V$ if $W$ is a vector space over $\F$ with the operations of addition and scalar multiplication defined on $V$.\\
|
||||
|
||||
|
||||
In any vector space $V$, note that $V$ and $\{0\}$ are subspaces. The latter is called the \textbf{zero subspace} of $V$.
|
||||
|
||||
|
||||
Fortunately, it is not necessary to verify all of the vector space properties to prove that a subset is a subspace. Because properties (VS 1), (VS 2), (VS 5), (VS 6), (VS 7) and (VS 8) hold for all vectors in the vector space, these properties automatically hold for the vectors in any subset. Thus a subset $W$ of a vector space $V$ is a subspace of $V$ if and only if the following four properties hold:
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $x + y \in W$ whenever $x \in W$ and $y \in W$. ($W$ is \textbf{closed under addition}).
|
||||
\item $cx \in W$ whenever $c \in \F$ and $x \in W$. ($W$ is \textbf{closed under scalar multiplication}).
|
||||
@@ -19,7 +19,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space and $W$ a subset of $V$. Then $W$ is a subspace of $V$ if and only if the following three conditions hold for the operations defined in $V$.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $0 \in W$.
|
||||
\item $x + y \in W$ whenever $x \in W$ and $y \in W$.
|
||||
@@ -45,7 +45,7 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The \textbf{trace} of an $n \times n$ matrix $M$, denoted $\text{tr}(M)$, is the sum of the diagonal entries of $M$; that is,
|
||||
|
||||
|
||||
\[\text{tr}(M) = M_{11} + M_{22} + \dots + M_{nn}.\]
|
||||
\end{definition}
|
||||
|
||||
@@ -57,4 +57,4 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
An $m \times n$ matrix $A$ is called \textbf{upper triangular} if all entries lying below the diagonal entries are zero; that is, if $A_{ij} = 0$ whenever $i > j$.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
+34
-34
@@ -3,7 +3,7 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A \textbf{vector space} (or \textbf{linear space}) $V$ over a field $\F$ consists of a set on which two operations (called \textbf{addition} and \textbf{scalar multiplication}, respectively) are defined so that for each pair of elements $x$ and $y$ in $V$ there is a unique element $x + y$ in $V$, and for each element $a$ in $\F$ and each element $x$ in $V$ there is a unique element $ax$ in $V$, such that the following conditions hold:
|
||||
|
||||
|
||||
\begin{description}
|
||||
\item[(VS 1)] For all $x, y$ in $V$, $x + y = y + x$ (commutativity of addition).
|
||||
\item[(VS 2)] For all $x, y, z$ in $V$, $(x + y) + z = x + (y + z)$ (associativity of addition).
|
||||
@@ -14,11 +14,11 @@
|
||||
\item[(VS 7)] For each element $a$ in $\F$ and each pair of elements $x, y$ in $V$, $a(x + y) = ax + ay$.
|
||||
\item[(VS 8)] For each pair of elements $a, b$ in $\F$ and each element $x$ in $V$, $(a + b)x = ax + bx$.
|
||||
\end{description}
|
||||
|
||||
|
||||
The elements $x + y$ and $ax$ are called the \textbf{sum} of $x$ and $y$ and the \textbf{product} of $a$ and $x$, respectively.\\
|
||||
|
||||
|
||||
The elements of the field $\F$ are called \textbf{scalars} and the elements of the vector space $V$ are called \textbf{vectors}.\\
|
||||
|
||||
|
||||
\textbf{Note:} The reader should not confuse this use of the word ``vector'' with the physical entity discussed in section 1.1: the word ``vector'' is now being used to describe any element of a vector space.
|
||||
\end{definition}
|
||||
|
||||
@@ -30,7 +30,7 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Vectors in $\F^n$ may be written as \textbf{column vectors}
|
||||
|
||||
|
||||
\[\begin{pmatrix} a_1 \\ a_2 \\ \vdots \\ a_n \end{pmatrix}\]
|
||||
rather than as \textbf{row vectors} $(a_1, a_2, \dots, a_n)$. Since a 1-tuple whose only entry is from $\F$ can be regarded as an element of $\F$, we usually write $\F$ rather than $\F^1$ for the vector space of 1-tuples with entry from $\F$.
|
||||
\end{definition}
|
||||
@@ -38,24 +38,24 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
An $m \times n$ \textbf{matrix} with entries from a field $\F$ is a rectangular array of the form
|
||||
|
||||
|
||||
\[\begin{pmatrix}
|
||||
a_{11} & a_{12} & \dots &a_{1n} \\
|
||||
a_{21} & a_{22} & \dots & a_{2n} \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
a_{m1} & a_{m2} & \dots & a_{mn}
|
||||
\end{pmatrix},\]
|
||||
a_{11} & a_{12} & \dots & a_{1n} \\
|
||||
a_{21} & a_{22} & \dots & a_{2n} \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
a_{m1} & a_{m2} & \dots & a_{mn}
|
||||
\end{pmatrix},\]
|
||||
where each entry $a_{ij}\ (1 \leq i \leq m,\ 1 \leq j \leq n)$ is an element of $\F$. We call the entries $a_{ij}$ with $i=j$ the \textbf{diagonal entries} of the matrix. The entries $a_{i1}, a_{i2}, \dots, a_{in}$ compose the \textbf{\textit{i}th row} of the matrix, and the entries $a_{1j}, a_{2j}, \dots, a_{mj}$ compose the \textbf{\textit{j}th column} of the matrix. The rows of the preceding matrix are regarded as vectors in $\F^n$, and the columns are regarded as vectors in $\F^m$. The $m \times n$ matrix in which each entry equals zero is called the \textbf{zero matrix} and is denoted by $O$.\\
|
||||
|
||||
|
||||
In this book, we denote matrices by capital italic letters (e.g. $A$, $B$, and $C$), and we denote the entry of a matrix $A$ that lies in row $i$ and column $j$ by $A_{ij}$. In addition, if the number of rows and columns of a matrix are equal, the matrix is called \textbf{square}.
|
||||
|
||||
|
||||
Two $m \times n$ matrices $A$ and $B$ are called \textbf{equal} if all their corresponding entries are equal, that is, if $A_{ij} = B_{ij}$ for $1 \leq i \leq m$ and $1 \leq j \leq n$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The set of all $m \times n$ matrices with entries from a field $\F$ is a vector space which we denote by $M_{m \times n}(\F)$, with the following operations of \textbf{matrix addition} and \textbf{scalar multiplication}: For $A, B \in M_{m \times n}(\F)$ and $c \in \F$,
|
||||
|
||||
|
||||
\[(A + B)_{ij} = A_{ij} + B_{ij}\ \ \ \text{and}\ \ \ (cA)_{ij} = cA_{ij}\]
|
||||
for $1 \leq i \leq m$ and $1 \leq j \leq n$.
|
||||
\end{definition}
|
||||
@@ -63,36 +63,36 @@
|
||||
\begin{definition}\label{Definition 1.7}
|
||||
\hfill\\
|
||||
Let $S$ be any nonempty set and $\F$ be any field, and let $\mathcal{F}(S, \F)$ denote the set of all functions from $S$ to $\F$. Two functions $f$ and $g$ in $\mathcal{F}(S, \F)$ are called \textbf{equal} if $f(s) = g(s)$ for each $s \in S$. The set $\mathcal{F}(S, \F)$ is a vector space with the operations of addition and scalar multiplication defined for $f,g \in \mathcal{F}(S, \F)$ and $c \in \F$ by
|
||||
|
||||
|
||||
\[(f + g)(s) = f(s) + g(s)\ \ \ \text{and}\ \ \ (cf)(s) = c[f(s)]\]
|
||||
for each $s \in S$. Note that these are the familiar operations of addition and scalar multiplication for functions used in algebra and calculus.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A \textbf{polynomial} with coefficients from a field $\F$ is an expression of the form
|
||||
|
||||
A \textbf{polynomial} with coefficients from a field $\F$ is an expression of the form
|
||||
|
||||
\[f(x)=a_nx^n + a_{n-1}x^{n-1}+\dots+a_1x+a_0,\]
|
||||
|
||||
where $n$ is a nonnegative integer and each $a_k$, called the \textbf{coefficient} of $x^k$, is in $\F$. If $f(x)=0$, that is, if $a_n = a_{n-1} = \dots = a_0 = 0$, then $f(x)$ is called the \textbf{zero polynomial} and, for convenience, its degree is defined to be $-1$; otherwise, the \textbf{degree} of a polynomial is defined to be the largest exponent of $x$ that appears in the representation
|
||||
|
||||
|
||||
where $n$ is a non-negative integer and each $a_k$, called the \textbf{coefficient} of $x^k$, is in $\F$. If $f(x)=0$, that is, if $a_n = a_{n-1} = \dots = a_0 = 0$, then $f(x)$ is called the \textbf{zero polynomial} and, for convenience, its degree is defined to be $-1$; otherwise, the \textbf{degree} of a polynomial is defined to be the largest exponent of $x$ that appears in the representation
|
||||
|
||||
\[f(x)=a_nx^n + a_{n-1}x^{n-1}+\dots+a_1x+a_0\]
|
||||
|
||||
|
||||
with a nonzero coefficient. Note that the polynomials of degree zero may be written in the form $f(x) = c$ for some nonzero scalar $c$. Two polynomials,
|
||||
|
||||
|
||||
\[f(x)=a_nx^n + a_{n-1}x^{n-1}+\dots+a_1x+a_0\]
|
||||
|
||||
|
||||
and
|
||||
|
||||
|
||||
\[g(x)=b_mx^m + b_{m-1}x^{m-1}+\dots+b_1x+b_0,\]
|
||||
|
||||
|
||||
are called \textbf{equal} if $m=n$ and $a_i = b_i$ for $i=0, 1, \dots, n$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $\F$ be any field. A \textbf{sequence} in $\F$ is a function $\sigma$ from the positive integers into $\F$. In this book, the sequence $\sigma$ such that $\sigma(n) = a_n$ for $n=1, 2, \dots$ is denoted $\{a_n\}$. Let $V$ consist of all sequences $\{a_n\}$ in $\F$ that have only a finite number of nonzero terms $a_n$. If $\{a_n\}$ and $\{b_n\}$ are in $V$ and $t \in \F$, define
|
||||
|
||||
|
||||
\[\{a_n\} + \{b_n\} = \{a_n + b_n\}\ \ \ \text{and}\ \ \ t\{a_n\} = \{ta_n\}\]
|
||||
\end{definition}
|
||||
|
||||
@@ -138,7 +138,7 @@
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
If $S_1$ and $S_2$ are nonempty subsets of a vector space $V$, then the \textbf{sum} of $S_1$ and $S_2$, denoted $S_1 + S_2$, is the set $\{x + y\ |\ x \in S_1,\ \text{and}\ y \in S_2\}$.
|
||||
If $S_1$ and $S_2$ are nonempty subsets of a vector space $V$, then the \textbf{sum} of $S_1$ and $S_2$, denoted $S_1 + S_2$, is the set $\{x + y : x \in S_1,\ \text{and}\ y \in S_2\}$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
@@ -153,18 +153,18 @@
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $W$ be a subspace of a vector space $V$ over a field $\F$. For any $v \in V$, the set $\{v\} + W = \{v + w\ |\ w \in W\}$ is called the \textbf{coset of $W$ containing $v$}. It is customary to denote this coset by $v + W$ rather than $\{v\} + W$.
|
||||
Let $W$ be a subspace of a vector space $V$ over a field $\F$. For any $v \in V$, the set $\{v\} + W = \{v + w : w \in W\}$ is called the \textbf{coset of $W$ containing $v$}. It is customary to denote this coset by $v + W$ rather than $\{v\} + W$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $W$ be a subspace of a vector space $V$ over a field $\F$, and let $S := \{v + W\ |\ v \in V\}$ be the set of all cosets of $W$. Then $S$ is called the \textbf{quotient space of $V$ modulo $W$}, and is denoted by $V/W$. Addition and scalar multiplication by the scalars of $\F$ can be defined as follows:
|
||||
|
||||
Let $W$ be a subspace of a vector space $V$ over a field $\F$, and let $S := \{v + W : v \in V\}$ be the set of all cosets of $W$. Then $S$ is called the \textbf{quotient space of $V$ modulo $W$}, and is denoted by $V/W$. Addition and scalar multiplication by the scalars of $\F$ can be defined as follows:
|
||||
|
||||
\[(v_1 + W) + (v_2 + W) = (v_1 + v_2) + W\]
|
||||
|
||||
|
||||
for all $v_1, v_2 \in V$, and
|
||||
|
||||
|
||||
\[a(v + W) = av + W\]
|
||||
|
||||
|
||||
for all $v \in V$ and $a \in \F$.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space. Let $T, U_1, U_2 \in \LL(V)$. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T(U_1 + U_2) = TU_1 + TU_2$ and $(U_1 + U_2)T = U_1T + U_2T$
|
||||
\item $T(U_1U_2) = (TU_1)U_2$
|
||||
@@ -20,18 +20,18 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix and $B$ be an $n \times p$ matrix. We define the \textbf{product} of $A$ and $B$, denoted $AB$, to be the $m \times p$ matrix such that
|
||||
|
||||
|
||||
\[(AB)_{ij} = \sum_{k=1}^{n}A_{ik}B_{kj}\ \ \text{for}\ \ 1 \leq i \leq m,\ \ 1 \leq j \leq p.\]
|
||||
|
||||
|
||||
Notice that $(AB)_{ij}$ is the sum of products of corresponding entries from the $i$th row of $A$ and the $j$th column of $B$.\\
|
||||
|
||||
|
||||
The reader should observe that in order for the product $AB$ to be defined, there are restrictions regarding the relative sizes of $A$ and $B$. The following mnemonic device is helpful: ``$(m \times n) \cdot (n \times p) = (m \times p)$"; that is, in order for the product $AB$ to be defined, the two ``inner" dimensions must be equal, and the two ``outer" dimensions yield the size of the product.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$, $W$, and $Z$ be finite-dimensional vector spaces with ordered bases $\alpha$, $\beta$, and $\gamma$, respectively. Let $T: V \to W$ and $U: W \to Z$ be linear transformations. Then
|
||||
|
||||
|
||||
\[[UT]_\alpha^\gamma = [U]_\beta^\gamma[T]_\alpha^\beta\]
|
||||
\end{theorem}
|
||||
|
||||
@@ -48,7 +48,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix, $B$ and $C$ be $n \times p$ matrices, and $D$ and $E$ be $q \times m$ matrices. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $A(B + C) = AB + AC$ and $(D + E)A = DA + EA$.
|
||||
\item $a(AB) = (aA)B = A(aB)$ for any scalar $a$.
|
||||
@@ -60,18 +60,18 @@
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix, $B_1, B_2, \dots, B_k$ be $n \times p$ matrices, $C_1, C_2, \dots, C_k$ be $q \times m$ matrices, and $a_1, a_2, \dots, a_k$ be scalars. Then
|
||||
|
||||
|
||||
\[A\left(\sum_{i=1}^{k}a_iB_i\right) = \sum_{i=1}^{k}a_iAB_i\]
|
||||
|
||||
|
||||
and
|
||||
|
||||
|
||||
\[\left(\sum_{i=1}^{k}a_iC_i\right)A = \sum_{i=1}^{k}a_iC_iA.\]
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix and $B$ be an $n \times p$ matrix. For each $j$ ($1 \leq j \leq p$) let $u_j$ and $v_j$ denote the $j$th columns of $AB$ and $B$, respectively. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $u_j = Av_j$.
|
||||
\item $v_j = Be_j$, where $e_j$ is the $j$th standard vector of $\F^p$.
|
||||
@@ -81,7 +81,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be finite-dimensional vector spaces having ordered bases $\beta$ and $\gamma$, respectively, and let $T: V \to W$ be linear. Then, for each $u \in V$, we have
|
||||
|
||||
|
||||
\[[T(u)]_\gamma = [T]_\beta^\gamma [u]_\beta.\]
|
||||
\end{theorem}
|
||||
|
||||
@@ -93,7 +93,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix with entries from $\F$. Then the left-multiplication transformation $L_A: \F^n \to \F^m$ is linear. Furthermore, if $B$ is any other $m \times n$ matrix (with entries from $\F$) and $\beta$ and $\gamma$ are the standard ordered bases for $\F^n$ and $\F^m$, respectively, then we have the following properties.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $[L_A]_\beta^\gamma = A$.
|
||||
\item $L_A = L_B$ if and only if $A = B$.
|
||||
@@ -117,4 +117,4 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A relationship among a group of people is called a \textbf{dominance relation} if the associated incidence matrix $A$ has the property that for all distinct pairs $i$ and $j$, $A_{ij} = 1$ if and only if $A_{ji} = 0$, that is, given any two people, exactly one of them \textit{dominates} the other.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
+12
-12
@@ -8,36 +8,36 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space of continuous real-valued functions on the interval $[0, 2\pi]$. Fix a function $g \in V$. The function $\mathsf{h}: V \to \R$, defined by
|
||||
|
||||
|
||||
\[\mathsf{h}(x) = \frac{1}{2\pi} \int_{0}^{2\pi}x(t)g(t) dt\]
|
||||
|
||||
|
||||
is a linear functional on $V$. In the cases that $g(t)$ equals $\sin(nt)$ or $\cos (nt)$, $\mathsf{h}(x)$ is often called the \textbf{\textit{n}th Fourier coefficient of $x$}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a finite dimensional vector space, and let $\beta = \{x_1, x_2, \dots, x_n\}$ be an ordered basis for $V$. For each $i = 1, 2, \dots, n$, define $\mathsf{f}_i(x) = a_i$, where
|
||||
|
||||
|
||||
\[[x]_\beta = \begin{pmatrix} a_1 \\ a_2 \\ \vdots \\ a_n \end{pmatrix}\]
|
||||
|
||||
|
||||
is the coordinate vector of $x$ relative to $\beta$. Then $\mathsf{f}_i$ is a linear functional on $V$ called the \textbf{\textit{i}th coordinate function with respect to the basis $\beta$}. Note that $\mathsf{f}_i(x_j) = \delta_{ij}$, where $\delta_{ij}$ is the Kronecker delta. These linear functionals play an important role in the theory of dual spaces (see \autoref{Theorem 2.24}).
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For a vector space $V$ over $\F$, we define the \textbf{dual space} of $V$ to be the vector space $\LL(V, \F)$, denoted by $V^*$.\\
|
||||
|
||||
|
||||
Thus $V^*$ is the vector space consisting of all linear functionals on $V$ with the operations of addition and scalar multiplication. Note that if $V$ is finite-dimensional, then by \autoref{Corollary 2.7}
|
||||
|
||||
|
||||
\[\ldim{V^*}= \ldim{\LL(V,\F)} = \ldim{V} \cdot \ldim{\F} = \ldim{V}.\]
|
||||
|
||||
|
||||
Hence by \autoref{Theorem 2.19}, $V$ and $V^*$ are isomorphic. We also define the \textbf{double dual} $V^{**}$ of $V$ to be the dual of $V^*$. In \autoref{Theorem 2.26}, we show, in fact, that there is a natural identification of $V$ and $V^{**}$ in the case that $V$ is finite-dimensional.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}\label{Theorem 2.24}
|
||||
\hfill\\
|
||||
Suppose that $V$ is a finite-dimensional vector space with the ordered basis $\beta = \{x_1, x_2, \dots, x_n\}$. Let $\mathsf{f}_i$ ($1 \leq i \leq n$) be the $i$th coordinate function with respect to $\beta$ as just defined, and let $\beta^*=\{\mathsf{f}_1, \mathsf{f}_2, \dots, \mathsf{f}_n\}$. Then $\beta^*$ is an ordered basis for $V^*$, and, for any $\mathsf{f} \in V^*$, we have
|
||||
|
||||
|
||||
\[\mathsf{f} = \sum_{i=1}^{n}\mathsf{f}(x_i)\mathsf{f}_i.\]
|
||||
\end{theorem}
|
||||
|
||||
@@ -59,7 +59,7 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For a vector $x$ in a finite-dimensional vector space $V$, we define the linear functional $\hat{x}: V^* \to \F$ on $V^*$ by $\hat{x}(\mathsf{f}) = \mathsf{f}(x)$ for every $\mathsf{f} \in V^*$. Since $\hat{x}$ is a linear functional on $V^*$, $\hat{x} \in V^{**}$.\\
|
||||
|
||||
|
||||
The correspondence $x \leftrightarrow \hat{x}$ allows us to define the desired isomorphism between $V$ and $V^{**}$.
|
||||
\end{definition}
|
||||
|
||||
@@ -81,6 +81,6 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional vector space over $\F$. For every subset $S$ of $V$, define the \textbf{annihilator} $S^0$ of $S$ as
|
||||
|
||||
\[S^0 = \{\mathsf{f} \in V^*\ |\ \mathsf{f}(x) = 0,\ \text{for all}\ x \in S\}\]
|
||||
\end{definition}
|
||||
|
||||
\[S^0 = \{\mathsf{f} \in V^* : \mathsf{f}(x) = 0,\ \text{for all}\ x \in S\}\]
|
||||
\end{definition}
|
||||
|
||||
+34
-34
@@ -3,35 +3,35 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A \textbf{differential equation} in an unknown function $y = y(t)$ is an equation involving $y$, $t$, and derivatives of $y$. If the differential equation is of the form
|
||||
|
||||
|
||||
\begin{equation}
|
||||
a_ny^{(n)}+a_{n-1}y^{(n-1)} + \dots + a_1y^{(1)}+a_0y = f,
|
||||
\end{equation}
|
||||
|
||||
|
||||
where $a_0, a_1, \dots, a_n$ and $f$ are functions of $t$ and $y^{(k)}$ denotes the $k$th derivative of $y$, then the equation is said to be \textbf{linear}. The functions $a_i$ are called the \textbf{coefficients} of the differential equation. When $f$ is identically zero, (2.1) is called \textbf{homogeneous}.\\
|
||||
|
||||
|
||||
If $a_n \neq 0$, we say that differential equation (2.1) is of \textbf{order \textit{n}}. In this case, we divide both sides by $a_n$ to obtain a new, but equivalent, equation
|
||||
|
||||
|
||||
\[y^{(n)} + b_{n-1}y^{(n-1)} + \dots + b_1y^{(1)} + b_0y = 0,\]
|
||||
|
||||
|
||||
where $b_i = a_i/a_n$ for $i=0, 1, \dots, n-1$. Because of this observation, we always assume that the coefficient $a_n$ in (2.1) is $1$.\\
|
||||
|
||||
|
||||
A \textbf{solution} to (2.1) is a function that when substituted for $y$ reduces (2.1) to an identity.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Given a complex-valued function $x \in \mathcal{F}(\R, \C)$ of a real variable $t$ (where $\mathcal{F}(\R, \C)$ is the vector space defined in \autoref{Definition 1.7}), there exist unique real-valued functions $x_1$ and $x_2$ of $t$, such that
|
||||
|
||||
|
||||
\[x(t) = x_1(t) + ix_2(t)\ \ \ \text{for}\ \ \ t \in \R,\]
|
||||
|
||||
|
||||
where $i$ is the imaginary number such that $i^2 = -1$. We call $x_1$ the \textbf{real part} and $x_2$ the \textbf{imaginary part} of $x$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Given a function $x \in \mathcal{F}(\R, \C)$ with real part $x_1$ and imaginary part $x_2$, we say that $x$ is \textbf{differentiable} if $x_1$ and $x_2$ are differentiable. If $x$ is differentiable, we define the \textbf{derivative} $x'$ of $x$ by
|
||||
|
||||
|
||||
\[x' = x'_1 + ix'_2\]
|
||||
\end{definition}
|
||||
|
||||
@@ -47,25 +47,25 @@
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For any polynomial $p(t)$ over $\C$ of positive degree, $p(D)$ is called a \textbf{differential operator}. The \textbf{order} of the differential operator $p(D)$ is the degree of the polynomial $p(t)$.
|
||||
For any polynomial $p(t)$ over $\C$ of positive degree, $p(\mathsf{D})$ is called a \textbf{differential operator}. The \textbf{order} of the differential operator $p(\mathsf{D})$ is the degree of the polynomial $p(t)$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Given the differential equation
|
||||
|
||||
|
||||
\[y^{(n)} + a_{n-1}y^{(n-1)}+ \dots + a_1y^{(1)} + a_0y = 0,\]
|
||||
|
||||
the complex polynomial
|
||||
|
||||
|
||||
the complex polynomial
|
||||
|
||||
\[p(t) = t^n + a_{n-1}t^{n-1} + \dots + a_1t + a_0\]
|
||||
|
||||
|
||||
is called the \textbf{auxiliary polynomial} associated with the equation.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
The set of all solutions to a homogeneous linear differential equation with constant coefficients coincides with the null space of $p(D)$ where $p(t)$ is the auxiliary polynomial associated with the equation.
|
||||
The set of all solutions to a homogeneous linear differential equation with constant coefficients coincides with the null space of $p(\mathsf{D})$ where $p(t)$ is the auxiliary polynomial associated with the equation.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
@@ -81,13 +81,13 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $c = a+ib$ be a complex number with real part $a$ and imaginary part $b$. Define
|
||||
|
||||
|
||||
\[e^c = e^a(\cos(b) + i\sin(b)).\]
|
||||
|
||||
|
||||
The special case
|
||||
|
||||
|
||||
\[e^{ib} = \cos(b) + i\sin(b)\]
|
||||
|
||||
|
||||
is called \textbf{Euler's formula}.
|
||||
\end{definition}
|
||||
|
||||
@@ -104,17 +104,17 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Recall that the \textbf{order} of a homogeneous linear differential equation is the degree of its auxiliary polynomial. Thus, an equation of order 1 is of the form
|
||||
|
||||
|
||||
\begin{equation}
|
||||
y' + a_0y = 0.
|
||||
\end{equation}
|
||||
|
||||
|
||||
The solution space for (2.2) is of dimension 1 and has $\{e^{-a_0t}\}$ as a basis.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
For any complex number $c$, the null space of the differential operator $D-c\mathsf{l}$ has $\{e^{ct}\}$ as a basis.
|
||||
For any complex number $c$, the null space of the differential operator $\mathsf{D}-cI$ has $\{e^{ct}\}$ as a basis.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
@@ -124,18 +124,18 @@
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For any differential operator $p(D)$ of order $n$, the null space of $p(D)$ is an $n$-dimensional subspace of $\C^\infty$.
|
||||
For any differential operator $p(\mathsf{D})$ of order $n$, the null space of $p(\mathsf{D})$ is an $n$-dimensional subspace of $\C^\infty$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
The differential operator $D - c\mathsf{l}: \C^\infty \to \C^\infty$ is onto for any complex number $c$.
|
||||
The differential operator $\mathsf{D} - cI: \C^\infty \to \C^\infty$ is onto for any complex number $c$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space, and suppose that $T$ and $U$ are linear operators on $V$ such that $U$ is onto and the null spaces of $T$ and $U$ are finite-dimensional. Then the null space of $TU$ is finite-dimensional, and
|
||||
|
||||
|
||||
\[\ldim{\n{TU}} = \ldim{\n{T}} + \ldim{\n{U}}\]
|
||||
\end{lemma}
|
||||
|
||||
@@ -157,28 +157,28 @@
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
For a given complex number $c$ and a positive integer $n$, suppose that $(t-c)^n$ is the auxiliary polynomial of a homogeneous linear differential equation with constant coefficients. Then the set
|
||||
|
||||
|
||||
\[\beta = \{e^{ct}, te^{ct}, \dots, t^{n-1}e^{ct}\}\]
|
||||
|
||||
|
||||
is a basis for the solution space of the equation.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Given a homogeneous linear differential equation with constant coefficients and auxiliary polynomial
|
||||
|
||||
|
||||
\[(t-c_1)^{n_1}(t-c_2)^{n_2}\dots(t-c_k)^{n_k},\]
|
||||
|
||||
|
||||
where $n_1, n_2, \dots, n_k$ are positive integers and $c_1, c_2, \dots, c_k$ are distinct complex numbers, the following set is a basis for the solution space of the equation:
|
||||
|
||||
|
||||
\[\{e^{c_1t}, te^{c_1t},\dots, t^{n_1-1}e^{c_1t}, \dots, e^{c_kt}, te^{c_kt}, \dots, t^{n_k-1}e^{c_kt}\}\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A differential equation
|
||||
|
||||
|
||||
\[y^{(n)} + a_{n-1}y^{(n-1)} + \dots + a_1y^{(1)} + a_0y = x\]
|
||||
|
||||
|
||||
is called a \textbf{nonhomogeneous} linear differential equation with constant coefficients if the $a_i$'s are constant and $x$ is a function that is not identically zero.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -3,16 +3,16 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces, and let $T: V \to W$ be linear. A function $U: W \to V$ is said to be an \textbf{inverse} of $T$ if $TU = I_W$ and $UT = I_V$. If $T$ has an inverse, then $T$ is said to be \textbf{invertible}. If $T$ is invertible, then the inverse of $T$ is unique and is denoted by $T^{-1}$.\\
|
||||
|
||||
|
||||
The following facts hold for invertible functions $T$ and $U$.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $(TU)^{-1} = U^{-1}T^{-1}$.
|
||||
\item $(T^{-1})^{-1} = T$; in particular, $T^{-1}$ is invertible.
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
We often use the fact that a function is invertible if and only if it is one-to-one and onto. We can therefore restate \autoref{Theorem 2.5} as follows:
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\setcounter{enumi}{2}
|
||||
\item Let $T: V \to W$ be a linear transformation, where $V$ and $W$ are finite-dimensional vector spaces of equal dimension. Then $T$ is invertible if and only if $\rank{T} = \ldim{V}$.
|
||||
@@ -27,7 +27,7 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be an $n \times n$ matrix. Then $A$ is \textbf{invertible} if there exists an $n \times n$ matrix $B$ such that $AB = BA = I$.\\
|
||||
|
||||
|
||||
If $A$ is invertible, then the matrix $B$ such that $AB = BA = I$ is unique. (If $C$ were another such matrix, then $C = CI = C(AB) = (CA)B = IB = B$.) The matrix $B$ is called the \textbf{inverse} of $A$ and is denoted by $A^{-1}$.
|
||||
\end{definition}
|
||||
|
||||
@@ -84,4 +84,4 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For any finite-dimensional vector space $V$ with ordered basis $\beta$, $\phi_\beta$ is an isomorphism.
|
||||
\end{theorem}
|
||||
\end{theorem}
|
||||
|
||||
@@ -3,30 +3,30 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces (over $\F$). We call a function $T: V \to W$ a \textbf{linear transformation from $V$ to $W$} if, for all $x,y \in V$, and $c \in \F$, we have
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T(x + y) = T(x) + T(y)$, and
|
||||
\item $T(cx) = cT(x)$
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
If the underlying field $\F$ is the field of rational numbers, then (1) implies (2), but, in general (1) and (2) are logically independent.\\
|
||||
|
||||
|
||||
We often simply call $T$ \textbf{linear}.
|
||||
\end{definition}
|
||||
|
||||
\begin{remark}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces (over $\F$). Let $T: V \to W$ be a linear transformation. Then the following properties hold:
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item If $T$ is linear, then $T(0) = 0$.
|
||||
\item $T$ is linear if and only if $T(cx + y) = cT(x) + T(y)$ for all $x,y \in V$ and $c \in \F$.
|
||||
\item If $T$ is linear, then $T(x-y)=T(x)-T(y)$ for all $x,y \in V$.
|
||||
\item $T$ is linear if and only if, for $x_1, x_2, \dots, x_n \in V$ and $a_1, a_2, \dots, a_n \in \F$, we have
|
||||
|
||||
\[T\left(\sum_{i=1}^{n}a_ix_i\right)=\sum_{i=1}^{n}a_iT(x_i).\]
|
||||
|
||||
\[T\left(\sum_{i=1}^{n}a_ix_i\right)=\sum_{i=1}^{n}a_iT(x_i).\]
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
We generally use property 2 to prove that a given transformation is linear.
|
||||
\end{remark}
|
||||
|
||||
@@ -43,17 +43,17 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For vector spaces $V$ and $W$ (over $\F$), we define the \textbf{identity transformation} $I_V: V \to V$ by $I_V(x) = x$ for all $x \in V$.\\
|
||||
|
||||
|
||||
We define the \textbf{zero transformation} $T_0: V \to W$ by $T_0(x) = 0$ for all $x \in V$.\\
|
||||
|
||||
|
||||
\textbf{Note:} We often write $I$ instead of $I_V$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces, and let $T: V \to W$ be linear. We define the \textbf{null space} (or \textbf{kernel}) $\n{T}$ to be the set of all vectors $x \in V$ such that $T(x)=0$; that is, \\$\n{T} = \{x \in V\ |\ T(x) = 0\}$.
|
||||
|
||||
We define the \textbf{range} (or \textbf{image}) $\range{T}$ of $T$ to be the subset of $W$ consisting of all images (under $T$) of vectors in $V$; that is, $\range{T} = \{T(x)\ |\ x \in V\}$.
|
||||
Let $V$ and $W$ be vector spaces, and let $T: V \to W$ be linear. We define the \textbf{null space} (or \textbf{kernel}) $\n{T}$ to be the set of all vectors $x \in V$ such that $T(x)=0$; that is, \\$\n{T} = \{x \in V : T(x) = 0\}$.
|
||||
|
||||
We define the \textbf{range} (or \textbf{image}) $\range{T}$ of $T$ to be the subset of $W$ consisting of all images (under $T$) of vectors in $V$; that is, $\range{T} = \{T(x) : x \in V\}$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
@@ -64,7 +64,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces, and let $T: V \to W$ be linear. If $\beta = \{v_1, v_2, \dots, v_n\}$ is a basis for $V$, then
|
||||
|
||||
|
||||
\[\range{T} = \lspan{T(\beta)} = \lspan{\{T(v_1), T(v_2), \dots, T(v_n)\}}.\]
|
||||
\end{theorem}
|
||||
|
||||
@@ -76,7 +76,7 @@
|
||||
\begin{theorem}[\textbf{Dimension Theorem}]
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces, and let $T: V \to W$ be linear. If $V$ is finite-dimensional, then
|
||||
|
||||
|
||||
\[\nullity{T} + \rank{T} = \ldim{V}\]
|
||||
\end{theorem}
|
||||
|
||||
@@ -88,7 +88,7 @@
|
||||
\begin{theorem}\label{Theorem 2.5}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces of equal (finite) dimension, and let $T: V \to W$ be linear. Then the following are equivalent.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T$ is one-to-one.
|
||||
\item $T$ is onto.
|
||||
@@ -114,4 +114,4 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space, and let $T: V \to V$ be linear. A subspace $W$ of $V$ is said to be \textbf{$T$-invariant} if $T(x) \in W$ for every $x \in W$, that is, $T(W) \subseteq W$. If $W$ is $T$-invariant, we define the \textbf{restriction of $T$ on $W$} to be the function $T_W: W \to W$ defined by $T_W(x) = T(x)$ for all $x \in W$.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
\begin{theorem}\label{Theorem 2.22}
|
||||
\hfill\\
|
||||
Let $\beta$ and $\beta'$ be two ordered bases for a finite-dimensional vector space $V$, and let $Q = [I_V]_{\beta'}^\beta$. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $Q$ is invertible.
|
||||
\item For any $v \in V$, $[v]_\beta = Q[v]_{\beta'}$.
|
||||
@@ -23,11 +23,11 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $\beta$ and $\beta'$ be ordered bases for $V$. Suppose that $Q$ is the change of coordinate matrix that changes $\beta'$-coordinates into $\beta$-coordinates. Then
|
||||
|
||||
|
||||
\[[T]_{\beta'}=Q^{-1}[T]_\beta Q\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\begin{corollary}\label{Corollary 2.8}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$, and let $\gamma$ be an ordered basis for $\F^n$. Then $[L_A]_\gamma = Q^{-1}AQ$, where $Q$ is the $n \times n$ matrix whose $j$th column is the $j$th vector of $\gamma$.
|
||||
\end{corollary}
|
||||
@@ -35,6 +35,6 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ and $B$ be matrices in $M_{n \times n}(\F)$. We say that $B$ is \textbf{similar} to $A$ if there exists an invertible matrix $Q$ such that $B = Q^{-1}AQ$.\\
|
||||
|
||||
|
||||
Notice that the relation of similarity is an equivalence relation. So we need only say that $A$ and $B$ are similar.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -3,36 +3,36 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional vector space. An \textbf{ordered basis} for $V$ is a basis for $V$ endowed with a specific order; that is, an ordered basis for $V$ is a finite sequence of linearly independent vectors in $V$ that generates $V$.\\
|
||||
|
||||
|
||||
For the vector space $\F^n$, we call $\{e_1, e_2, \dots, e_n\}$ the \textbf{standard ordered basis} for $\F^n$. Similarly, for the vector space $P_n(\F)$, we call $\{1, x, \dots, x^n\}$ the \textbf{standard ordered basis} for $P_n(\F)$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $\beta = \{v_1, v_2, \dots, v_n\}$ be an ordered basis for a finite-dimensional vector space $V$. For $x \in V$, let $a_1, a_2, \dots, a_n$ be the unique scalar values such that
|
||||
|
||||
|
||||
\[x = \sum_{i=1}^{n}a_iv_i.\]
|
||||
|
||||
|
||||
We define the \textbf{coordinate vector of $x$ relative to $\beta$}, denoted by $[x]_\beta$, by
|
||||
|
||||
|
||||
\[[x]_\beta = \begin{pmatrix} a_1 \\ a_2 \\ \vdots \\ a_n\end{pmatrix}.\]
|
||||
|
||||
|
||||
Notice that $[v_i]_\beta = e_i$ in the preceding definition. It can be shown that the correspondence $x \to [x]_\beta$ provides us with a linear transformation from $V$ to $\F^n$.
|
||||
\end{definition}
|
||||
|
||||
\begin{notation}
|
||||
\hfill\\
|
||||
The following notation is used to construct a matrix representation of a linear transformation in the following definition.\\
|
||||
|
||||
|
||||
Suppose that $V$ and $W$ are finite-dimensional vector spaces with ordered bases $\beta = \{v_1, v_2, \dots, v_n\}$ and $\gamma = \{w_1, w_2, \dots, w_m\}$, respectively. Let $T: V \to W$ be linear. Then for each $j$, $1 \leq j \leq n$, there exist unique scalars $a_{ij} \in \F$, $1 \leq i \leq m$, such that
|
||||
|
||||
|
||||
\[T(v_j) = \sum_{i=1}^{m}a_{ij}w_i\ \ \text{for}\ 1 \leq j \leq n.\]
|
||||
\end{notation}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Using the notation above, we call the $m \times n$ matrix $A$ defined by $A_{ij} = a_{ij}$ the \textbf{matrix representation of $T$ in the ordered bases $\beta$ and $\gamma$} and write $A = [T]_\beta^\gamma$. If $V = W$ and $\beta = \gamma$, then we write $A = [T]_\beta$.
|
||||
|
||||
|
||||
Notice that the $j$th column of $A$ is simply $[T(v_j)]_\gamma$. Also observe that if $U: V \to W$ is a linear transformation such that $[U]_\beta^\gamma = [T]_\beta^\gamma$, then $U=T$ by the corollary to Theorem 2.6 (\autoref{Corollary 2.1}).
|
||||
\end{definition}
|
||||
|
||||
@@ -44,7 +44,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be vector spaces over a field $\F$, and let $T,U: V \to W$ be linear.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item For all $a \in \F$, $aT+U$ is linear.
|
||||
\item Using the operations of addition and scalar multiplication in the preceding definition, the collection of all linear transformations from $V$ to $W$ is a vector space over $\F$.
|
||||
@@ -59,7 +59,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be finite-dimensional vector spaces with ordered bases $\beta$ and $\gamma$, respectively, and let $T,U: V \to W$ be linear transformations. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $[T+U]_\beta^\gamma = [T]_\beta^\gamma + [U]_\beta^\gamma$ and
|
||||
\item $[aT]_\beta^\gamma = a[T]_\beta^\gamma$ for all scalars $a$.
|
||||
|
||||
@@ -3,13 +3,13 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix. Any one of the following three operations on the rows [columns] of $A$ is called an \textbf{elementary row [column] operation}:
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item interchanging any two rows [columns] of $A$;
|
||||
\item multiplying any row [column] of $A$ by a nonzero scalar;
|
||||
\item adding any scalar multiple of a row [column] of $A$ to another row [column].
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
Any of these three operations are called an \textbf{elementary operation}. Elementary operations are of \textbf{type 1}, \textbf{type 2}, or \textbf{type 3} depending on whether they are obtained by (1), (2), or (3).
|
||||
\end{definition}
|
||||
|
||||
@@ -26,4 +26,4 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Elementary matrices are invertible, and the inverse of an elementary matrix is an elementary matrix of the same type.
|
||||
\end{theorem}
|
||||
\end{theorem}
|
||||
|
||||
@@ -18,12 +18,12 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A matrix is said to be in \textbf{reduced row echelon form} if the following three conditions are satisfied.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item Any row containing a nonzero entry precedes any row in which all the entries are zero (if any).
|
||||
|
||||
|
||||
\item The first nonzero entry in each row is the only nonzero entry in its column.
|
||||
|
||||
|
||||
\item The first nonzero entry in each row is 1 and it occurs in a column to the right of the first nonzero entry in the preceding row.
|
||||
\end{enumerate}
|
||||
\end{definition}
|
||||
@@ -31,10 +31,10 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The following procedure for reducing an augmented matrix to reduced row echelon form is called \textbf{Gaussian elimination}. It consists of two separate parts.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item In the \textit{forward pass}, the augmented matrix is transformed into an upper triangular matrix in which the first nonzero entry of each row is $1$, and it occurs in a column to the right of the first nonzero entry in the preceding row.
|
||||
|
||||
|
||||
\item In the \textit{backward pass} or \textit{back-substitution}, the upper triangular matrix is transformed into reduced row echelon form by making the first nonzero entry of each row the only nonzero entry of its column.
|
||||
\end{enumerate}
|
||||
\end{definition}
|
||||
@@ -46,30 +46,30 @@
|
||||
|
||||
\begin{definition}
|
||||
A solution to a system of equations of the form
|
||||
|
||||
|
||||
\[s = s_0 + t_1u_1 + t_2u_2 + \dots +t_{n-r}u_{n-r},\]
|
||||
|
||||
|
||||
where $r$ is the number of nonzero rows in $A'$ ($r \leq m$), is called a \textbf{general solution} of the system $Ax = b$. It expresses an arbitrary solution $s$ of $Ax = b$ in terms of $n - r$ parameters.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $Ax = b$ be a system of $r$ nonzero equations in $n$ unknowns. Suppose that $\rank{A} = \rank{A|b}$ and that $(A|b)$ is in reduced row echelon form. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\rank{A} = r$.
|
||||
\item If the general solution obtained by the procedure above is of the form
|
||||
|
||||
\[s = s_0 + t_1u_1 + t_2u_2 + \dots + t_{n-r}u_{n-r},\]
|
||||
|
||||
then $\{u_1, u_2, \dots, u_{n-r}\}$ is a basis for the solution set of the corresponding homogeneous system, and $s_0$ is a solution to the original system.
|
||||
|
||||
\[s = s_0 + t_1u_1 + t_2u_2 + \dots + t_{n-r}u_{n-r},\]
|
||||
|
||||
then $\{u_1, u_2, \dots, u_{n-r}\}$ is a basis for the solution set of the corresponding homogeneous system, and $s_0$ is a solution to the original system.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix of rank $r$, where $r > 0$, and let $B$ be the reduced row echelon form of $A$. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item The number of nonzero rows in $B$ is $r$.
|
||||
\item For each $i = 1, 2, \dots, r$, there is a column $b_{j_i}$ of $B$ such that $b_{j_i} = e_i$.
|
||||
@@ -81,4 +81,4 @@
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
The reduced row echelon form of a matrix is unique.
|
||||
\end{corollary}
|
||||
\end{corollary}
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The system of equations
|
||||
|
||||
The system of equations
|
||||
|
||||
\begin{equation}\label{eq:S}
|
||||
\tag{S}
|
||||
\begin{split}
|
||||
@@ -13,47 +13,47 @@
|
||||
a_{m1}x_1 + a_{m2}x_2 + \dots + a_{mn}x_n = b_m,
|
||||
\end{split}
|
||||
\end{equation}
|
||||
|
||||
|
||||
where $a_{ij}$ and $b_i$ ($1 \leq i \leq m$ and $1 \leq j \leq n$) are scalars in a field $\F$ and $x_1, x_2, \dots, x_n$ are $n$ variables taking values in $\F$, is called a \textbf{system of $m$ linear equations in $n$ unknowns over the field $\F$}.
|
||||
|
||||
The $m \times n$ matrix
|
||||
|
||||
|
||||
The $m \times n$ matrix
|
||||
|
||||
\[\begin{pmatrix}
|
||||
a_{11} & a_{12} & \dots & a_{1n} \\
|
||||
a_{21} & a_{22} & \dots & a_{2n} \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
a_{m1} & a_{m2} & \dots & a_{mn}
|
||||
\end{pmatrix}\]
|
||||
|
||||
a_{11} & a_{12} & \dots & a_{1n} \\
|
||||
a_{21} & a_{22} & \dots & a_{2n} \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
a_{m1} & a_{m2} & \dots & a_{mn}
|
||||
\end{pmatrix}\]
|
||||
|
||||
is called the \textbf{coefficient matrix} of the system \eqref{eq:S}.
|
||||
|
||||
|
||||
If we let
|
||||
|
||||
|
||||
\[x = \begin{pmatrix}
|
||||
x_1 \\ x_2 \\ \vdots \\ x_n
|
||||
\end{pmatrix}\ \ \text{and}\ \ b = \begin{pmatrix}
|
||||
b_1 \\ b_2 \\ \vdots \\ b_m
|
||||
\end{pmatrix},\]
|
||||
|
||||
x_1 \\ x_2 \\ \vdots \\ x_n
|
||||
\end{pmatrix}\ \ \text{and}\ \ b = \begin{pmatrix}
|
||||
b_1 \\ b_2 \\ \vdots \\ b_m
|
||||
\end{pmatrix},\]
|
||||
|
||||
then the system \eqref{eq:S} may be rewritten as a single matrix equation
|
||||
|
||||
|
||||
\[Ax = b.\]
|
||||
|
||||
|
||||
To exploit the results that we have developed, we often consider a system of linear equations as a single matrix equation.
|
||||
|
||||
|
||||
A \textbf{solution} to the system \eqref{eq:S} is an $n$-tuple
|
||||
|
||||
|
||||
\[s = \begin{pmatrix}
|
||||
s_1 \\ s_2 \\ \vdots \\ s_n
|
||||
\end{pmatrix} \in \F^n\]
|
||||
|
||||
s_1 \\ s_2 \\ \vdots \\ s_n
|
||||
\end{pmatrix} \in \F^n\]
|
||||
|
||||
such that $As = b$. The set of all solutions to the system \eqref{eq:S} is called the \textbf{solution set} of the system. System \eqref{eq:S} is called \textbf{consistent} if its solution set is nonempty; otherwise it is called \textbf{inconsistent}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A system $Ax = b$ of $m$ linear equations in $n$ unknowns is said to be \textbf{homogeneous} if $b = 0$. Otherwise the system is said to be \textbf{nonhomogeneous}.\\
|
||||
|
||||
|
||||
Any homogeneous system has at least one solution, namely, the zero vector.
|
||||
\end{definition}
|
||||
|
||||
@@ -75,7 +75,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $K$ be the solution set of a system of linear equations $Ax = b$, and let $\mathsf{K}_\mathsf{H}$ be the solution set of the corresponding homogeneous system $Ax = 0$. Then for any solution $s$ to $Ax = b$
|
||||
|
||||
|
||||
\[K = \{s\} + \mathsf{K}_\mathsf{H} = \{s + k: k \in \mathsf{K}_\mathsf{H}\}.\]
|
||||
\end{theorem}
|
||||
|
||||
@@ -96,33 +96,33 @@
|
||||
|
||||
\begin{definition}
|
||||
Consider a system of linear equations
|
||||
|
||||
|
||||
\[\begin{split}
|
||||
a_{11}p_1 + a_{12}p_2 + \dots + a_{1m}p_m = p_1 \\
|
||||
a_{21}p_1 + a_{22}p_2 + \dots + a_{2m}p_m = p_2 \\
|
||||
\dots \\
|
||||
a_{n1}p_1 + a_{n2}p_2 + \dots + a_{nm}p_m = p_m \\
|
||||
\end{split}\]
|
||||
|
||||
This system can be written as $Ap = p$, where
|
||||
|
||||
a_{11}p_1 + a_{12}p_2 + \dots + a_{1m}p_m = p_1 \\
|
||||
a_{21}p_1 + a_{22}p_2 + \dots + a_{2m}p_m = p_2 \\
|
||||
\dots \\
|
||||
a_{n1}p_1 + a_{n2}p_2 + \dots + a_{nm}p_m = p_m \\
|
||||
\end{split}\]
|
||||
|
||||
This system can be written as $Ap = p$, where
|
||||
|
||||
\[p = \begin{pmatrix}
|
||||
p_1 \\ p_2 \\ \vdots \\ p_m
|
||||
\end{pmatrix}\]
|
||||
|
||||
and $A$ is the coefficient matrix of the system. In this context, $A$ is called the \textbf{input-ouput (or consumption) matrix}, and $Ap = p$ is called the \textbf{equilibrium condition}.
|
||||
|
||||
For vectors $b = (b_1, b_2, \dots, b_n)$ and $c = (c_1, c_2, \dots, c_n)$ in $\R^n$, we use the notation $b \geq c$ [$b > c$] to mean $b_i \geq c_i$ [$b_i > c_i$] for all $i$. The vector $b$ is called \textbf{nonnegative [positive]} if $b \geq 0$ [$b > 0$].
|
||||
p_1 \\ p_2 \\ \vdots \\ p_m
|
||||
\end{pmatrix}\]
|
||||
|
||||
and $A$ is the coefficient matrix of the system. In this context, $A$ is called the \textbf{input-output (or consumption) matrix}, and $Ap = p$ is called the \textbf{equilibrium condition}.
|
||||
|
||||
For vectors $b = (b_1, b_2, \dots, b_n)$ and $c = (c_1, c_2, \dots, c_n)$ in $\R^n$, we use the notation $b \geq c$ [$b > c$] to mean $b_i \geq c_i$ [$b_i > c_i$] for all $i$. The vector $b$ is called \textbf{non-negative [positive]} if $b \geq 0$ [$b > 0$].
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $n \times n$ input-output matrix having the form
|
||||
|
||||
|
||||
\[A = \begin{pmatrix}
|
||||
B & C \\
|
||||
D & E
|
||||
\end{pmatrix},\]
|
||||
|
||||
where $D$ is a $1 \times (n -1)$ positive vector and $C$ is an $(n-1)\times 1$ positive vector. Then $(I -A)x = 0$ has a one-dimensional solution set that is generated by a nonnegative vector.
|
||||
\end{theorem}
|
||||
B & C \\
|
||||
D & E
|
||||
\end{pmatrix},\]
|
||||
|
||||
where $D$ is a $1 \times (n -1)$ positive vector and $C$ is an $(n-1)\times 1$ positive vector. Then $(I -A)x = 0$ has a one-dimensional solution set that is generated by a non-negative vector.
|
||||
\end{theorem}
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix. If $P$ and $Q$ are invertible $m \times m$ and $n \times n$ matrices, respectively, then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\rank{AQ} = \rank{A}$,
|
||||
\item $\rank{PA} = \rank{A}$,\\ and therefore
|
||||
@@ -34,30 +34,30 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix of rank $r$. Then $r \leq m$, $r \leq n$, and, by means of a finite number of elementary row and column operations, $A$ can be transformed into the matrix
|
||||
|
||||
|
||||
\[D = \begin{pmatrix}
|
||||
I_r & O_1 \\
|
||||
O_2 & O_3
|
||||
\end{pmatrix}\]
|
||||
|
||||
I_r & O_1 \\
|
||||
O_2 & O_3
|
||||
\end{pmatrix}\]
|
||||
|
||||
where $O_1$, $O_2$ and $O_3$ are the zero matrices. Thus $D_{ii} = 1$ for $i \leq r$ and $D_{ij} = 0$ otherwise.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix of rank $r$. Then there exist invertible matrices $B$ and $C$ of sizes $m \times m$ and $n \times n$, respectively, such that $D=BAC$, where
|
||||
|
||||
|
||||
\[D = \begin{pmatrix}
|
||||
I_r & O_1 \\
|
||||
O_2 & O_3
|
||||
\end{pmatrix}\]
|
||||
I_r & O_1 \\
|
||||
O_2 & O_3
|
||||
\end{pmatrix}\]
|
||||
is the $m \times n$ matrix in which $O_1$, $O_2$, and $O_3$ are zero matrices.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\rank{A^t} = \rank{A}$.
|
||||
\item The rank of any matrix equals the maximum number of its linearly independent rows; that is, the rank of a matrix is the dimension of the subspace generated by its rows.
|
||||
@@ -73,7 +73,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T: V \to W$ and $U: W \to Z$ be linear transformations on finite-dimensional vector spaces $V$, $W$, and $Z$, and let $A$ and $B$ be matrices such that the product $AB$ is defined. Then
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\rank{UT} \leq \rank{U}$.
|
||||
\item $\rank{UT} \leq \rank{T}$.
|
||||
@@ -85,4 +85,4 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ and $B$ be $m \times n$ and $m \times p$ matrices, respectively. By the \textbf{augmented matrix} $(A|B)$, we mean the $m \times (n + p)$ matrix $(A\ B)$, that is, the matrix whose first $n$ columns are the columns of $A$, and whose last $p$ columns are the columns of $B$.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -3,15 +3,15 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A function $\delta: M_{n \times n}(\F) \to \F$ is called an \textbf{\textit{n}-linear function} if it is a linear function of each row of an $n \times n$ matrix when the remaining $n-1$ rows are held fixed, that is, $\delta$ is $n$-linear if, for every $r = 1, 2, \dots, n$, we have
|
||||
|
||||
|
||||
\[\delta\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u+kv \\ a_{r + 1} \\ \vdots \\ a_n
|
||||
\end{pmatrix} = \delta\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u \\ a_{r + 1} \\ \vdots \\ a_n
|
||||
\end{pmatrix} + k\delta\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ v \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix}\]
|
||||
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u+kv \\ a_{r + 1} \\ \vdots \\ a_n
|
||||
\end{pmatrix} = \delta\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u \\ a_{r + 1} \\ \vdots \\ a_n
|
||||
\end{pmatrix} + k\delta\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ v \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix}\]
|
||||
|
||||
whenever $k$ is a scalar and $u,v$ and each $a_i$ are vectors in $\F^n$.
|
||||
\end{definition}
|
||||
|
||||
@@ -23,7 +23,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $\delta: M_{n \times n}(\F) \to \F$ be an alternating $n$-linear function.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item If $A \in M_{n \times n}(\F)$ and $B$ is a matrix obtained from $A$ by interchanging any two rows of $A$, then $\delta(B) = -\delta(A)$.
|
||||
\item If $A \in M_{n \times n}(\F)$ has two identical rows, then $\delta(A) = 0$.
|
||||
@@ -53,4 +53,4 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
If $\delta: M_{n \times n}(\F) \to \F$ is an alternating $n$-linear function such that $\delta(I) = 1$, then $\delta(A) = \det(A)$ for every $A \in M_{n \times n}(\F)$.
|
||||
\end{theorem}
|
||||
\end{theorem}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
\chapter{Determinants}
|
||||
\chapter{Determinants}\label{Chapter 4}
|
||||
\subimport{./}{determinants-of-order-2.tex}
|
||||
\subimport{./}{determinants-of-order-n.tex}
|
||||
\subimport{./}{properties-of-determinants.tex}
|
||||
|
||||
@@ -3,46 +3,46 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
If
|
||||
|
||||
|
||||
\[A = \begin{pmatrix}
|
||||
a & b \\
|
||||
c & d
|
||||
\end{pmatrix}\]
|
||||
is a $2 \times 2$ matrix with entries from a field $\F$, then we define the \textbf{determinant} of $A$, denoted $\det(A)$ or $|A|$, to be the scalar $ad-bc$.
|
||||
a & b \\
|
||||
c & d
|
||||
\end{pmatrix}\]
|
||||
is a $2 \times 2$ matrix with entries from a field $\F$, then we define the \textbf{determinant} of $A$, denoted $\det(A)$ or $|A|$, to be the scalar $ad-bc$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
The function $\det: M_{2 \times 2}(\F) \to \F$ is a linear function of each row of a $2 \times 2$ matrix when the other row is held fixed. That is, if $u$, $v$ and $w$ are in $\F^2$ and $k$ is a scalar, then
|
||||
|
||||
|
||||
\[\det \begin{pmatrix}
|
||||
u + kv \\
|
||||
w
|
||||
\end{pmatrix} = \det\begin{pmatrix}
|
||||
u \\ w
|
||||
\end{pmatrix} + k\det\begin{pmatrix}
|
||||
v \\ w
|
||||
\end{pmatrix}\]
|
||||
|
||||
u + kv \\
|
||||
w
|
||||
\end{pmatrix} = \det\begin{pmatrix}
|
||||
u \\ w
|
||||
\end{pmatrix} + k\det\begin{pmatrix}
|
||||
v \\ w
|
||||
\end{pmatrix}\]
|
||||
|
||||
and
|
||||
|
||||
|
||||
\[\det\begin{pmatrix}
|
||||
w \\ u + kv
|
||||
\end{pmatrix} = \det\begin{pmatrix}
|
||||
w \\ u
|
||||
\end{pmatrix} + k \det \begin{pmatrix}
|
||||
w \\ v
|
||||
\end{pmatrix}.\]
|
||||
w \\ u + kv
|
||||
\end{pmatrix} = \det\begin{pmatrix}
|
||||
w \\ u
|
||||
\end{pmatrix} + k \det \begin{pmatrix}
|
||||
w \\ v
|
||||
\end{pmatrix}.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}\label{Theorem 4.2}
|
||||
\hfill\\
|
||||
Let $A \in M_{2 \times 2}(\F)$. Then the determinant of $A$ is nonzero if and only if $A$ is invertible. Moreover, if $A$ is invertible, then
|
||||
|
||||
|
||||
\[A^{-1} = \frac{1}{\det(A)}\begin{pmatrix}
|
||||
A_{22} & -A_{12} \\
|
||||
-A_{21} & A_{11}
|
||||
\end{pmatrix}.\]
|
||||
A_{22} & -A_{12} \\
|
||||
-A_{21} & A_{11}
|
||||
\end{pmatrix}.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
@@ -53,14 +53,14 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
If $\beta = \{u,v\}$ is an ordered basis for $\R^2$, we define the \textbf{orientation} of $\beta$ to be the real number
|
||||
|
||||
|
||||
\[O\begin{pmatrix}
|
||||
u \\ v
|
||||
\end{pmatrix} = \frac{\det\begin{pmatrix}
|
||||
u \\ v
|
||||
\end{pmatrix}}{\abs{\det\begin{pmatrix}
|
||||
u \\ v
|
||||
\end{pmatrix}}}\]
|
||||
\end{pmatrix} = \frac{\det\begin{pmatrix}
|
||||
u \\ v
|
||||
\end{pmatrix}}{\abs{\det\begin{pmatrix}
|
||||
u \\ v
|
||||
\end{pmatrix}}}\]
|
||||
|
||||
(The denominator of this fraction is nonzero by \autoref{Theorem 4.2}).
|
||||
\end{definition}
|
||||
@@ -73,4 +73,4 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Any ordered set $\{u, v\}$ in $\R^2$ determines a parallelogram in the following manner. Regarding $u$ and $v$ as arrows emanating from the origin of $\R^2$, we call the parallelogram having $u$ and $v$ as adjacent sides the \textbf{parallelogram determined by $u$ and $v$}.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -3,88 +3,88 @@
|
||||
\begin{notation}
|
||||
\hfill\\
|
||||
Given $A \in M_{n \times n}(\F)$, for $n \geq 2$, denote the $(n-1) \times (n - 1)$ matrix obtained from $A$ by deleting row $i$ and column $j$ by $\tilde{A}_{ij}$. Thus for
|
||||
|
||||
|
||||
\[A = \begin{pmatrix}
|
||||
1 & 2 & 3 \\
|
||||
4 & 5 & 6 \\
|
||||
7 & 8 & 9
|
||||
\end{pmatrix} \in M_{3 \times 3}(\R)\]
|
||||
|
||||
1 & 2 & 3 \\
|
||||
4 & 5 & 6 \\
|
||||
7 & 8 & 9
|
||||
\end{pmatrix} \in M_{3 \times 3}(\R)\]
|
||||
|
||||
we have
|
||||
|
||||
|
||||
\[\tilde{A}_{11} = \begin{pmatrix}
|
||||
5 & 6 \\
|
||||
8 & 9
|
||||
\end{pmatrix},\ \ \ \ \
|
||||
\end{pmatrix},\ \ \ \ \
|
||||
\tilde{A}_{13}=\begin{pmatrix}
|
||||
4 & 5 \\
|
||||
7 & 8
|
||||
\end{pmatrix},\ \ \ \ \
|
||||
\end{pmatrix},\ \ \ \ \
|
||||
\tilde{A}_{32} = \begin{pmatrix}
|
||||
1 & 3 \\
|
||||
4 & 6
|
||||
\end{pmatrix}\]
|
||||
|
||||
|
||||
and for
|
||||
|
||||
|
||||
\[B = \begin{pmatrix}
|
||||
1 & -1 & 2 & -1 \\
|
||||
-3 & 4 & 1 & -1 \\
|
||||
2 & -5 & -3 & 8 \\
|
||||
-2 & 6 & -4 & 1
|
||||
\end{pmatrix}\]
|
||||
|
||||
1 & -1 & 2 & -1 \\
|
||||
-3 & 4 & 1 & -1 \\
|
||||
2 & -5 & -3 & 8 \\
|
||||
-2 & 6 & -4 & 1
|
||||
\end{pmatrix}\]
|
||||
|
||||
we have
|
||||
|
||||
|
||||
\[\tilde{B}_{23} = \begin{pmatrix}
|
||||
1 & -1 & -1 \\
|
||||
2 & -5 & 8 \\
|
||||
-2 & 6 & 1
|
||||
\end{pmatrix}\ \ \ \ \ \text{and}\ \ \ \ \ \tilde{B}_{42}=\begin{pmatrix}
|
||||
1 & 2 & -1 \\
|
||||
-3 & 1 & -1 \\
|
||||
2 & -3 & 8
|
||||
\end{pmatrix}\]
|
||||
1 & -1 & -1 \\
|
||||
2 & -5 & 8 \\
|
||||
-2 & 6 & 1
|
||||
\end{pmatrix}\ \ \ \ \ \text{and}\ \ \ \ \ \tilde{B}_{42}=\begin{pmatrix}
|
||||
1 & 2 & -1 \\
|
||||
-3 & 1 & -1 \\
|
||||
2 & -3 & 8
|
||||
\end{pmatrix}\]
|
||||
\end{notation}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$. If $n =1$, so that $A = (A_{11})$, we define $\det(A) = A_{11}$. For $n \geq 2$, we define $\det(A)$ recursively as
|
||||
|
||||
|
||||
\[\det(A) = \sum_{j=1}^{n}(-1)^{1+j}A_{1j}\cdot\det(\tilde{A}_{1j}).\]
|
||||
|
||||
|
||||
The scalar $\det(A)$ is called the \textbf{determinant} of $A$ and is also denoted by $|A|$. The scalar
|
||||
|
||||
|
||||
\[(-1)^{i+j}\det(\tilde{A}_{ij})\]
|
||||
|
||||
|
||||
is called the \textbf{cofactor} of the entry of $A$ in row $i$, column $j$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Letting
|
||||
|
||||
|
||||
\[c_{ij} = (-1)^{i+j}\det(\tilde{A}_{ij})\]
|
||||
|
||||
|
||||
denote the cofactor of the row $i$, column $j$ entry of $A$, we can express the formula for the determinant of $A$ as
|
||||
|
||||
|
||||
\[\det(A) = A_{11}c_{11} + A_{12}c_{12}+\dots+A_{1n}c_{1n}.\]
|
||||
|
||||
|
||||
Thus the determinant of $A$ equals the sum of the products of each entry in row $1$ of $A$ multiplied by its cofactor. This formula is called \textbf{cofactor expansion along the first row} of $A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
The determinant of an $n \times n$ matrix is a linear function of each row when the remaining rows are held fixed. That is, for $1 \leq r \leq n$, we have
|
||||
|
||||
|
||||
\[\det\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u+kv \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix}=\det\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix} + k\det\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ v \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix}\]
|
||||
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u+kv \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix}=\det\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ u \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix} + k\det\begin{pmatrix}
|
||||
a_1 \\ \vdots \\ a_{r-1} \\ v \\ a_{r+1} \\ \vdots \\ a_n
|
||||
\end{pmatrix}\]
|
||||
|
||||
whenever $k$ is a scalar and $u$, $v$, and each $a_i$ are row vectors in $\F^n$.
|
||||
\end{theorem}
|
||||
|
||||
@@ -101,7 +101,7 @@
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
The determinant of a square matrix can be evaluated by cofactor expansion along any row. That is, if $A \in M_{n \times n}(\F)$, then for any integer $i$ ($1 \leq i \leq n$),
|
||||
|
||||
|
||||
\[\det(A) = \sum_{j=1}^{n}(-1)^{i+j}A_{ij}\cdot\det(\tilde{A}_{ij}).\]
|
||||
\end{theorem}
|
||||
|
||||
@@ -128,7 +128,7 @@
|
||||
\begin{remark}\label{Remark 4.1}
|
||||
\hfill\\
|
||||
The following rules summarize the effect of an elementary row operation on the determinant of a matrix $A \in M_{n \times n}(\F)$.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item If $B$ is a matrix obtained by interchanging any two rows of $A$, then $\det(B) = -\det(A)$.
|
||||
\item If $B$ is a matrix obtained by multiplying a row of $A$ by a nonzero scalar $k$, then $\det(B) = k\det(A)$.
|
||||
@@ -139,4 +139,4 @@
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
The determinant of an upper triangular matrix is the product of its diagonal entries.
|
||||
\end{lemma}
|
||||
\end{lemma}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
\begin{remark}
|
||||
\hfill\\
|
||||
Because the determinant of the $n \times n$ matrix is $1$, we can interpret \autoref{Remark 4.1} as the following facts about the determinants of elementary matrices.
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item If $E$ is an elementary matrix obtained by interchanging any two rows of $I$, then $\det(E) = -1$.
|
||||
\item If $E$ is an elementary matrix obtained by multiplying some row of $I$ by the nonzero scalar $k$, then $\det(E) = k$.
|
||||
@@ -28,10 +28,10 @@
|
||||
|
||||
\begin{theorem}[\textbf{Cramer's Rule}]
|
||||
\hfill\\
|
||||
Let $Ax = b$ be the matrix form of a system of $n$ linear equations in $n$ unknowns, where $x = (x_1, x_2, \dots, x_n)^t$. If $\det(A) \neq 0$, then this system has a unique solution, and for each $k$ ($k = 1, 2, \dots, n$),
|
||||
|
||||
Let $Ax = b$ be the matrix form of a system of $n$ linear equations in $n$ unknowns, where $x = (x_1, x_2, \dots, x_n)^t$. If $\det(A) \neq 0$, then this system has a unique solution, and for each $k$ ($k = 1, 2, \dots, n$),
|
||||
|
||||
\[x_k = \frac{\det(M_k)}{\det(A)},\]
|
||||
|
||||
|
||||
where $M_k$ is the $n \times n$ matrix obtained from $A$ by replacing column $k$ of $A$ by $b$.
|
||||
\end{theorem}
|
||||
|
||||
@@ -68,14 +68,14 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A matrix of the form
|
||||
|
||||
|
||||
\[\begin{pmatrix}
|
||||
1 & c_0 & c_0^2 & \dots & c_0^n \\
|
||||
1 & c_1 & c_1^2 & \dots & c_1^n \\
|
||||
\vdots & \vdots & \vdots & &\vdots \\
|
||||
1 & c_n & c_n^2 & \dots & c_n^n
|
||||
\end{pmatrix}\]
|
||||
|
||||
1 & c_0 & c_0^2 & \dots & c_0^n \\
|
||||
1 & c_1 & c_1^2 & \dots & c_1^n \\
|
||||
\vdots & \vdots & \vdots & & \vdots \\
|
||||
1 & c_n & c_n^2 & \dots & c_n^n
|
||||
\end{pmatrix}\]
|
||||
|
||||
is called a \textbf{Vandermonde matrix}.
|
||||
\end{definition}
|
||||
|
||||
@@ -92,13 +92,13 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $y_1, y_2, \dots, y_n$ be linearly independent functions in $\mathsf{C}^\infty$. For each $y \in \mathsf{C}^\infty$, define $T(y) \in \mathsf{C}^\infty$ by
|
||||
|
||||
|
||||
\[[T(y)](t) = \det\begin{pmatrix}
|
||||
y(t) & y_1(t) & y_2(t) & \dots & y_n(t) \\
|
||||
y'(t) & y'_1(t) & y'_2(t) & \dots & y'_n(t) \\
|
||||
\vdots & \vdots & \vdots & &\vdots \\
|
||||
y^{(n)}(t) & y_1^{(n)}(t) & y_2^{(n)}(t) & \dots & y_n^{(n)}(t)
|
||||
\end{pmatrix}\]
|
||||
|
||||
y(t) & y_1(t) & y_2(t) & \dots & y_n(t) \\
|
||||
y'(t) & y'_1(t) & y'_2(t) & \dots & y'_n(t) \\
|
||||
\vdots & \vdots & \vdots & & \vdots \\
|
||||
y^{(n)}(t) & y_1^{(n)}(t) & y_2^{(n)}(t) & \dots & y_n^{(n)}(t)
|
||||
\end{pmatrix}\]
|
||||
|
||||
The preceding determinant is called the \textbf{Wronskian} of $y, y_1, \dots, y_n$.
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -3,21 +3,21 @@
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The \textbf{determinant} of an $n \times n$ matrix $A$ having entries from a field $\F$ is a scalar in $\F$, denoted by $\det(A)$ or $|A|$, and can be computed in the following manner:
|
||||
|
||||
|
||||
\begin{enumerate}
|
||||
\item If $A$ is $1 \times 1$, then $\det(A) = A_{11}$, the single entry of $A$.
|
||||
\item If $A$ is $2 \times 2$, then $\det(A) = A_{11}A_{22} - A_{12}A_{21}$.
|
||||
\item If $A$ is $n \times n$ for $n > 2$, then
|
||||
|
||||
\[\det(A) = \sum_{j=1}^{n}(-1)^{i+j}A_{ij}\cdot\det(\tilde{A}_{ij})\]
|
||||
|
||||
(if the determinant is evaluated by the entries of row $i$ of $A$) or
|
||||
|
||||
\[\det(A) = \sum_{i=1}^{n}(-1)^{i+j}A_{ij}\cdot\det(\tilde{A}_{ij})\]
|
||||
|
||||
(if the determinant is evaluated by the entries of column $j$ of $A$), where $\tilde{A}_{ij}$ is the $(n-1) \times (n-1)$ matrix obtained by deleting row $i$ and column $j$ from $A$.
|
||||
|
||||
\[\det(A) = \sum_{j=1}^{n}(-1)^{i+j}A_{ij}\cdot\det(\tilde{A}_{ij})\]
|
||||
|
||||
(if the determinant is evaluated by the entries of row $i$ of $A$) or
|
||||
|
||||
\[\det(A) = \sum_{i=1}^{n}(-1)^{i+j}A_{ij}\cdot\det(\tilde{A}_{ij})\]
|
||||
|
||||
(if the determinant is evaluated by the entries of column $j$ of $A$), where $\tilde{A}_{ij}$ is the $(n-1) \times (n-1)$ matrix obtained by deleting row $i$ and column $j$ from $A$.
|
||||
\end{enumerate}
|
||||
|
||||
|
||||
In the formulas above, the scalar $(-1)^{i+j}\det(\tilde{A}_{ij})$ is called the \textbf{cofactor} of the row $i$ column $j$ of $A$.
|
||||
\end{definition}
|
||||
|
||||
@@ -28,4 +28,4 @@
|
||||
\item If $B$ is a matrix obtained by multiplying each entry of some row or column of an $n \times n$ matrix $A$ by a scalar $k$, then $\det(B) = k\cdot\det(A)$.
|
||||
\item If $B$ is a matrix obtained from an $n \times n$ matrix $A$ by adding a multiple of row $i$ to row $j$ or a multiple of column $i$ to column $j$ for $i \neq j$, then $\det(B) = \det(A)$.
|
||||
\end{enumerate}
|
||||
\end{definition}
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,117 @@
|
||||
\section{Diagonalizability}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda_1, \lambda_2, \dots, \lambda_k$ be distinct eigenvalues of $T$. If $v_1, v_2, \dots, v_k$ are eigenvectors of $T$ such that $\lambda_i$ corresponds to $v_i$ ($1 \leq i \leq k$), then $\{v_1, v_2, \dots, v_k\}$ is linearly independent.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on an $n$-dimensional vector space $V$. If $T$ has $n$ distinct eigenvalues, then $T$ is diagonalizable.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A polynomial $f(t)$ in $P(\F)$ \textbf{splits over} $\F$ if there are scalars $c, a_1, \dots, a_n$ (not necessarily distinct) in $\F$ such that
|
||||
|
||||
\[f(t) = c(t-a_1)(t-a_2)\dots(t-a_n).\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
The characteristic polynomial of any diagonalizable linear operator splits.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $\lambda$ be an eigenvalue of a linear operator or matrix with characteristic polynomial $f(t)$. The \textbf{(algebraic) multiplicity} of $\lambda$ is the largest positive integer $k$ for which $(t - \lambda)^k$ is a factor of $f(t)$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda$ be an eigenvalue of $T$. Define $E_\lambda = \{x \in V : T(x) = \lambda x \}=\n{T - \lambda I_V}$. The set $E_\lambda$ is called the \textbf{eigenspace} of $T$ corresponding to the eigenvalue $\lambda$. Analogously, we define the \textbf{eigenspace} of a square matrix $A$ to be the eigenspace of $L_A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $\lambda$ be an eigenvalue of $T$ having multiplicity $m$. Then $1 \leq \ldim{E_\lambda} \leq m$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator, and let $\lambda_1, \lambda_2, \dots, \lambda_k$ be distinct eigenvalues of $T$. For each $i=1, 2, \dots, k$, let $v_i \in E_{\lambda_i}$, the eigenspace corresponding to $\lambda_i$. If
|
||||
|
||||
\[v_1 + v_2 + \dots + v_k = 0,\]
|
||||
|
||||
then $v_i = 0$ for all $i$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda_1, \lambda_2, \dots, \lambda_k$ be distinct eigenvalues of $T$. For each $i = 1, 2, \dots, k$, let $S_i$ be a finite linearly independent subset of the eigenspace $E_{\lambda_i}$. Then $S = S_1 \cup S_2 \cup \dots \cup S_k$ is a linearly independent subset of $V$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ such that the characteristic polynomial of $T$ splits. Let $\lambda_1, \lambda_2, \dots \lambda_k$ be the distinct eigenvalues of $T$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T$ is diagonalizable if and only if the multiplicity of $\lambda_i$ is equal to $\ldim{E_{\lambda_i}}$ for all $i$.
|
||||
\item If $T$ is diagonalizable and $\beta_i$ is an ordered basis for $E_{\lambda_i}$ for each $i$, then $\beta = \beta_1 \cup \beta_2 \cup \dots \cup \beta_k$ is an ordered basis for $V$ consisting of eigenvectors of $T$.\\
|
||||
|
||||
\textbf{Note:} We regard $\beta_1 \cup \beta_2 \cup \dots \cup \beta_k$ as an ordered basis in the natural way -- the vectors in $\beta_1$ are listed first (in the same order as in $\beta_1$), then the vectors in $\beta_2$ (in the same order as $\beta_2$), etc.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{remark}[\textbf{Test for Diagonalization}]
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on an $n$-dimensional vector space $V$. Then $T$ is diagonalizable if and only if both of the following conditions hold.
|
||||
|
||||
\begin{enumerate}
|
||||
\item The characteristic polynomial of $T$ splits.
|
||||
\item For each eigenvalue $\lambda$ of $T$, the multiplicity of $\lambda$ equals $n - \rank{T - \lambda I}$.
|
||||
\end{enumerate}
|
||||
\end{remark}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $W_1, W_2, \dots, W_k$ be subspaces of a vector space $V$. We define the \textbf{sum} of these subspaces to be the set
|
||||
|
||||
\[\{v_1 + v_2 + \dots + v_k : v_i \in W_i\ \text{for}\ 1 \leq i \leq k\}\]
|
||||
|
||||
which we denote by $W_1 + W_2 + \dots + W_k$ or $\displaystyle\sum_{i=1}^{k}W_i$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $W_1, W_2, \dots, W_k$ be subspaces of a vector space $V$. We call $V$ the \textbf{direct sum} of the subspaces $W_1, W_2, \dots, W_k$ and write $V = W_1 \oplus W_2 \oplus \dots \oplus W_k$, if
|
||||
|
||||
\[V = \sum_{i=1}^{k}W_i\]
|
||||
|
||||
and
|
||||
|
||||
\[W_j \cap \sum_{i \neq j} W_i = \{0\}\ \ \ \text{for each}\ j\ (1 \leq j \leq k)\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $W_1, W_2, \dots, W_k$ be subspaces of a finite-dimensional vector space $V$. The following conditions are equivalent.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $V = W_1 \oplus W_2 \oplus \dots \oplus W_k$.
|
||||
\item $V = \displaystyle\sum_{i=1}^{k}W_i$ and, for any vectors $v_1, v_2, \dots, v_k$ such that $v_i \in W_i$ ($1 \leq i \leq k$), if $v_1 + v_2 + \dots + v_k = 0$, then $v_i = 0$ for all $i$.
|
||||
\item Each vector $v \in V$ can be uniquely written as $v = v_1 + v_2 + \dots + v_k$, where $v_i \in W_i$.
|
||||
\item If $\gamma_i$ is an ordered basis for $W_i$ ($1 \leq i \leq k$), then $\gamma_1 \cup \gamma_2 \cup \dots \cup \gamma_k$ is an ordered basis for $V$.
|
||||
\item For each $i = 1, 2, \dots, k$, there exists an ordered basis $\gamma_i$ for $W_i$ such that $\gamma_1 \cup \gamma_2 \cup \dots \cup \gamma_k$ is an ordered basis for $V$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
A linear operator $T$ on a finite-dimensional vector space $V$ is diagonalizable if and only if $V$ is the direct sum of the eigenspaces of $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Two linear operators $T$ and $U$ on a finite-dimensional vector space $V$ are called \textbf{simultaneously diagonalizable} if there exists an ordered basis $\beta$ for $V$ such that both $[T]_\beta$ and $[U]_\beta$ are diagonal matrices. Similarly, $A, B \in M_{n \times n}(\F)$ are called \textbf{simultaneously diagonalizable} if there exists an invertible matrix $Q \in M_{n \times n}(\F)$ such that both $Q^{-1}AQ$ and $Q^{-1}BQ$ are diagonal matrices.
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,64 @@
|
||||
\section{Eigenvalues and Eigenvectors}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A linear operator $T$ on a finite-dimensional vector space $V$ is called \textbf{diagonalizable} if there is an ordered basis $\beta$ for $V$ such that $[T]_\beta$ is a diagonal matrix. A square matrix $A$ is called \textbf{diagonalizable} if $L_A$ is diagonalizable.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$. A nonzero vector $v \in V$ is called an \textbf{eigenvector} of $T$ if there exists a scalar $\lambda$ such that $T(v) = \lambda v$. The scalar $\lambda$ is called the \textbf{eigenvalue} corresponding to the eigenvector $v$.
|
||||
|
||||
Let $A$ be in $M_{n \times n}(\F)$. A nonzero vector $v \in \F^n$ is called an \textbf{eigenvector} of $A$ if $v$ is an eigenvector of $L_A$; that is, if $Av = \lambda v$ for some scalar $\lambda$. The scalar $\lambda$ is called the \textbf{eigenvalue} of $A$ corresponding to the eigenvector $v$.\\
|
||||
|
||||
The words \textit{characteristic vector} and \textit{proper vector} are also used in place of \textit{eigenvector}. The corresponding terms for \textit{eigenvalue} are \textit{characteristic value} and \textit{proper value}.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
A linear operator $T$ on a finite-dimensional vector space $V$ is diagonalizable if and only if there exists an ordered basis $\beta$ for $V$ consisting of eigenvectors of $T$. Furthermore, if $T$ is diagonalizable, $\beta = \{v_1, v_2, \dots, v_n\}$ is an ordered basis of eigenvectors of $T$, and $D=[T]_\beta$, then $D$ is a diagonal matrix and $D_{jj}$ is the eigenvalue corresponding to $v_j$ for $1 \leq j \leq n$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{remark}
|
||||
\hfill\\
|
||||
To \textit{diagonalize} a matrix or linear operator is to find a basis of eigenvectors and the corresponding eigenvalues.
|
||||
\end{remark}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$. Then a scalar $\lambda$ is an eigenvalue of $A$ if and only if $\det(A - \lambda I_n) = 0$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$. The polynomial $f(t) = \det(A - tI_n)$ is called the \textbf{characteristic polynomial} of $A$.\\
|
||||
|
||||
The observant reader may have noticed that the entries of the matrix $A - tI_n$ are not scalars in the field $\F$. They are, however, scalars in another field $\F(t)$, the field of quotients of polynomials in $t$ with coefficients from $\F$. Consequently, any results proved about determinants in \autoref{Chapter 4} remain valid in this context.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on an $n$-dimensional vector space $V$ with ordered basis $\beta$. We define the \textbf{characteristic polynomial} $f(t)$ of $T$ to be the characteristic polynomial of $A=[T]_\beta$. That is,
|
||||
|
||||
\[f(t) = \det(A - tI_n).\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$.
|
||||
|
||||
\begin{enumerate}
|
||||
\item The characteristic polynomial of $A$ is a polynomial of degree $n$ with leading coefficient $(-1)^n$.
|
||||
\item $A$ has at most $n$ distinct eigenvalues.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda$ be an eigenvalue of $T$. A vector $v$ is an eigenvector of $T$ corresponding to $\lambda$ if and only if $v \neq 0$ and $v \in \n{T - \lambda I}$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A \textbf{scalar matrix} is a square matrix of the form $\lambda I$ for some scalar $\lambda$; that is, a scalar matrix is a diagonal matrix in which all the diagonal entries are equal.
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,81 @@
|
||||
\section{Invariant Subspaces and the Cayley-Hamilton Theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$. A subspace $W$ of $V$ is called a \textbf{$T$-invariant subspace} of $V$ if $T(W) \subseteq W$, that is, if $T(v) \in W$ for all $v \in W$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $x$ be a nonzero vector in $V$. The subspace
|
||||
|
||||
\[W = \lspan{\{x, T(x), T^2(x), \dots\}}\]
|
||||
|
||||
is called the \textbf{$T$-cyclic subspace of $V$ generated by $x$}.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $W$ be a $T$-invariant subspace of $V$. Then the characteristic polynomial of $T_W$ divides the characteristic polynomial of $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}\label{Theorem 5.22}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $W$ denote the $T$-cyclic subspace of $V$ generated by a nonzero vector $v \in V$. Let $k = \ldim{W}$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\{v, T(v), T^2(v), \dots, T^{k-1}(v)\}$ is a basis for $W$.
|
||||
\item If $a_0v + a_1T(v) + \dots + a_{k-1}T^{k-1}(v)+T^k(v) = 0$, then the characteristic polynomial of $T_W$ is $f(t) = (-1)^k(a_0 + a_1t + \dots +a_{k-1}t^{k-1}+t^k)$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}[\textbf{Cayley-Hamilton}]
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $f(t)$ be the characteristic polynomial of $T$. Then $f(T) = T_0$, the zero transformation. That is, $T$ ``satisfies'' its characteristic equation.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}[\textbf{Cayley-Hamilton Theorem for Matrices}]
|
||||
\hfill\\
|
||||
Let $A$ be an $n \times n$ matrix, and let $f(t)$ be the characteristic polynomial of $A$. Then $f(A) = O$, the $n \times n$ zero matrix.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and suppose that $V = W_1 \oplus W_2 \oplus \dots \oplus W_k$, where $W_i$ is a $T$-invariant subspace of $V$ for each $i$ ($1 \leq i \leq k$). Suppose that $f_i(t)$ is the characteristic polynomial of $T_{W_i}$ ($1 \leq i \leq k$). Then $f_1(t)\cdot f_2(t) \cdot \dots \cdot f_k(t)$ is the characteristic polynomial of $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $B_1 \in M_{m \times m}(\F)$, and let $B_2 \in M_{n \times n}(\F)$. We define the \textbf{direct sum} of $B_1$ and $B_2$, denoted $B_1 \oplus B_2$, as the $(m + n) \times (m + n)$ matrix $A$ such that
|
||||
|
||||
\[A_{ij} = \begin{cases}
|
||||
(B_1)_{ij} & \text{for}\ 1 \leq i, j \leq m \\
|
||||
(B_2)_{(i-m),(j-m)} & \text{for}\ m + 1 \leq i, j \leq n + m \\
|
||||
0 & \text{otherwise.}
|
||||
\end{cases}\]
|
||||
|
||||
If $B_1, B_2, \dots, B_k$ are square matrices with entries from $\F$, then we define the \textbf{direct sum} of $B_1, B_2, \dots, B_k$ recursively by
|
||||
|
||||
\[B_1 \oplus B_2 \oplus \dots \oplus B_k = (B_1 \oplus B_2 \oplus \dots \oplus B_{k-1})\oplus B_k.\]
|
||||
|
||||
If $A= B_1 \oplus B_2 \oplus \dots \oplus B_k$, then we often write
|
||||
|
||||
\[A = \begin{pmatrix}
|
||||
B_1 & O & \dots & O \\
|
||||
O & B_2 & \dots & O \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
O & O & \dots & B_k
|
||||
\end{pmatrix}\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $W_1, W_2, \dots, W_k$ be $T$-invariant subspaces of $V$ such that $V = W_1 \oplus W_2 \oplus \dots \oplus W_k$. For each $i$, let $\beta_i$ be an ordered basis for $W_i$, and let $\beta = \beta_1 \cup \beta_2 \cup \dots \cup \beta_k$. Let $A = [T]_\beta$ and $B_i = [T_{W_i}]_{\beta_i}$ for $i = 1, 2, \dots, k$. Then $A = B_1 \oplus B_2 \oplus \dots \oplus B_k$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $W$ be a $T$-invariant subspace of $V$. Define $\overline{T}: V/W \to V/W$ by
|
||||
|
||||
\[\overline{T}(v + W) = T(v) + W\ \ \ \text{for any}\ v + W \in V/W.\]
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,223 @@
|
||||
\section{Matrix Limits and Markov Chains}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $L, A_1, A_2, \dots$ be $n \times p$ matrices having complex entries. The sequence $A_1, A_2, \dots$ is said to \textbf{converge} to the $n \times p$ matrix $L$, called the \textbf{limit} of the sequence, if
|
||||
|
||||
\[\lim_{m \to \infty}(A_m)_{ij} = L_{ij}\]
|
||||
|
||||
for all $1 \leq i \leq n$ and $1 \leq j \leq p$. To designate that $L$ is the limit of the sequence, we write
|
||||
|
||||
\[\lim_{m \to \infty}A_m = L.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A_1, A_2, \dots$ be a sequence of $n \times p$ matrices with complex entries that converges to the matrix $L$. Then for any $P \in M_{r \times n}(\C)$ and $Q \in M_{p \times s}(\C)$,
|
||||
|
||||
\[\lim_{m \to \infty}PA_m = PL\ \ \ \ \text{and}\ \ \ \ \lim_{m \to \infty}A_mQ = LQ.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\C)$ be such that $\displaystyle\lim_{m \to \infty}A^m = L$. Then for any invertible matrix $Q \in M_{n \times n}(\C)$,
|
||||
|
||||
\[\lim_{m \to \infty}(QAQ^{-1})^m = QLQ^{-1}.\]
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be a square matrix with complex entries. Then $\displaystyle\lim_{m \to \infty}A^m$ exists if and only if both of the following conditions hold.
|
||||
|
||||
\begin{enumerate}
|
||||
\item Every eigenvalue of $A$ is contained in $S$.
|
||||
\item If $1$ is an eigenvalue of $A$, then the dimension of the eigenspace corresponding to $1$ equals the multiplicity of $1$ as an eigenvalue of $A$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\C)$ satisfy the following two conditions.
|
||||
|
||||
\begin{enumerate}
|
||||
\item Every eigenvalue of $A$ is contained in $S$.
|
||||
\item $A$ is diagonalizable.
|
||||
\end{enumerate}
|
||||
|
||||
Then $\displaystyle\lim_{m \to \infty}A^m$ exists.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
Any square matrix having the following two properties is called a \textbf{transition matrix} or a \textbf{stochastic matrix}.
|
||||
|
||||
\begin{enumerate}
|
||||
\item All entries are non-negative.
|
||||
\item The entries in each column sum to $1$.
|
||||
\end{enumerate}
|
||||
|
||||
For an arbitrary $n \times n$ transition matrix $M$, the rows and columns correspond to $n$ \textbf{states}, and the entry $M_{ij}$ represents the probability of moving from state $j$ to state $i$ in one \textbf{stage}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
A vector with non-negative entries that add up to $1$ is called a \textbf{probability vector}.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $M$ be an $n \times n$ matrix having real non-negative entries, let $v$ be a column vector in $\R^n$ having non-negative coordinates, and let $u \in \R^n$ be the column vector in which each coordinate equals $1$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $M$ is a transition matrix if and only if $M^tu = u$;
|
||||
\item $v$ is a probability vector if and only if $u^tv = (1)$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\begin{enumerate}
|
||||
\item[]
|
||||
\item The product of two $n \times n$ transition matrices is an $n \times n$ transition matrix. In particular, any power of a transition matrix is a transition matrix.
|
||||
\item The product of a transition matrix and a probability vector is a probability vector.
|
||||
\end{enumerate}
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
A process in which elements of a set are each classified as being in one of several fixed states that can switch over time is called a \textbf{stochastic process}. The switching to a particular state is described by the probability, and in general this probability depends on such factors as the state in question, the time in question, some or all of the previous states in which the object has been (including the current state), and the states that other objects are in or have been in.\\
|
||||
|
||||
If, however, the probability that an object in one state changes to a different state in a fixed interval of time depends only on the two states (and not on the time, earlier states, or other factors), then the stochastic process is called a \textbf{Markov process}. If, in addition, the number of possible states is finite, then the Markov process is called a \textbf{Markov chain}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The vector that describes the initial probability of being in each state of a Markov chain is called the \textbf{initial probability vector} for the Markov chain.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A transition matrix is called \textbf{regular} if some power of the matrix contains only positive entries.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\C)$. For $1 \leq i, j \leq n$, define $\rho_i(A)$ to be the sum of the absolute values of the entries of row $i$ of $A$, and define $\nu_j(A)$ to be the sum of the absolute values of the entries of column $j$ of $A$. Thus
|
||||
|
||||
\[\rho_i(A) = \sum_{j=1}^{n}|A_{ij}|\ \ \text{for}\ i=1, 2, \dots, n\]
|
||||
|
||||
and
|
||||
|
||||
\[\nu_j(A) = \sum_{i=1}^{n}|A_{ij}|\ \ \text{for}\ j=1, 2, \dots, n.\]
|
||||
|
||||
The \textbf{row sum} of $A$, denoted $\rho(A)$, and the \textbf{column sum} of $A$, denoted $\nu(A)$, are defined as
|
||||
|
||||
\[\rho(A) = \max\{\rho_i(A) : 1 \leq i \leq n\}\ \ \ \ \text{and}\ \ \ \ \nu(A) = \max\{\nu_j(A) : 1 \leq j \leq n\}.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For an $n \times n$ matrix $A$, we define the $i$th \textbf{Gerschgorin disk} $\C_i$ to be the disk in the complex plane with center $A_{ii}$ and radius $r_i = \rho_i(A) - |A_{ii}|$; that is,
|
||||
|
||||
\[\C_i = \{z \in \C : |z - A_{ii}| \leq r_i\}.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}[\textbf{Gerschgorin's Disk Theorem}]
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\C)$. Then every eigenvalue of $A$ is contained in a Gerschgorin disk.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $\lambda$ be any eigenvalue of $A \in M_{n \times n}(\C)$. Then $|\lambda| \leq \rho(A)$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $\lambda$ be any eigenvalue of $A \in M_{n \times n}(\C)$. Then
|
||||
|
||||
\[|\lambda| \leq \min\{\rho(A), \nu(A)\}.\]
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
If $\lambda$ is an eigenvalue of a transition matrix, then $|\lambda| \leq 1$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Every transition matrix has $1$ as an eigenvalue.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\C)$ be a matrix in which each entry is positive, and let $\lambda$ be an eigenvalue of $A$ such that $|\lambda| = \rho(A)$. Then $\lambda = \rho(A)$ and $\{u\}$ is a basis for $E_\lambda$, where $u \in \C^n$ is the column vector in which each coordinate equals 1.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\C)$ be a matrix in which each entry is positive, and let $\lambda$ be an eigenvalue of $A$ such that $|\lambda| = \nu(A)$. Then $\lambda = \nu(A)$, and the dimension of $E_\lambda = 1$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\C)$ be a transition matrix in which each entry is positive, and let $\lambda$ be any eigenvalue of $A$ other than $1$. Then $|\lambda| < 1$. Moreover, the eigenspace corresponding to the eigenvalue $1$ has dimension $1$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be a regular transition matrix, and let $\lambda$ be an eigenvalue of $A$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $|\lambda| \leq 1$.
|
||||
\item If $|\lambda| = 1$, then $\lambda = 1$, and $\ldim{E_\lambda} = 1$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ be a regular transition matrix that is diagonalizable. Then $\displaystyle\lim_{m \to \infty}A^m$ exists.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}\label{Theorem 5.20}
|
||||
\hfill\\
|
||||
Let $A$ be an $n \times n$ regular transition matrix. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item The multiplicity of $1$ as an eigenvalue of $A$ is $1$.
|
||||
\item $\displaystyle\lim_{m \to \infty}A^m$ exists.
|
||||
\item $L = \displaystyle\lim_{m \to \infty}A^m$ is a transition matrix.
|
||||
\item $AL = LA = L$.
|
||||
\item The columns of $L$ are identical. In fact, each column of $L$ is equal to the unique probability vector $v$ that is also an eigenvector of $A$ corresponding to the eigenvalue $1$.
|
||||
\item For any probability vector $w$, $\displaystyle\lim_{m \to \infty}(A^mw) = v$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The vector $v$ in \autoref{Theorem 5.20}(5) is called the \textbf{fixed probability vector} or \textbf{stationary vector} of the regular transition matrix $A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Consider transition matrices that can be represented in the following form:
|
||||
|
||||
\[\begin{pmatrix}
|
||||
I & B \\
|
||||
O & C
|
||||
\end{pmatrix}\]
|
||||
|
||||
where $I$ is an identity matrix and $O$ is a zero matrix. (Such transition matrices are not regular since the lower left block remains $O$ in any power of the matrix.) The states corresponding to the identity submatrix are called \textbf{absorbing states} because such a state is never left once it is entered. A Markov chain is called an \textbf{absorbing Markov chain} if it is possible to go from an arbitrary state into an absorbing state in a finite number of stages.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For $A \in M_{n \times n}(\C)$, define $e^A = \displaystyle\lim_{m \to \infty}B_m$, where
|
||||
|
||||
\[B_m = I + A + \frac{A^2}{2!} + \dots + \frac{A^m}{m!}\]
|
||||
|
||||
Thus $e^A$ is the sum of the infinite series
|
||||
|
||||
\[I + A + \frac{A^2}{2!} + \frac{A^3}{3!} + \dots,\]
|
||||
|
||||
and $B_m$ is the $m$th partial sum of this series. (Note the analogy with the power series
|
||||
|
||||
\[e^a = 1 + a + \frac{a^2}{2!}+\frac{a^3}{3!}+\dots,\]
|
||||
which is valid for all complex numbers $a$.)
|
||||
\end{definition}
|
||||
|
||||
|
||||
\section{Bilinear and Quadratic Forms}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space over a field $\F$. A function $H$ from the set $V \times V$ of ordered pairs of vectors to $\F$ is called a \textbf{bilinear form} on $V$ if $H$ is linear in each variable when the other variable is held fixed; that is, $H$ is a bilinear form on $V$ if
|
||||
|
||||
\begin{enumerate}
|
||||
\item $H(ax_1 + x_2, y) = aH(x_1,y) + H(x_2,y)$ for all $x_1, x_2, y \in V$ and $a \in \F$.
|
||||
\item $H(x,ay_1+y_2) = aH(x,y_1) + H(x,y_2)$ for all $x,y_1,y_2 \in V$ and $a \in \F$.
|
||||
\end{enumerate}
|
||||
\end{definition}
|
||||
|
||||
\begin{notation}
|
||||
\hfill\\
|
||||
We denote the set of all bilinear forms on $V$ by $\mathcal{B}(V)$.
|
||||
\end{notation}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space, let $H_1$ and $H_2$ be bilinear forms on $V$, and let $a$ be a scalar. We define the \textbf{sum} $H_1 + H_2$ and the \textbf{scalar product} $aH_1$ by the equations
|
||||
|
||||
\[(H_1 + H_2)(x,y) = H_1(x,y) + H_2(x,y)\]
|
||||
|
||||
and
|
||||
|
||||
\[(aH_1)(x,y) = a(H_1(x,y))\ \ \ \text{for all}\ x,y \in V.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For any vector space $V$, the sum of two bilinear forms and the product of a scalar and a bilinear form on $V$ are again bilinear forms on $V$. Furthermore, $\mathcal{B}(V)$ is a vector space with respect to these operations.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
Let $\beta = \{v_1, v_2, \dots, v_n\}$ be an ordered basis for an $n$-dimensional vector space $V$, and let $H \in \mathcal{B}(V)$. We can associate $H$ with an $n \times n$ matrix $A$ whose entry in row $i$ and column $j$ is defined by
|
||||
|
||||
\[A_{ij} = H(v_i, v_j)\ \ \ \ \text{for}\ i,j = 1, 2, \dots, n.\]
|
||||
|
||||
The matrix $A$ is called the \textbf{matrix representation} of $H$ with respect to the ordered basis $\beta$ and is denoted by $\psi_\beta(H)$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For any $n$-dimensional vector space $V$ over $\F$ and any ordered basis $\beta$ for $V$, $\psi_\beta : \mathcal{B}(V) \to M_{n \times n}(\F)$ is an isomorphism.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
For any $n$-dimensional vector space $V$, $\mathcal{B}(V)$ has dimension $n^2$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $V$ be an $n$-dimensional vector space over $\F$ with ordered basis $\beta$. If $H \in \mathcal{B}(V)$ and $A \in M_{n \times n}(\F)$, then $\psi_\beta(H) = A$ if and only if $H(x,y) = [\phi_\beta(x)]^tA[\phi_\beta(y)]$ for all $x,y \in V$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $\F$ be a field, $n$ a positive integer, and $\beta$ be the standard ordered basis for $F^n$. Then for any $H \in \mathcal{B}(F^n)$, there exists a unique square matrix $A \in M_{n \times n}(\F)$, namely, $A = \psi_\beta(H)$, such that
|
||||
|
||||
\[H(x,y) = x^tAy\ \ \ \ \text{for all}\ x,y \in F^n.\]
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A,B \in M_{n \times n}(\F)$. Then $B$ is said to be \textbf{congruent} to $A$ if there exists an invertible matrix $Q \in M_{n \times n}(\F)$ such that $B = Q^tAQ$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional vector space with ordered bases $\beta = \{v_1, v_2, \dots, v_n\}$ and $\gamma = \{w_1, w_2, \dots, w_n\}$, and let $Q$ be the change of coordinate matrix changing $\gamma$-coordinates into $\beta$-coordinates. Then, for any $H \in \mathcal{B}(V)$, we have $\psi_\gamma(H) = Q^t\psi_\beta(H)Q$. Therefore $\psi_\gamma(H)$ is congruent to $\psi_\beta(H)$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $V$ be an $n$-dimensional vector space with ordered basis $\beta$, and let $H$ be a bilinear form on $V$. For any $n \times n$ matrix $B$, if $B$ is congruent to $\psi_\beta(H)$, then there exists an ordered basis $\gamma$ for $V$ such that $\psi_\gamma(H) = B$. Furthermore, if $B = Q^t\psi_\beta(H)Q$ for some invertible matrix $Q$, then $Q$ changes $\gamma$-coordinates into $\beta$-coordinates.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A bilinear form $H$ on a vector space $V$ is \textbf{symmetric} if $H(x,y) = H(y,x)$ for all $x,y \in V$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $H$ be a bilinear form on a finite-dimensional vector space $V$, and let $\beta$ be an ordered basis for $V$. Then $H$ is symmetric if and only if $\psi_\beta(H)$ is symmetric.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A bilinear form $H$ on a finite-dimensional vector space $V$ is called \textbf{diagonalizable} if there is an ordered basis $\beta$ for $V$ such that $\psi_\beta(H)$ is a diagonal matrix.
|
||||
\end{definition}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $H$ be a diagonalizable bilinear form on a finite-dimensional vector space $V$. Then $H$ is symmetric.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The \textbf{characteristic} of a field $\F$ is the smallest positive integer $p$ for which a sum of $p$ 1's equals $0$. If no such positive integer exists, then $\F$ is said to have \textbf{characteristic zero}.
|
||||
\end{definition}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $H$ be a nonzero symmetric bilinear form on a vector space $V$ over a field $\F$ not of characteristic two. Then there is a vector $x$ in $V$ such that $H(x,x) \neq 0$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional vector space over a field $\F$ not of characteristic two. Then every symmetric bilinear form on $V$ is diagonalizable.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $\F$ be a field that is not of characteristic two. If $A \in M_{n \times n}(\F)$ is a symmetric matrix, then $A$ is congruent to a diagonal matrix.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space over $\F$. A function $K: V \to \F$ is called a \textbf{quadratic form} if there exists a symmetric bilinear form $H \in \mathcal{B}(V)$ such that
|
||||
|
||||
\[K(x) = H(x, x)\ \ \ \text{for all}\ x \in V.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional real inner product space, and let $H$ be a symmetric bilinear form on $V$. Then there exists an orthonormal basis $\beta$ for $V$ such that $\psi_\beta(H)$ is a diagonal matrix.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $K$ be a quadratic form on a finite-dimensional real inner product space $V$. There exists an orthonormal basis $\beta = \{v_1, v_2, \dots, v_n\}$ for $V$ and scalars $\lambda_1, \lambda_2, \dots, \lambda_n$ (not necessarily distinct) such that if $x \in V$ and
|
||||
|
||||
\[x = \sum_{i=1}^{n}s_iv_i,\ \ \ s_i \in \R\]
|
||||
|
||||
then
|
||||
|
||||
\[K(x) = \sum_{i=1}^{n}\lambda_is_i^2.\]
|
||||
|
||||
In fact, if $H$ is the symmetric bilinear form determined by $K$, then $\beta$ can be chosen to be any orthonormal basis for $V$ such that $\psi_\beta(H)$ is a diagonal matrix.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $z=f(t_1, t_2, \dots, t_n)$ be a fixed real-valued function of $n$ real variables for which all third-order partial derivatives exist and are continuous. The function $f$ is said to have a \textbf{local maximum} at a point $p \in \R^n$ if there exists a $\delta > 0$ such that $f(p) \geq f(x)$ whenever $||x - p|| < \delta$. Likewise, $f$ has a \textbf{local minimum} at $p \in \R^n$ if there exists a $\delta > 0$ such that $f(p) \leq f(x)$ whenever $||x - p|| < \delta$. If $f$ has either a local minimum or a local maximum at $p$, we say that $f$ has a \textbf{local extremum} at $p$. A point $p \in \R^n$ is called a \textbf{critical point} of $f$ if $\displaystyle\frac{\partial f(p)}{\partial t_i} = 0$ for $i = 1, 2, \dots, n$. It is a well-known fact that if $f$ has a local extremum at a point $p \in \R^n$, then $p$ is a critical point of $f$. For, if $f$ has a local extremum at $p=(p_1, p_2, \dots, p_n)$, then for any $i = 1, 2, \dots, n$, the function $\phi_i$ defined by $\phi_i(t) = f(p_1, p_2, \dots, p_{i-1}, t, p_{i+1}, \dots, p_n)$ has a local extremum at $t = p_i$. So, by an elementary single-variable argument,
|
||||
|
||||
\[\frac{\partial f(p)}{\partial t_i} = \frac{d\phi_i(p_i)}{dt} = 0.\]
|
||||
|
||||
Thus $p$ is a critical point of $f$. But critical points are not necessarily local extrema.
|
||||
|
||||
The second-order partial derivatives of $f$ at a critical point $p$ can often be used to test for local extremum at $p$. These partials determine a matrix $A(p)$ in which the row $i$, column $j$ entry is
|
||||
|
||||
\[\frac{\partial^2f(p)}{(\partial t_i)(\partial t_j)}.\]
|
||||
|
||||
This matrix is called the \textbf{Hessian matrix} of $f$ at $p$. Note that if the third order partial derivatives of $f$ are continuous, then the mixed second-order partials of $f$ at $p$ are independent of the order in which they are taken, and hence $A(p)$ is a symmetric matrix. In this case, all of the eigenvalues of $A(p)$ are real.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}[\textbf{The Second Derivative Test}]
|
||||
\hfill\\
|
||||
Let $f(t_1, t_2, \dots, t_n)$ be a real-valued function in $n$ real variables for which all third-order partial derivatives exist and are continuous. Let $p = (p_1, p_2, \dots, p_n)$ be a critical point of $f$, and let $A(p)$ be the Hessian of $f$ at $p$.
|
||||
|
||||
\begin{enumerate}
|
||||
\item If all eigenvalues of $A(p)$ are positive, then $f$ has a local minimum at $p$.
|
||||
\item If all eigenvalues of $A(p)$ are negative, then $f$ has a local maximum at $p$.
|
||||
\item If $A(p)$ has at least one positive and at least one negative eigenvalue, then $f$ has no local extremum at $p$ ($p$ is called a \textbf{saddle-point} of $f$).
|
||||
\item If $\rank{A(p)} < n$ and $A(p)$ does not have both positive and negative eigenvalues, then the second derivative test is inconclusive.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The \textbf{rank} of a bilinear form is the rank of any of its matrix representations.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}[\textbf{Sylvester's Law of Inertia}]
|
||||
\hfill\\
Let $H$ be a symmetric bilinear form on a finite-dimensional real vector space $V$. Then the number of positive diagonal entries and the number of negative diagonal entries in any diagonal matrix representation of $H$ are each independent of the diagonal representation.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The number of positive diagonal entries in a diagonal representation of a symmetric bilinear form on a real vector space is called the \textbf{index} of the form. The difference between the number of positive and the number of negative diagonal entries in a diagonal representation of a symmetric bilinear form is called the \textbf{signature} of the form. The three terms \textit{rank}, \textit{index}, and \textit{signature} are called \textbf{invariants} of the bilinear form because they are invariant with respect to matrix representations. These same terms apply to the associated quadratic form. Notice that the values of any two of these invariants determine the value of the third.
|
||||
\end{definition}
|
||||
|
||||
\begin{corollary}[\textbf{Sylvester's Law of Inertia for Matrices}]
|
||||
\hfill\\
|
||||
Let $A$ be a real symmetric matrix. Then the number of positive diagonal entries and the number of negative diagonal entries in any diagonal matrix congruent to $A$ is independent of the choice of the diagonal matrix.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be a real symmetric matrix, and let $D$ be a diagonal matrix that is congruent to $A$. The number of positive diagonal entries of $D$ is called the \textbf{index} of $A$. The difference between the number of positive diagonal entries and the number of negative diagonal entries of $D$ is called the \textbf{signature} of $A$. As before, the \textit{rank}, \textit{index}, and \textit{signature} of a matrix are called the \textbf{invariants} of the matrix, and the values of any two of these invariants determine the value of the third.
|
||||
\end{definition}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Two real symmetric $n \times n$ matrices are congruent if and only if they have the same invariants.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
A real symmetric $n \times n$ matrix $A$ has index $p$ and rank $r$ if and only if $A$ is congruent to $J_{pr}$, defined by
|
||||
|
||||
\[J_{pr} = \begin{pmatrix}
|
||||
I_p & O & O \\
|
||||
O & -I_{r-p} & O \\
|
||||
O & O & O
|
||||
\end{pmatrix}\]
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For a $2 \times 2$ matrix $M$ with columns $x$ and $y$, the bilinear form $H(M)=H(x,y)$ is called the \textbf{permanent} of $M$.
|
||||
\end{definition}
|
||||
|
||||
|
||||
\section{Conditioning and the Rayleigh Quotient}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For a system of linear equations $Ax = b$, two types of errors must be considered for real-world application. First, experimental errors arise in the collection of data since no instruments can provide completely accurate measurements. Second, computers introduce round-off errors. One might intuitively feel that small relative changes in the coefficients of the system cause small relative errors in the solution. A system that has this property is called \textbf{well-conditioned}; otherwise, the system is called \textbf{ill-conditioned}.
|
||||
\end{definition}
|
||||
|
||||
\begin{notation}
|
||||
\hfill\\
|
||||
Let $Ax = b$ be a system of linear equations. We use the notation $\delta b$ to denote the vector $b' - b$, where $b$ is the vector in the original system, and $b'$ is the vector in the modified system.
|
||||
\end{notation}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $Ax = b$ be a system of linear equations. We define the \textbf{relative change} in a vector $b$ to be the scalar $||\delta b||/ ||b||$, where $|| \cdot ||$ denotes the standard norm on $\C^n$ (or $\R^n$); that is, $||b|| = \sqrt{\lr{b, b}}$. Similar definitions hold for the \textbf{relative change} of $x$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
Let $A$ be a complex (or real) $n \times n$ matrix. Define the \textbf{(Euclidean) norm} of $A$ by
|
||||
|
||||
\[||A|| = \max_{x \neq 0} \frac{||Ax||}{||x||},\]
|
||||
|
||||
where $x \in \C^n$ or $x \in \R^n$.\\
|
||||
|
||||
Intuitively, $||A||$ represents the maximum \textit{magnification} of a vector by the matrix $A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
Let $B$ be an $n \times n$ self-adjoint matrix. The \textbf{Rayleigh quotient} for $x \neq 0$ is defined to be the scalar $R(x) = \lr{Bx, x}/ ||x||^2$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For a self-adjoint matrix $B \in M_{n \times n}(\F)$, we have that $\displaystyle\max_{x \neq 0}R(x)$ is the largest eigenvalue of $B$ and $\displaystyle\min_{x \neq 0}R(x)$ is the smallest eigenvalue of $B$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
For any square matrix $A$, $||A||$ is finite and, in fact, equals $\sqrt{\lambda}$, where $\lambda$ is the largest eigenvalue of $A^*A$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
For any square matrix $A$, $\lambda$ is an eigenvalue of $A^*A$ if and only if $\lambda$ is an eigenvalue of $AA^*$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ be an invertible matrix. Then $||A^{-1}|| = 1/\sqrt{\lambda}$, where $\lambda$ is the smallest eigenvalue of $A^*A$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be an invertible matrix. The number $||A||\cdot||A^{-1}||$ is called the \textbf{condition number} of $A$ and is denoted $\text{cond}(A)$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For the system $Ax = b$, where $A$ is invertible and $b \neq 0$, the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item For any norm, $||\cdot||$, we have $\displaystyle\frac{1}{\text{cond}(A)}\frac{||\delta b ||}{||b||} \leq \frac{||\delta x||}{||x||} \leq \text{cond}(A)\frac{||\delta b||}{||b||}$.
|
||||
\item If $||\cdot||$ is the Euclidean norm, then $\text{cond}(A) = \sqrt{\lambda_1/\lambda_n}$, where $\lambda_1$ and $\lambda_n$ are the largest and smallest eigenvalues, respectively, of $A^*A$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
|
||||
\section{Einstein's Special Theory of Relativity}
|
||||
|
||||
\begin{definition}[\textbf{Axioms of the Special Theory of Relativity}]
|
||||
\hfill\\
|
||||
The basic problem is to compare two different inertial (non-accelerating) coordinate systems $S$ and $S'$ in three-space ($\R^3$) that are in motion relative to each other under the assumption that the speed of light is the same when measured in either system. We assume that $S'$ moves at a constant velocity in relation to $S$ as measured from $S$. To simplify matters, let us suppose that the following conditions hold:
|
||||
|
||||
\begin{enumerate}
|
||||
\item The corresponding axes of $S$ and $S'$ ($x$ and $x'$, $y$ and $y'$, $z$ and $z'$) are parallel, and the origin of $S'$ moves in the positive direction of the $x$-axis of $S$ at a constant velocity $v > 0$ relative to $S$.
|
||||
\item Two clocks $C$ and $C'$ are placed in space - the first stationary relative to the coordinate system $S$ and the second stationary relative to the coordinate system $S'$. These clocks are designed to give real numbers in units of seconds as readings. The clocks are calibrated so that at the instant the origins of $S$ and $S'$ coincide, both clocks give the reading zero.
|
||||
\item The unit of length is the \textbf{light second} (the distance light travels in 1 second), and the unit of time is the second. Note that, with respect to these units, the speed of light is 1 light second per second.
|
||||
\end{enumerate}
|
||||
|
||||
Given any event (something whose position and time of occurrence can be described), we may assign a set of \textit{space-time coordinates} to it. For example, if $p$ is an event that occurs at position
|
||||
|
||||
\[\begin{pmatrix} x \\ y \\ z \end{pmatrix}\]
|
||||
|
||||
relative to $S$ and at time $t$ as read on clock $C$, we can assign to $p$ the set of coordinates
|
||||
|
||||
\[\begin{pmatrix} x \\ y \\ z \\ t \end{pmatrix}.\]
|
||||
|
||||
This ordered 4-tuple is called the \textbf{space-time coordinates} of $p$ relative to $S$ and $C$. Likewise, $p$ has a set of space-time coordinates
|
||||
|
||||
\[\begin{pmatrix} x' \\ y' \\ z' \\ t' \end{pmatrix}\]
|
||||
|
||||
relative to $S'$ and $C'$.
|
||||
|
||||
For a fixed velocity $v$, let $T_v: \R^4 \to \R^4$ be the mapping defined by
|
||||
|
||||
\[T_v \begin{pmatrix}x \\ y \\ z \\ t \end{pmatrix} = \begin{pmatrix} x' \\ y' \\ z' \\ t' \end{pmatrix},\]
|
||||
|
||||
where
|
||||
|
||||
\[\begin{pmatrix}x \\ y \\ z \\ t \end{pmatrix}\ \ \ \text{and}\ \ \ \begin{pmatrix} x' \\ y' \\ z' \\ t' \end{pmatrix}\]
|
||||
|
||||
are the space-time coordinates of the same event with respect to $S$ and $C$ and with respect to $S'$ and $C'$, respectively.
|
||||
|
||||
Einstein made certain assumptions about $T_v$ that led to his special theory of relativity. We formulate an equivalent set of assumptions.
|
||||
|
||||
\begin{enumerate}
|
||||
\item[(R 1)] The speed of any light beam, when measured in either coordinate system using a clock stationary relative to that coordinate system, is 1.
|
||||
\item[(R 2)] The mapping $T_v: \R^4 \to \R^4$ is an isomorphism.
|
||||
\item[(R 3)] If
|
||||
|
||||
\[T_v\begin{pmatrix}
|
||||
x \\ y \\ z \\ t
|
||||
\end{pmatrix} = \begin{pmatrix}
|
||||
x' \\ y' \\ z' \\ t'
|
||||
\end{pmatrix}\]
|
||||
|
||||
then $y' = y$ and $z' = z$.
|
||||
\item[(R 4)] If
|
||||
|
||||
\[T_v\begin{pmatrix}
|
||||
x \\ y_1 \\ z_1 \\ t
|
||||
\end{pmatrix} = \begin{pmatrix}
|
||||
x' \\ y' \\ z' \\ t'
|
||||
\end{pmatrix}\ \ \ \ \text{and}\ \ \ \ T_v\begin{pmatrix}
|
||||
x \\ y_2 \\ z_2 \\ t
|
||||
\end{pmatrix} = \begin{pmatrix}
|
||||
x'' \\ y'' \\ z'' \\ t''
|
||||
\end{pmatrix}\]
|
||||
then $x'' = x'$ and $t'' = t'$.
|
||||
\item[(R 5)] The origin of $S$ moves in the negative direction of the $x'$-axis of $S'$ at the constant velocity $-v < 0$ as measured from $S'$.
|
||||
\end{enumerate}
|
||||
|
||||
Axioms (R 3) and (R 4) tell us that for $p \in \R^4$, the second and third coordinates of $T_v(p)$ are unchanged and the first and fourth coordinates of $T_v(p)$ are independent of the second and third coordinates of $p$.
|
||||
|
||||
These five axioms completely characterize $T_v$. The operator $T_v$ is called the \textbf{Lorentz transformation} in direction $x$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
On $\R^4$, the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T_v(e_i) = e_i$ for $i = 2,3$.
|
||||
\item $\lspan{\{e_2, e_3\}}$ is $T_v$-invariant.
|
||||
\item $\lspan{\{e_1, e_4\}}$ is $T_v$-invariant.
|
||||
\item Both $\lspan{\{e_2, e_3\}}$ and $\lspan{\{e_1, e_4\}}$ are $T_v^*$-invariant.
|
||||
\item $T_v^*(e_i) = e_i$ for $i=2,3$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
If $\lr{L_A(w),w} = 0$ for some $w \in \R^4$, then
|
||||
|
||||
\[\lr{T_v^*L_AT_v(w), w} = 0.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
There exist nonzero scalars $a$ and $b$ such that
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T_v^*L_AT_v(w_1) = aw_2$.
|
||||
\item $T_v^*L_AT_v(w_2) = bw_1$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $B_v = [T_v]_\beta$, where $\beta$ is the standard ordered basis for $\R^4$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $B_v^*AB_v = A$.
|
||||
\item $T_v^*L_AT_v = L_A$.
|
||||
\end{enumerate}
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $\beta$ be the standard ordered basis for $\R^4$. Then
|
||||
|
||||
\[[T_v]_\beta = B_v = \begin{pmatrix}
|
||||
\frac{1}{\sqrt{1 - v^2}} & 0 & 0 & \frac{-v}{\sqrt{1 - v^2}} \\
|
||||
0 & 1 & 0 & 0 \\
|
||||
0 & 0 & 1 & 0 \\
|
||||
\frac{-v}{\sqrt{1 - v^2}} & 0 & 0 & \frac{1}{\sqrt{1 - v^2}}
|
||||
\end{pmatrix}\]
|
||||
\end{theorem}
|
||||
|
||||
|
||||
\section{Inner Products and Norms}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space over $\F$. An \textbf{inner product} on $V$ is a function that assigns, to every ordered pair of vectors $x$ and $y$ in $V$, a scalar in $\F$, denoted by $\lr{x, y}$, such that for all $x, y$ and $z$ in $V$ and all $c$ in $\F$, the following hold:
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\lr{x + z, y} = \lr{x, y} + \lr{z,y}$.
|
||||
\item $\lr{cx, y} = c\lr{x,y}$.
|
||||
\item $\overline{\lr{x,y}} = \lr{y,x}$, where the bar denotes complex conjugation.
|
||||
\item $\lr{x, x} > 0$ if $x \neq 0$.
|
||||
\end{enumerate}
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}\label{Definition 6.2}
|
||||
\hfill\\
|
||||
If $x$ and $y$ are in the vector space $\C^n$, define their inner product to be
|
||||
|
||||
\[\lr{x,y} = \sum_{k=1}^{n}x_k\overline{y_k}.\]
|
||||
|
||||
This is called the (complex) \textbf{standard inner product}.\\
|
||||
|
||||
When $\F = \R$ the conjugations are not needed, and in early courses this standard inner product is usually called the \textit{dot product} and is denoted by $x \cdot y$ instead of $\lr{x, y}$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}\label{Conjugate Transpose}
|
||||
\hfill\\
|
||||
Let $A \in M_{m \times n}(\F)$. We define the \textbf{conjugate transpose} or \textbf{adjoint} of $A$ to be the $n \times m$ matrix $A^*$ such that $(A^*)_{ij} = \overline{A_{ji}}$ for all $i,j$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}\label{Definition 6.4}
|
||||
\hfill\\
|
||||
Given two complex $n \times m$ matrices $A$ and $B$, written explicitly as
|
||||
|
||||
\[A = \begin{pmatrix}
|
||||
A_{11} & A_{12} & \dots & A_{1m} \\
|
||||
A_{21} & A_{22} & \dots & A_{2m} \\
|
||||
\vdots & \vdots & \ddots & \vdots \\
|
||||
A_{n1} & A_{n2} & \dots & A_{nm}
|
||||
\end{pmatrix},\ \ \ \ B = \begin{pmatrix}
|
||||
B_{11} & B_{12} & \dots & B_{1m} \\
|
||||
B_{21} & B_{22} & \dots & B_{2m} \\
|
||||
\vdots & \vdots & \ddots & \vdots \\
|
||||
B_{n1} & B_{n2} & \dots & B_{nm}
|
||||
\end{pmatrix}\]
|
||||
|
||||
the \textbf{Frobenius inner product} is defined as,
|
||||
|
||||
\[\lr{A, B}_\F = \sum_{i,j}\overline{A_{ij}}B_{ij} = \text{tr}(A^*B)\]
|
||||
|
||||
Where the overline denotes the complex conjugate, and $A^*$ denotes the conjugate transpose (\autoref{Conjugate Transpose}). Explicitly this sum is
|
||||
|
||||
\[\begin{aligned}
|
||||
\lr{A,B}_\F = & \overline{A_{11}}B_{11} + \overline{A_{12}}B_{12} + \dots + \overline{A_{1m}}B_{1m} \\
|
||||
& + \overline{A_{21}}B_{21} + \overline{A_{22}}B_{22} + \dots + \overline{A_{2m}}B_{2m} \\
|
||||
& \vdots \\
|
||||
& + \overline{A_{n1}}B_{n1} + \overline{A_{n2}}B_{n2} + \dots + \overline{A_{nm}}B_{nm} \\
|
||||
\end{aligned}\]
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A vector space $V$ over $\F$ endowed with a specific inner product is called an \textbf{inner product space}. If $\F=\C$, we call $V$ a \textbf{complex inner product space}, whereas if $\F = \R$, we call $V$ a \textbf{real inner product space}.
|
||||
\end{definition}
|
||||
|
||||
\begin{notation}
|
||||
\hfill\\
|
||||
For the remainder of this chapter, $F^n$ denotes the inner product space with the standard inner product as defined in \autoref{Definition 6.2}. Likewise, $M_{n \times n}(F)$ denotes the inner product space with the Frobenius inner product as defined in \autoref{Definition 6.4}.
|
||||
\end{notation}
|
||||
|
||||
\begin{theorem}\label{Theorem 6.1}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space. Then for $x,y,z \in V$ and $c \in \F$ the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\lr{x, y+z} = \lr{x,y} + \lr{x,z}$.
|
||||
\item $\lr{x,cy} = \overline{c}\lr{x,y}$.
|
||||
\item $\lr{x,0} = \lr{0,x} = 0$.
|
||||
\item $\lr{x,x} = 0$ if and only if $x = 0$.
|
||||
\item If $\lr{x,y} = \lr{x,z}$ for all $x \in V$, then $y=z$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{remark}
|
||||
\hfill\\
|
||||
It should be observed that (1) and (2) of \autoref{Theorem 6.1} show that the inner product is \textbf{conjugate linear} in the second component.
|
||||
\end{remark}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space. For $x \in V$, we define the \textbf{norm} or \textbf{length} of $x$ by\\ $||x|| = \sqrt{\lr{x,x}}$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space over $\F$. Then for all $x,y \in V$ and $c \in \F$, the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $||cx|| = |c|\cdot||x||$.
|
||||
\item $||x|| = 0$ if and only if $x = 0$. In any case, $||x|| \geq 0$.
|
||||
\item (Cauchy-Schwarz Inequality) $|\lr{x,y}| \leq ||x||\cdot||y||$.
|
||||
\item (Triangle Inequality) $||x + y|| \leq ||x|| + ||y||$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space. Vectors $x$ and $y$ in $V$ are \textbf{orthogonal} (\textbf{perpendicular}) if $\lr{x,y} = 0$. A subset $S$ of $V$ is \textbf{orthogonal} if any two distinct vectors in $S$ are orthogonal. A vector $x$ in $V$ is a \textbf{unit vector} if $||x|| = 1$. Finally, a subset $S$ of $V$ is \textbf{orthonormal} if $S$ is orthogonal and consists entirely of unit vectors.\\
|
||||
|
||||
Note that if $S = \{v_1, v_2, \dots\}$, then $S$ is orthonormal if and only if $\lr{v_i, v_j} = \delta_{ij}$, where $\delta_{ij}$ denotes the Kronecker delta. Also, observe that multiplying vectors by nonzero scalars does not affect their orthogonality and that if $x$ is any nonzero vector, then $(1/||x||)x$ is a unit vector. The process of multiplying a nonzero vector by the reciprocal of its length is called \textbf{normalizing}.
|
||||
\end{definition}
|
||||
|
||||
\begin{lemma}[\textbf{Parallelogram Law}]
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space. Then
|
||||
|
||||
\[||x + y||^2 + ||x - y||^2 = 2||x||^2 + 2||y||^2\ \ \text{for all}\ x,y \in V\]
|
||||
\end{lemma}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a vector space over $\F$, where $\F$ is either $\R$ or $\C$. Regardless of whether $V$ is or is not an inner product space, we may still define a norm $||\cdot||$ as a real-valued function on $V$ satisfying the following three conditions for all $x,y \in V$ and $a \in \F$:
|
||||
|
||||
\begin{enumerate}
|
||||
\item $||x|| \geq 0$, and $||x|| = 0$ if and only if $x = 0$.
|
||||
\item $||ax|| = |a| \cdot ||x||$.
|
||||
\item $||x + y|| \leq ||x|| + ||y||$.
|
||||
\end{enumerate}
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $||\cdot||$ be a norm on a vector space $V$, and define, for each ordered pair of vectors, the scalar $d(x, y) = ||x - y||$. This is called the \textbf{distance} between $x$ and $y$.
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,65 @@
|
||||
\section{Normal and Self-Adjoint Operators}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional inner product space $V$. If $T$ has an eigenvector, then so does $T^*$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}[\textbf{Schur}]
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional inner product space $V$. Suppose that the characteristic polynomial of $T$ splits. Then there exists an orthonormal basis $\beta$ for $V$ such that the matrix $[T]_\beta$ is upper triangular.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space, and let $T$ be a linear operator on $V$. We say that $T$ is \textbf{normal} if $TT^* = T^*T$. An $n \times n$ real or complex matrix $A$ is \textbf{normal} if $AA^* = A^*A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space, and let $T$ be a normal operator on $V$. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $||T(x)|| = ||T^*(x)||$ for all $x \in V$.
|
||||
\item $T - cI$ is normal for every $c \in \F$.
|
||||
\item If $x$ is an eigenvector of $T$, then $x$ is also an eigenvector of $T^*$. In fact, if $T(x) = \lambda x$, then $T^*(x) = \overline{\lambda}x$.
|
||||
\item If $\lambda_1$ and $\lambda_2$ are distinct eigenvalues of $T$ with corresponding eigenvectors $x_1$ and $x_2$, then $x_1$ and $x_2$ are orthogonal.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional complex inner product space $V$. Then $T$ is normal if and only if there exists an orthonormal basis for $V$ consisting of eigenvectors of $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on an inner product space $V$. We say that $T$ is \textbf{self-adjoint} (\textbf{Hermitian}) if $T = T^*$. An $n \times n$ real or complex matrix $A$ is \textbf{self-adjoint} (\textbf{Hermitian}) if $A = A^*$.
|
||||
\end{definition}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $T$ be a self-adjoint operator on a finite-dimensional inner product space $V$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item Every eigenvalue of $T$ is real.
|
||||
\item Suppose that $V$ is a real inner product space. Then the characteristic polynomial of $T$ splits.
|
||||
\end{enumerate}
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional real inner product space $V$. Then $T$ is self-adjoint if and only if there exists an orthonormal basis $\beta$ for $V$ consisting of eigenvectors of $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
An $n \times n$ real matrix $A$ is said to be a \textbf{Gramian} matrix if there exists a real (square) matrix $B$ such that $A = B^tB$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A linear operator $T$ on a finite-dimensional inner product space is called \textbf{positive definite [positive semidefinite]} if $T$ is self-adjoint and $\lr{T(x),x} > 0$ [$\lr{T(x),x} \geq 0$] for all $x \neq 0$.
|
||||
|
||||
An $n \times n$ matrix $A$ with entries from $\R$ or $\C$ is called \textbf{positive definite [positive semidefinite]} if $L_A$ is positive definite [positive semidefinite].
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,69 @@
|
||||
\section{Orthogonal Projections and the Spectral Theorem}
|
||||
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space, and let $T: V \to V$ be a projection. We say that $T$ is an \textbf{orthogonal projection} if $\range{T}^\perp = \n{T}$ and $\n{T}^\perp = \range{T}$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $W$ be a finite-dimensional subspace of an inner product space $V$, and let $T$ be an orthogonal projection on $W$. We call $T$ the \textbf{orthogonal projection of $V$ on $W$}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
A \textbf{trigonometric polynomial of degree $n$} is a function $g \in \mathsf{H}$ of the form
|
||||
|
||||
\[g(t) = \sum_{j=-n}^{n}a_jf_j(t) = \sum_{j=-n}^{n}a_je^{ijt},\]
|
||||
|
||||
where $a_n$ or $a_{-n}$ is nonzero.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space, and let $T$ be a linear operator on $V$. Then $T$ is an orthogonal projection if and only if $T$ has an adjoint $T^*$ and $T^2 = T = T^*$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}[\textbf{The Spectral Theorem}]\label{The Spectral Theorem}
|
||||
\hfill\\
|
||||
Suppose that $T$ is a linear operator on a finite-dimensional inner product space $V$ over $\F$ with the distinct eigenvalues $\lambda_1, \lambda_2, \dots, \lambda_k$. Assume that $T$ is normal if $\F = \C$ and that $T$ is self-adjoint if $\F = \R$. For each $i$ ($1 \leq i \leq k$), let $W_i$ be the eigenspace of $T$ corresponding to the eigenvalue $\lambda_i$, and let $T_i$ be the orthogonal projection of $V$ on $W_i$. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $V = W_1 \oplus W_2 \oplus \dots \oplus W_k$.
|
||||
\item If $W_i'$ denotes the direct sum of the subspaces $W_j$ for $j \neq i$, then $W_i^\perp = W_i'$.
|
||||
\item $T_iT_j = \delta_{ij}T_i$ for $1 \leq i, j \leq k$.
|
||||
\item $I = T_1 + T_2 + \dots + T_k$.
|
||||
\item $T = \lambda_1T_1 + \lambda_2T_2 + \dots + \lambda_kT_k$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
In the context of \autoref{The Spectral Theorem}:
|
||||
|
||||
\begin{enumerate}
|
||||
\item The set $\{\lambda_1, \lambda_2, \dots, \lambda_k\}$ of eigenvalues of $T$ is called the \textbf{spectrum} of $T$.
|
||||
\item The sum $I = T_1 + T_2 + \dots + T_k$ in (4) is called the \textbf{resolution of the identity operator} induced by $T$.
|
||||
\item The sum $T = \lambda_1T_1 + \lambda_2T_2 + \dots + \lambda_kT_k$ in (5) is called the \textbf{spectral decomposition} of $T$. The spectral decomposition of $T$ is unique up to the order of its eigenvalues.
|
||||
\end{enumerate}
|
||||
\end{definition}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
If $\F = \C$, then $T$ is normal if and only if $T^* = g(T)$ for some polynomial $g$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
If $\F = \C$, then $T$ is unitary if and only if $T$ is normal and $|\lambda| = 1$ for every eigenvalue of $T$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
If $\F = \C$ and $T$ is normal, then $T$ is self-adjoint if and only if every eigenvalue of $T$ is real.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be as in \autoref{The Spectral Theorem} with spectral decomposition $T = \lambda_1T_1 + \lambda_2T_2 + \dots + \lambda_kT_k$. Then each $T_j$ is a polynomial in $T$.
|
||||
\end{corollary}
|
||||
|
||||
@@ -1 +1,108 @@
|
||||
\section{The Adjoint of a Linear Operator}
|
||||
|
||||
\begin{definition}\label{Definition 6.16}
|
||||
\hfill\\
|
||||
For a linear operator $T$ on an inner product space $V$, we define a related linear operator on $V$ called the \textbf{adjoint} of $T$, whose matrix representation with respect to any orthonormal basis $\beta$ for $V$ is $[T]_\beta^*$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional inner product space over $\F$, and let $\mathsf{g}: V \to \F$ be a linear transformation. Then there exists a unique vector $y \in V$ such that $\mathsf{g}(x) = \lr{x,y}$ for all $x \in V$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional inner product space, and let $T$ be a linear operator on $V$. Then there exists a unique function $T^*: V \to V$ such that $\lr{T(x), y} = \lr{x, T^*(y)}$ for all $x,y \in V$. Furthermore, $T^*$ is linear.
|
||||
\end{theorem}
|
||||
|
||||
\begin{remark}
|
||||
\hfill\\
|
||||
\textbf{Important Note:} For the remainder of this chapter we adopt the convention that a reference to the adjoint of a linear operator on an infinite-dimensional inner product space assumes its existence.
|
||||
\end{remark}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional inner product space, and let $\beta$ be an orthonormal basis for $V$. If $T$ is a linear operator on $V$, then
|
||||
|
||||
\[[T^*]_\beta = [T]_\beta^*.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ be an $n \times n$ matrix. Then $L_{A^*} = (L_A)^*$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space, and let $T$ and $U$ be linear operators on $V$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $(T+U)^* = T^* + U^*$;
|
||||
\item $(cT)^* = \overline{c}T^*$ for any $c \in \F$;
|
||||
\item $(TU)^* = U^*T^*$;
|
||||
\item $T^{**} = T$;
|
||||
\item $I^* = I$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ and $B$ be $n \times n$ matrices. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $(A + B)^* = A^* + B^*$;
|
||||
\item $(cA)^* = \overline{c}A^*$ for all $c \in \F$;
|
||||
\item $(AB)^* = B^*A^*$;
|
||||
\item $A^{**} = A$;
|
||||
\item $I^* = I$.
|
||||
\end{enumerate}
|
||||
\end{corollary}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $A \in M_{m \times n}(\F), x \in F^n$, and $y \in F^m$. Then
|
||||
|
||||
\[\lr{Ax,y}_m = \lr{x,A^*y}_n.\]
|
||||
\end{lemma}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $A \in M_{m \times n}(\F)$. Then $\rank{A^*A} = \rank{A}$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
If $A$ is an $m \times n$ matrix such that $\rank{A} = n$, then $A^*A$ is invertible.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A \in M_{m \times n}(\F)$ and $y \in F^m$. Then there exists $x_0 \in F^n$ such that $(A^*A)x_0 = A^*y$ and $||Ax_0 - y|| \leq ||Ax - y||$ for all $x \in F^n$. Furthermore, if $\rank{A} = n$, then $x_0 = (A^*A)^{-1}A^*y$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A solution $s$ to a system of linear equations $Ax = b$ is called a \textbf{minimal solution} if $||s|| \leq ||u||$ for all other solutions $u$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A \in M_{m \times n}(\F)$ and $b \in F^m$. Suppose that $Ax = b$ is consistent. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item There exists exactly one minimal solution $s$ of $Ax = b$, and $s \in \range{L_{A^*}}$.
|
||||
\item The vector $s$ is the only solution to $Ax = b$ that lies in $\range{L_{A^*}}$; that is, if $u$ satisfies $(AA^*)u = b$, then $s = A^*u$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T: V \to W$ be a linear transformation, where $V$ and $W$ are finite-dimensional inner product spaces with inner products $\lr{\cdot, \cdot}_1$ and $\lr{\cdot, \cdot}_2$, respectively. A function $T^*: W \to V$ is called an \textbf{adjoint} of $T$ if $\lr{T(x),y}_2 = \lr{x,T^*(y)}_1$ for all $x \in V$ and $y \in W$.\\
|
||||
|
||||
This is an extension of the definition of the \textit{adjoint} of a linear operator (\autoref{Definition 6.16}).
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
In physics, \textit{Hooke's law} states that (within certain limits) there is a linear relationship between the length $x$ of a spring and the force $y$ applied to (or exerted by) the spring. That is, $y = cx + d$, where $c$ is called the \textbf{spring constant}.
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,66 @@
|
||||
\section{The Geometry of Orthogonal Operators}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional real inner product space $V$. The operator $T$ is called a \textbf{rotation} if $T$ is the identity on $V$ or if there exists a two-dimensional subspace $W$ of $V$, an orthonormal basis $\beta = \{x_1, x_2\}$ for $W$, and a real number $\theta$ such that
|
||||
|
||||
\[T(x_1) = (\cos(\theta))x_1 + (\sin(\theta))x_2,\ \ \ \ T(x_2) = (-\sin(\theta))x_1 + (\cos(\theta))x_2,\]
|
||||
|
||||
and $T(y) = y$ for all $y \in W^\perp$. In this context, $T$ is called a \textbf{rotation of $W$ about $W^\perp$}. The subspace $W^\perp$ is called the \textbf{axis of rotation}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional real inner product space $V$. The operator $T$ is called a \textbf{reflection} if there exists a one-dimensional subspace $W$ of $V$ such that $T(x) = -x$ for all $x \in W$ and $T(y) = y$ for all $y \in W^\perp$. In this context, $T$ is called a \textbf{reflection of $V$ about $W^\perp$}.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be an orthogonal operator on a two-dimensional real inner product space $V$. Then $T$ is either a rotation or a reflection. Furthermore, $T$ is a rotation if and only if $\det(T) = 1$, and $T$ is a reflection if and only if $\det(T) = -1$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $V$ be a two-dimensional real inner product space. The composite of a reflection and a rotation on $V$ is a reflection on $V$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
If $T$ is a linear operator on a nonzero finite-dimensional real vector space $V$, then there exists a $T$-invariant subspace $W$ of $V$ such that $1 \leq \ldim{W} \leq 2$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}\label{Theorem 6.46}
|
||||
\hfill\\
|
||||
Let $T$ be an orthogonal operator on a nonzero finite-dimensional real inner product space $V$. Then there exists a collection of pairwise orthogonal $T$-invariant subspaces $\{W_1, W_2, \dots, W_m\}$ of $V$ such that
|
||||
|
||||
\begin{enumerate}
|
||||
\item $1 \leq \ldim(W_i) \leq 2$ for $i = 1, 2, \dots, m$.
|
||||
\item $V = W_1 \oplus W_2 \oplus \dots \oplus W_m$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T,V,W_1,\dots,W_m$ be as in \autoref{Theorem 6.46}.
|
||||
|
||||
\begin{enumerate}
|
||||
\item The number of $W_i$'s for which $T_{W_i}$ is a reflection is even or odd according to whether $\det(T) = 1$ or $\det(T) = -1$.
|
||||
\item It is always possible to decompose $V$ as in \autoref{Theorem 6.46} so that the number of $W_i$'s for which $T_{W_i}$ is a reflection is zero or one according to whether $\det(T) = 1$ or $\det(T) = -1$. Furthermore, if $T_{W_i}$ is a reflection, then $\ldim{W_i} = 1$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be an orthogonal operator on a finite-dimensional real inner product space $V$. Then there exists a collection $\{T_1, T_2, \dots, T_m\}$ of orthogonal operators on $V$ such that the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item For each $i$, $T_i$ is either a reflection or a rotation.
|
||||
\item For at most one $i$, $T_i$ is a reflection.
|
||||
\item $T_iT_j = T_jT_i$ for all $i$ and $j$.
|
||||
\item $T = T_1T_2\dots T_m$.
|
||||
\item $\det(T) = \displaystyle\begin{cases}
|
||||
1 & \text{if}\ T_i\ \text{is a rotation for each}\ i \\
|
||||
-1 & \text{otherwise}
|
||||
\end{cases}$
|
||||
\end{enumerate}
|
||||
\end{corollary}
|
||||
|
||||
@@ -1 +1,116 @@
|
||||
\section{The Gram-Schmidt Orthogonalization Process and Orthogonal Complements}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space. A subset of $V$ is an \textbf{orthonormal basis} for $V$ if it is an ordered basis that is orthonormal.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}\label{Theorem 6.3}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space and $S=\{v_1, v_2, \dots, v_k\}$ be an orthogonal subset of $V$ consisting of nonzero vectors. If $y \in \lspan{S}$, then
|
||||
|
||||
\[y = \sum_{i=1}^{k}\frac{\lr{y,v_i}}{||v_i||^2}v_i.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
If, in addition to the hypotheses of \autoref{Theorem 6.3}, $S$ is orthonormal and $y \in \lspan{S}$, then
|
||||
|
||||
\[y = \sum_{i=1}^{k}\lr{y,v_i}v_i.\]
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space, and let $S$ be an orthogonal subset of $V$ consisting of nonzero vectors. Then $S$ is linearly independent.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be an inner product space and $S = \{w_1, w_2, \dots, w_n\}$ be a linearly independent subset of $V$. Define $S' = \{v_1, v_2, \dots, v_n\}$ where $v_1 = w_1$ and
|
||||
|
||||
\[v_k = w_k - \sum_{j=1}^{k-1}\frac{\lr{w_k,v_j}}{||v_j||^2}v_j\ \ \text{for}\ 2 \leq k \leq n.\]
|
||||
|
||||
Then $S'$ is an orthogonal set of nonzero vectors such that $\lspan{S'} = \lspan{S}$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The \textbf{Gram-Schmidt process} is a way of making two or more vectors perpendicular to each other. It is a method of constructing an orthonormal basis from a set of vectors in an inner product space. It takes a finite, linearly independent set of vectors $S = \{v_1, v_2, \dots, v_k\}$ for $k \leq n$ and generates an orthogonal set $S' = \{u_1, u_2, \dots, u_k\}$ that spans the same $k$-dimensional subspace of $\R^n$ as $S$.\\
|
||||
|
||||
The \textbf{vector projection} of a vector $v$ on a nonzero vector $u$ is defined as
|
||||
|
||||
\[\text{proj}_u(v) = \frac{\lr{v,u}}{\lr{u,u}}u.\]
|
||||
|
||||
This means that $\text{proj}_u(v)$ is the orthogonal projection of $v$ onto the line spanned by $u$. If $u$ is the zero vector, then $\text{proj}_u(v)$ is defined as the zero vector.\\
|
||||
|
||||
Given $k$ vectors $v_1, v_2, \dots, v_k$, the Gram-Schmidt process defines the vectors $u_1, u_2, \dots, u_k$ as follows:
|
||||
|
||||
\[\begin{aligned}
|
||||
& u_1 = v_1, & e_1 = \frac{u_1}{||u_1||} \\
|
||||
& u_2 = v_2 - \text{proj}_{u_1}(v_2), & e_2 = \frac{u_2}{||u_2||} \\
|
||||
& u_3 = v_3 - \text{proj}_{u_1}(v_3) - \text{proj}_{u_2}(v_3), & e_3 = \frac{u_3}{||u_3||} \\
|
||||
& u_4 = v_4 - \text{proj}_{u_1}(v_4) - \text{proj}_{u_2}(v_4) - \text{proj}_{u_3}(v_4), & e_4 = \frac{u_4}{||u_4||} \\
|
||||
& \vdots & \vdots \\
|
||||
& u_k = v_k - \sum_{j=1}^{k-1}\text{proj}_{u_j}(v_k), & e_k = \frac{u_k}{||u_k||}.
|
||||
\end{aligned}\]
|
||||
|
||||
The sequence $u_1, u_2, \dots, u_k$ is the required system of orthogonal vectors, and the normalized vectors $e_1, e_2, \dots, e_k$ form an orthonormal set.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $V$ be a nonzero finite-dimensional inner product space. Then $V$ has an orthonormal basis $\beta$. Furthermore, if $\beta = \{v_1, v_2, \dots, v_n\}$ and $x \in V$, then
|
||||
|
||||
\[x = \sum_{i=1}^{n}\lr{x,v_i}v_i.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional inner product space with an orthonormal basis $\beta = \{v_1, v_2, \dots, v_n\}$. Let $T$ be a linear operator on $V$, and let $A = [T]_\beta$. Then for any $i$ and $j$, $A_{ij} = \lr{T(v_j),v_i}$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $\beta$ be an orthonormal subset (possibly infinite) of an inner product space $V$, and let $x \in V$. We define the \textbf{Fourier coefficients} of $x$ relative to $\beta$ to be the scalars $\lr{x,y}$, where $y \in \beta$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $S$ be a nonempty subset of an inner product space $V$. We define $S^\perp$ (read ``$S$ perp'') to be the set of all vectors in $V$ that are orthogonal to every vector in $S$; that is, $S^\perp = \{x \in V : \lr{x,y} = 0,\ \forall y \in S\}$. The set $S^\perp$ is called the \textbf{orthogonal complement of $S$}.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}\label{Theorem 6.6}
|
||||
\hfill\\
|
||||
Let $W$ be a finite-dimensional subspace of an inner product space $V$, and let $y \in V$. Then there exist unique vectors $u \in W$ and $z \in W^\perp$ such that $y = u + z$. Furthermore, if $\{v_1, v_2, \dots, v_k\}$ is an orthonormal basis for $W$, then
|
||||
|
||||
\[u = \sum_{i=1}^{k}\lr{y,v_i}v_i.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
In the notation of \autoref{Theorem 6.6}, the vector $u$ is the unique vector in $W$ that is ``closest'' to $y$; that is, for any $x \in W$, $||y - x|| \geq ||y - u||$, and this inequality is an equality if and only if $x = u$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Suppose that $S = \{v_1, v_2, \dots, v_k\}$ is an orthonormal set in an $n$-dimensional inner product space $V$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $S$ can be extended to an orthonormal basis $\{v_1, v_2, \dots, v_k, v_{k+1}, \dots, v_n\}$ for $V$.
|
||||
\item If $W = \lspan{S}$, then $S_1 = \{v_{k+1}, v_{k+2}, \dots, v_n\}$ is an orthonormal basis for $W^\perp$ (using the preceding notation).
|
||||
\item If $W$ is any subspace of $V$, then $\ldim{V} = \ldim{W} + \ldim{W^\perp}$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}[\textbf{Parseval's Identity}]
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional inner product space over $\F$, and let $\{v_1, v_2, \dots, v_n\}$ be an orthonormal basis for $V$. Then for any $x,y \in V$,
|
||||
|
||||
\[\lr{x,y} = \sum_{i=1}^{n}\lr{x,v_i}\overline{\lr{y,v_i}}.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}[\textbf{Bessel's Inequality}]
|
||||
Let $V$ be an inner product space, and let $S = \{v_1, v_2, \dots, v_n\}$ be an orthonormal subset of $V$. For any $x \in V$,
|
||||
|
||||
\[||x||^2 \geq \sum_{i=1}^{n}|\lr{x,v_i}|^2.\]
|
||||
\end{definition}
|
||||
|
||||
@@ -1 +1,116 @@
|
||||
\section{The Singular Value Decomposition and the Pseudoinverse}
|
||||
|
||||
\begin{theorem}[\textbf{Singular Value Theorem for Linear Transformations}]\label{Theorem 6.26}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be finite-dimensional inner product spaces, and let $T: V \to W$ be a linear transformation of rank $r$. Then there exist orthonormal bases $\{v_1, v_2, \dots, v_n\}$ for $V$ and $\{u_1, u_2, \dots, u_m\}$ for $W$ and positive scalars $\sigma_1 \geq \sigma_2 \geq \dots \geq \sigma_r$ such that
|
||||
|
||||
\[T(v_i) = \begin{cases}
|
||||
\sigma_iu_i & \text{if}\ 1 \leq i \leq r \\
|
||||
0 & \text{if}\ i > r.
|
||||
\end{cases}\]
|
||||
|
||||
Conversely, suppose that the preceding conditions are satisfied. Then for $1 \leq i \leq n$, $v_i$ is an eigenvector of $T^*T$ with corresponding eigenvalue $\sigma_i^2$ if $1 \leq i \leq r$ and $0$ if $i > r$. Therefore the scalars $\sigma_1, \sigma_2, \dots, \sigma_r$ are uniquely determined by $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The unique scalars $\sigma_1,\sigma_2,\dots\sigma_r$ in \autoref{Theorem 6.26} are called the \textbf{singular values} of $T$. If $r$ is less than both $m$ and $n$, then the term \textit{singular value} is extended to include $\sigma_{r + 1} = \dots = \sigma_k = 0$, where $k$ is the minimum of $m$ and $n$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix. We define the \textbf{singular values} of $A$ to be the singular values of the linear transformation $L_A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}[\textbf{Singular Value Decomposition Theorem for Matrices}]\label{Theorem 6.27}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix of rank $r$ with the positive singular values $\sigma_1 \geq \sigma_2 \geq \dots \geq \sigma_r$, and let $\Sigma$ be the $m \times n$ matrix defined by
|
||||
|
||||
\[\Sigma_{ij} = \begin{cases}
|
||||
\sigma_i & \text{if}\ i = j \leq r \\
|
||||
0 & \text{otherwise}.
|
||||
\end{cases}\]
|
||||
|
||||
Then there exists an $m \times m$ unitary matrix $U$ and an $n \times n$ unitary matrix $V$ such that
|
||||
|
||||
\[A = U\Sigma V^*.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix of rank $r$ with positive singular values $\sigma_1 \geq \sigma_2 \geq \dots \geq \sigma_r$. A factorization $A = U\Sigma V^*$ where $U$ and $V$ are unitary matrices and $\Sigma$ is the $m \times n$ matrix defined as in \autoref{Theorem 6.27} is called a \textbf{singular value decomposition} of $A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For any square matrix $A$, there exists a unitary matrix $W$ and a positive semidefinite matrix $P$ such that
|
||||
|
||||
\[A = WP.\]
|
||||
|
||||
Furthermore, if $A$ is invertible, then the representation is unique.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
The factorization of a square matrix $A$ as $WP$ where $W$ is unitary and $P$ is positive semidefinite is called a \textbf{polar decomposition} of $A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be finite-dimensional inner product spaces over the same field, and let $T: V \to W$ be a linear transformation. Let $L: \n{T}^\perp \to \range{T}$ be the linear transformation defined by $L(x) = T(x)$ for all $x \in \n{T}^\perp$. The \textbf{pseudoinverse} (or \textit{Moore-Penrose generalized inverse}) of $T$, denoted by $T^\dagger$, is defined as the unique linear transformation from $W$ to $V$ such that
|
||||
|
||||
\[T^\dagger(y) = \begin{cases}
|
||||
L^{-1}(y) & \text{for}\ y \in \range{T} \\
|
||||
0 & \text{for}\ y \in \range{T}^\perp.
|
||||
\end{cases}\]
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix. Then there exists a unique $n \times m$ matrix $B$ such that $(L_A)^\dagger: F^m \to F^n$ is equal to the left-multiplication transformation $L_B$. We call $B$ the \textbf{pseudoinverse} of $A$ and denote it by $B = A^\dagger$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be an $m \times n$ matrix of rank $r$ with a singular value decomposition $A = U\Sigma V^*$ and nonzero singular values $\sigma_1 \geq \sigma_2 \geq \dots \geq \sigma_r$. Let $\Sigma^\dagger$ be the $n \times m$ matrix defined by
|
||||
|
||||
\[\Sigma_{ij}^\dagger = \begin{cases}
|
||||
\frac{1}{\sigma_i} & \text{if}\ i = j \leq r \\
|
||||
0 & \text{otherwise.}
|
||||
\end{cases}\]
|
||||
|
||||
Then $A^\dagger = V\Sigma^\dagger U^*$, and this is a singular value decomposition of $A^\dagger$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be finite-dimensional inner product spaces, and let $T: V \to W$ be linear. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T^\dagger T$ is the orthogonal projection of $V$ on $\n{T}^\perp$.
|
||||
\item $TT^\dagger$ is the orthogonal projection of $W$ on $\range{T}$.
|
||||
\end{enumerate}
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Consider the system of linear equations $Ax = b$, where $A$ is an $m \times n$ matrix and $b \in F^m$. If $z = A^\dagger b$, then $z$ has the following properties.
|
||||
|
||||
\begin{enumerate}
|
||||
\item If $Ax = b$ is consistent, then $z$ is the unique solution to the system having minimum norm. That is, $z$ is a solution to the system, and if $y$ is any solution to the system, then $||z|| \leq ||y||$ with equality if and only if $z = y$.
|
||||
\item If $Ax = b$ is inconsistent, then $z$ is the unique best approximation to a solution having minimum norm. That is, $||Az - b|| \leq ||Ay - b||$ for any $y \in F^n$, with equality if and only if $Az = Ay$. Furthermore, if $Az = Ay$, then $||z|| \leq ||y||$ with equality if and only if $z = y$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}[\textbf{Penrose Conditions}]
|
||||
\hfill\\
|
||||
Let $V$ and $W$ be finite-dimensional inner product spaces, and let $T: V \to W$ be linear. Then the following conditions hold:
|
||||
|
||||
\begin{enumerate}
|
||||
\item $TT^\dagger T = T$.
|
||||
\item $T^\dagger TT^\dagger = T^\dagger$.
|
||||
\item Both $T^\dagger T$ and $TT^\dagger$ are self-adjoint.
|
||||
\end{enumerate}
|
||||
|
||||
These conditions characterize the pseudoinverse of a linear transformation.
|
||||
\end{lemma}
|
||||
|
||||
|
||||
\section{Unitary and Orthogonal Operators and Their Matrices}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional inner product space $V$ (over $\F$). If $||T(x)|| = ||x||$ for all $x \in V$, we call $T$ a \textbf{unitary operator} if $\F = \C$ and an \textbf{orthogonal operator} if $\F = \R$.\\
|
||||
|
||||
It should be noted that, in the infinite-dimensional case, an operator satisfying the preceding norm requirement is generally called an \textbf{isometry}. If, in addition, the operator is onto (the condition guarantees one-to-one), then the operator is called a \textbf{unitary} or \textbf{orthogonal operator}.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional inner product space $V$. Then the following statements are equivalent.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $TT^* = T^*T = I$.
|
||||
\item $\lr{T(x), T(y)} = \lr{x,y}$ for all $x,y \in V$.
|
||||
\item If $\beta$ is an orthonormal basis for $V$, then $T(\beta)$ is an orthonormal basis for $V$.
|
||||
\item There exists an orthonormal basis $\beta$ for $V$ such that $T(\beta)$ is an orthonormal basis for $V$.
|
||||
\item $||T(x)|| = ||x||$ for all $x \in V$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $U$ be a self-adjoint operator on a finite-dimensional inner product space $V$. If $\lr{x,U(x)} = 0$ for all $x \in V$, then $U = T_0$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional real inner product space $V$. Then $V$ has an orthonormal basis of eigenvectors of $T$ with corresponding eigenvalues of absolute value $1$ if and only if $T$ is both self-adjoint and orthogonal.
|
||||
\end{corollary}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional complex inner product space $V$. Then $V$ has an orthonormal basis of eigenvectors of $T$ with corresponding eigenvalues of absolute value $1$ if and only if $T$ is unitary.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $L$ be a one-dimensional subspace of $\R^2$. We may view $L$ as a line in the plane through the origin. A linear operator $T$ on $\R^2$ is called a \textbf{reflection of $\R^2$ about $L$} if $T(x) = x$ for all $x \in L$ and $T(x) = -x$ for all $x \in L^\perp$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A square matrix $A$ is called an \textbf{orthogonal matrix} if $A^tA = AA^t = I$ and \textbf{unitary} if $A^*A = AA^* = I$.\\
|
||||
|
||||
Since for a real matrix $A$ we have $A^* = A^t$, a real unitary matrix is also orthogonal. In this case, we call $A$ \textbf{orthogonal} rather than unitary.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
We know that, for a complex normal [real symmetric] matrix $A$, there exists an orthonormal basis $\beta$ for $F^n$ consisting of eigenvectors of $A$. Hence $A$ is similar to a diagonal matrix $D$. By \autoref{Corollary 2.8}, the matrix $Q$ whose columns are the vectors in $\beta$ is such that $D = Q^{-1}AQ$. But since the columns of $Q$ are an orthonormal basis for $F^n$, it follows that $Q$ is unitary [orthogonal]. In this case, we say that $A$ is \textbf{unitarily equivalent} [\textbf{orthogonally equivalent}] to $D$. It is easily seen that this relation is an equivalence relation on $M_{n \times n}(\C)$ [$M_{n \times n}(\R)$]. More generally, \textit{$A$ and $B$ are unitarily equivalent [orthogonally equivalent]} if and only if there exists a unitary [orthogonal] matrix $P$ such that $A = P^*BP$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be a complex $n \times n$ matrix. Then $A$ is normal if and only if $A$ is unitarily equivalent to a diagonal matrix.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ be a real $n \times n$ matrix. Then $A$ is symmetric if and only if $A$ is orthogonally equivalent to a real diagonal matrix.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}[\textbf{Schur}]
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$ be a matrix whose characteristic polynomial splits over $\F$.
|
||||
|
||||
\begin{enumerate}
|
||||
\item If $\F = \C$, then $A$ is unitarily equivalent to a complex upper triangular matrix.
|
||||
\item If $\F = \R$, then $A$ is orthogonally equivalent to a real upper triangular matrix.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a real inner product space. A function $f: V \to V$ is called a \textbf{rigid motion} if
|
||||
|
||||
\[||f(x) - f(y)|| = ||x - y||\]
|
||||
|
||||
for all $x,y \in V$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a real inner product space. A function $g: V \to V$ is called a \textbf{translation} if there exists a vector $v_0 \in V$ such that $g(x) = x + v_0$ for all $x \in V$. We say that $g$ is the \textit{translation by $v_0$}.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $f: V \to V$ be a rigid motion on a finite-dimensional real inner product space $V$. Then there exists a unique orthogonal operator $T$ on $V$ and a unique translation $g$ on $V$ such that $f = g \circ T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be an orthogonal operator on $\R^2$, and let $A = [T]_\beta$ where $\beta$ is the standard ordered basis for $\R^2$. Then exactly one of the following conditions is satisfied:
|
||||
|
||||
\begin{enumerate}
|
||||
\item $T$ is a rotation, and $\det(A) = 1$.
|
||||
\item $T$ is a reflection about a line through the origin, and $\det(A) = -1$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Any rigid motion on $\R^2$ is either a rotation followed by a translation or a reflection about a line through the origin followed by a translation.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
Consider the quadratic equation
|
||||
|
||||
\begin{equation}\label{eq:quad}
|
||||
ax^2 + 2bxy + cy^2 +dx + ey + f = 0.
|
||||
\end{equation}
|
||||
|
||||
The expression
|
||||
|
||||
\[ax^2 + 2bxy + cy^2\]
|
||||
|
||||
is called the \textbf{associated quadratic form} of \eqref{eq:quad}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $V$ be a finite-dimensional inner product space. A linear operator $U$ on $V$ is called a \textbf{partial isometry} if there exists a subspace $W$ of $V$ such that $||U(x)|| = ||x||$ for all $x \in W$ and $U(x) = 0$ for all $x \in W^\perp$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
Let $V$ be a finite-dimensional complex [real] inner product space, and let $u$ be a unit vector in $V$. Define the \textbf{Householder} operator $\mathsf{H}_u: V \to V$ by $\mathsf{H}_u(x) = x-2\lr{x,u}u$ for all $x \in V$.
|
||||
\end{definition}
|
||||
|
||||
|
||||
\section{The Jordan Canonical Form I}
|
||||
|
||||
\begin{definition}
|
||||
In this section, we extend the definition of eigenspace to \textit{generalized eigenspace}. From these subspaces, we select ordered bases whose union is an ordered basis $\beta$ for $V$ such that
|
||||
|
||||
\[[T]_\beta = \begin{pmatrix}
|
||||
A_1 & O & \dots & O \\
|
||||
O & A_2 & \dots & O \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
O & O & \dots & A_k
|
||||
\end{pmatrix}\]
|
||||
|
||||
where each $O$ is a zero matrix, and each $A_i$ is a square matrix of the form $(\lambda)$ or
|
||||
|
||||
\[\begin{pmatrix}
|
||||
\lambda & 1 & 0 & \dots & 0 & 0 \\
|
||||
0 & \lambda & 1 & \dots & 0 & 0 \\
|
||||
\vdots & \vdots & \vdots & & \vdots & \vdots \\
|
||||
0 & 0 & 0 & \dots & \lambda & 1 \\
|
||||
0 & 0 & 0 & \dots & 0 & \lambda
|
||||
\end{pmatrix}\]
|
||||
|
||||
for some eigenvalue $\lambda$ of $T$. Such a matrix $A_i$ is called a \textbf{Jordan block} corresponding to $\lambda$, and the matrix $[T]_\beta$ is called a \textbf{Jordan canonical form} of $T$. We also say that the ordered basis $\beta$ is a \textbf{Jordan canonical basis} for $T$. Observe that each Jordan block $A_i$ is ``almost'' a diagonal matrix -- in fact, $[T]_\beta$ is a diagonal matrix if and only if each $A_i$ is of the form $(\lambda)$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda$ be a scalar. A nonzero vector $x$ in $V$ is called a \textbf{generalized eigenvector of $T$ corresponding to $\lambda$} if $(T -\lambda I)^p(x) = 0$ for some positive integer $p$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda$ be an eigenvalue of $T$. The \textbf{generalized eigenspace of $T$ corresponding to $\lambda$}, denoted $K_\lambda$, is the subset of $V$ defined by
|
||||
|
||||
\[K_\lambda = \{x \in V : (T - \lambda I)^p(x) = 0\ \text{for some positive integer}\ p\}.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}\label{Theorem 7.1}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda$ be an eigenvalue of $T$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $K_\lambda$ is a $T$-invariant subspace of $V$ containing $E_\lambda$ (the eigenspace of $T$ corresponding to $\lambda$).
|
||||
\item for any scalar $\mu \neq \lambda$, the restriction of $T - \mu I$ to $K_\lambda$ is one-to-one.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ such that the characteristic polynomial of $T$ splits. Suppose that $\lambda$ is an eigenvalue of $T$ with multiplicity $m$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\ldim{K_\lambda} \leq m$.
|
||||
\item $K_\lambda = \n{(T - \lambda I)^m}$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ such that the characteristic polynomial of $T$ splits, and let $\lambda_1, \lambda_2, \dots, \lambda_k$ be the distinct eigenvalues of $T$. Then, for every $x \in V$, there exist vectors $v_i \in K_{\lambda_i}$, $1 \leq i \leq k$, such that
|
||||
|
||||
\[x = v_1 + v_2 + \dots + v_k.\]
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}\label{Theorem 7.4}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ such that the characteristic polynomial of $T$ splits, and let $\lambda_1, \lambda_2, \dots, \lambda_k$ be the distinct eigenvalues of $T$ with corresponding multiplicities $m_1, m_2, \dots, m_k$. For $1 \leq i \leq k$, let $\beta_i$ be an ordered basis for $K_{\lambda_i}$. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\beta_i \cap \beta_j = \emptyset$ for $i \neq j$.
|
||||
\item $\beta = \beta_1 \cup \beta_2 \cup \dots \cup \beta_k$ is an ordered basis for $V$.
|
||||
\item $\ldim{K_{\lambda_i}} = m_i$ for all $i$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ such that the characteristic polynomial of $T$ splits. Then $T$ is diagonalizable if and only if $E_\lambda = K_\lambda$ for every eigenvalue $\lambda$ of $T$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $x$ be a generalized eigenvector of $T$ corresponding to the eigenvalue $\lambda$. Suppose that $p$ is the smallest positive integer for which $(T - \lambda I)^p(x) = 0$. Then the ordered set
|
||||
|
||||
\[\{(T-\lambda I)^{p-1}(x), (T-\lambda I)^{p -2}(x), \dots, (T-\lambda I)(x), x\}\]
|
||||
is called a \textbf{cycle of generalized eigenvectors} of $T$ corresponding to $\lambda$. The vectors $(T-\lambda I)^{p-1}(x)$ and $x$ are called the \textbf{initial vector} and the \textbf{end vector} of the cycle, respectively. We say that the \textbf{length} of the cycle is $p$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}\label{Theorem 7.5}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ whose characteristic polynomial splits, and suppose that $\beta$ is a basis for $V$ such that $\beta$ is a disjoint union of cycles of generalized eigenvectors of $T$. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item For each cycle $\gamma$ of generalized eigenvectors contained in $\beta$, $W = \lspan{\gamma}$ is $T$-invariant, and $[T_W]_\gamma$ is a Jordan block.
|
||||
\item $\beta$ is a Jordan canonical basis for $V$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a vector space $V$, and let $\lambda$ be an eigenvalue of $T$. Suppose that $\gamma_1, \gamma_2, \dots,\gamma_q$ are cycles of generalized eigenvectors of $T$ corresponding to $\lambda$ such that the initial vectors of the $\gamma_i$'s are distinct and form a linearly independent set. Then the $\gamma_i$'s are disjoint, and their union $\gamma = \displaystyle\bigcup_{i = 1}^q \gamma_i$ is linearly independent.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Every cycle of generalized eigenvectors of a linear operator is linearly independent.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}\label{Theorem 7.7}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $\lambda$ be an eigenvalue of $T$. Then $K_\lambda$ has an ordered basis consisting of a union of disjoint cycles of generalized eigenvectors corresponding to $\lambda$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ whose characteristic polynomial splits. Then $T$ has a Jordan canonical form.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$ be such that the characteristic polynomial of $A$ (and hence of $L_A$) splits. Then the \textbf{Jordan canonical form} of $A$ is defined to be the Jordan canonical form of the linear operator $L_A$ on $\F^n$.
|
||||
\end{definition}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $A$ be an $n \times n$ matrix whose characteristic polynomial splits. Then $A$ has Jordan canonical form $J$, and $A$ is similar to $J$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ whose characteristic polynomial splits. Then $V$ is the direct sum of the generalized eigenspaces of $T$.
|
||||
\end{theorem}
|
||||
|
||||
|
||||
\section{The Jordan Canonical Form II}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For the purposes of this section, we fix a linear operator $T$ on an $n$-dimensional vector space $V$ such that the characteristic polynomial of $T$ splits. Let $\lambda_1, \lambda_2, \dots, \lambda_k$ be the distinct eigenvalues of $T$.\\
|
||||
|
||||
By \autoref{Theorem 7.7}, each generalized eigenspace $K_{\lambda_i}$ contains an ordered basis $\beta_i$ consisting of a union of disjoint cycles of generalized eigenvectors corresponding to $\lambda_i$. So by \autoref{Theorem 7.4}(2) and \autoref{Theorem 7.5}, the union $\beta = \displaystyle\bigcup_{i=1}^k \beta_i$ is a Jordan canonical basis for $T$. For each $i$, let $T_i$ be the restriction of $T$ to $K_{\lambda_i}$, and let $A_i = [T_i]_{\beta_i}$. Then $A_i$ is the Jordan canonical form of $T_{ij}$, and
|
||||
|
||||
\[J = [T]_\beta = \begin{pmatrix}
|
||||
A_1 & O & \dots & O \\
|
||||
O & A_2 & \dots & O \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
O & O & \dots & A_k
|
||||
\end{pmatrix}\]
|
||||
|
||||
is the Jordan canonical form of $T$. In this matrix, each $O$ is a zero matrix of appropriate size.\\
|
||||
|
||||
\textbf{Note:} In this section, we compute the matrices $A_i$ and the bases $\beta_i$, thereby computing $J$ and $\beta$ as well. To aid in formulating the uniqueness theorem for $J$, we adopt the following convention: The basis $\beta_i$ for $K_\lambda$ will henceforth be ordered in such a way that the cycles appear in order of decreasing length. That is, if $\beta_i$ is a disjoint union of cycles $\gamma_1, \gamma_2, \dots, \gamma_{n_i}$ and if the length of the cycle $\gamma_j$ is $p_j$, we index the cycles so that $p_1 \geq p_2 \geq \dots \geq p_{n_i}$.\\
|
||||
|
||||
To illustrate the discussion above, suppose that, for some $i$, the ordered basis $\beta_i$ for $K_{\lambda_i}$ is the union of four cycles $\beta_i = \gamma_1 \cup \gamma_2 \cup \gamma_3 \cup \gamma_4$ with respective lengths $p_1 = 3, p_2 = 3, p_3 = 2$, and $p_4 = 1$. Then
|
||||
|
||||
\[A_i = \left(\begin{array}{*{9}{c}}
|
||||
\cellcolor{Gray}\lambda_i & \cellcolor{Gray}1 & \cellcolor{Gray}0 & 0 & 0 & 0 & 0 & 0 & 0 \\
|
||||
\cellcolor{Gray}0 & \cellcolor{Gray}\lambda_i & \cellcolor{Gray}1 & 0 & 0 & 0 & 0 & 0 & 0 \\
|
||||
\cellcolor{Gray}0 & \cellcolor{Gray}0 & \cellcolor{Gray}\lambda_i & 0 & 0 & 0 & 0 & 0 & 0 \\
|
||||
0 & 0 & 0 & \cellcolor{Gray}\lambda_i & \cellcolor{Gray}1 & \cellcolor{Gray}0 & 0 & 0 & 0 \\
|
||||
0 & 0 & 0 & \cellcolor{Gray}0 & \cellcolor{Gray}\lambda_i & \cellcolor{Gray}1 & 0 & 0 & 0 \\
|
||||
0 & 0 & 0 & \cellcolor{Gray}0 & \cellcolor{Gray}0 & \cellcolor{Gray}\lambda_i & 0 & 0 & 0 \\
|
||||
0 & 0 & 0 & 0 & 0 & 0 & \cellcolor{Gray}\lambda_i & \cellcolor{Gray}1 & 0 \\
|
||||
0 & 0 & 0 & 0 & 0 & 0 & \cellcolor{Gray}0 & \cellcolor{Gray}\lambda_i & 0 \\
|
||||
0 & 0 & 0 & 0 & 0 & 0 & 0 & 0 & \cellcolor{Gray}\lambda_i
|
||||
\end{array}\right)\]
|
||||
|
||||
To help us visualize each of the matrices $A_i$ and ordered bases $\beta_i$, we use an array of dots called a \textbf{dot diagram} of $T_i$, where $T_i$ is the restriction of $T$ to $K_{\lambda_i}$. Suppose that $\beta_i$ is a disjoint union of cycles of generalized eigenvectors $\gamma_1, \gamma_2, \dots, \gamma_{n_i}$ with lengths $p_1 \geq p_2 \geq \dots \geq p_{n_i}$, respectively. The dot diagram of $T_i$ contains one dot for each vector in $\beta_i$, and the dots are configured according to the following rules.
|
||||
|
||||
\begin{enumerate}
|
||||
\item The array consists of $n_i$ columns (one column for each cycle).
|
||||
\item Counting from left to right, the $j$th column consists of the $p_j$ dots that correspond to the vectors of $\gamma_j$ starting with the initial vector at the top and continuing down to the end vector.
|
||||
\end{enumerate}
|
||||
|
||||
Denote the end vectors of the cycles by $v_1, v_2, \dots, v_{n_i}$. In the following dot diagram of $T_i$, each dot is labeled with the name of the vector in $\beta_i$ to which it corresponds.
|
||||
|
||||
\[\begin{array}{llll}
|
||||
\bullet(T - \lambda_i I)^{p_1 - 1}(v_1) & \bullet(T - \lambda_i I)^{p_2-1}(v_2) & \dots & \bullet (T-\lambda_i I)^{p_{n_i} - 1}(v_{n_i}) \\
|
||||
\bullet(T - \lambda_i I)^{p_1 - 2}(v_1) & \bullet(T - \lambda_i I)^{p_2 - 2}(v_2) & \dots & \bullet(T - \lambda_i I)^{p_{n_i} - 2}(v_{n_i}) \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
& & & \bullet(T - \lambda_i I)(v_{n_i}) \\
|
||||
& & & \bullet v_{n_i} \\
|
||||
& \bullet(T - \lambda_i I)(v_2) & & \\
|
||||
& \bullet v_2 & & \\
|
||||
\bullet(T - \lambda_i I)(v_1) & & \\
|
||||
\bullet v_1
|
||||
\end{array}\]
|
||||
|
||||
Notice that the dot diagram of $T_i$ has $n_i$ columns (one for each cycle) and $p_1$ rows. Since $p_1 \geq p_2 \geq \dots \geq p_{n_i}$, the columns of the dot diagram become shorter (or at least not longer) as we move from left to right.
|
||||
|
||||
Now let $r_j$ denote the number of dots in the $j$th row of the dot diagram. Observe that $r_1 \geq r_2 \geq \dots \geq r_{p_1}$. Furthermore, the diagram can be reconstructed from the values of the $r_i$'s.\\
|
||||
|
||||
In the above example, with $n_i = 4$, $p_1 = p_2 = 3$, $p_3 = 2$, and $p_4 = 1$, the dot diagram of $T_i$ is as follows:
|
||||
|
||||
\[\begin{array}{llll}
|
||||
\bullet & \bullet & \bullet & \bullet \\
|
||||
\bullet & \bullet & \bullet & \\
|
||||
\bullet & \bullet & &
|
||||
\end{array}\]
|
||||
|
||||
Here $r_1 = 4$, $r_2 = 3$ and $r_3 = 2$.
|
||||
|
||||
We now devise a method for computing the dot diagram of $T_i$ using the ranks of linear operators determined by $T$ and $\lambda_i$. Hence the dot diagram is completely determined by $T$, from which it follows that it is unique. On the other hand $\beta_i$ is not unique.
|
||||
|
||||
To determine the dot diagram of $T_i$, we devise a method for computing each $r_j$, the number of dots in the $j$th row of the dot diagram, using only $T_i$ and $\lambda_i$. The next three results give us the required method. To facilitate our arguments, we fix a basis $\beta_i$ for $K_{\lambda_i}$ so that $\beta_i$ is a disjoint union of $n_i$ cycles of generalized eigenvectors with lengths $p_1 \geq p_2 \geq \dots \geq p_{n_i}$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
For any positive integer $r$, the vectors in $\beta_i$ that are associated with the dots in the first $r$ rows of the dot diagram of $T_i$ constitute a basis for $\n{(T - \lambda_i I)^r}$. Hence the number of dots in the first $r$ rows of the dot diagram equals $\nullity{(T - \lambda_i I)^r}$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
The dimension of $E_{\lambda_i}$ is $n_i$. Hence in a Jordan canonical form of $T$, the number of Jordan blocks corresponding to $\lambda_i$ equals the dimension of $E_{\lambda_i}$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $r_j$ denote the number of dots in the $j$th row of the dot diagram of $T_i$, the restriction of $T$ to $K_{\lambda_i}$. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $r_1 = \ldim{V} - \rank{T - \lambda_i I}$.
|
||||
\item $r_j = \rank{(T - \lambda_i I)^{j - 1}} - \rank{(T - \lambda_i I)^j}$ if $j > 1$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
For any eigenvalue $\lambda_i$ of $T$, the dot diagram of $T_i$ is unique. Thus, subject to the convention that cycles of generalized eigenvectors for the bases of each generalized eigenspace are listed in order of decreasing length, the Jordan canonical form of a linear operator or a matrix is unique up to the ordering of the eigenvalues.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $A$ and $B$ be $n \times n$ matrices, each having Jordan canonical forms computed according to the conventions of this section. Then $A$ and $B$ are similar if and only if they have (up to an ordering of their eigenvalues) the same Jordan canonical form.
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}
|
||||
A linear operator $T$ on a finite-dimensional vector space $V$ is diagonalizable if and only if its Jordan canonical form is a diagonal matrix. Hence $T$ is diagonalizable if and only if the Jordan canonical basis for $T$ consists of eigenvectors of $T$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A linear operator $T$ on a vector space $V$ is called \textbf{nilpotent} if $T^p = T_0$ for some positive integer $p$. An $n \times n$ matrix $A$ is called \textbf{nilpotent} if $A^p = O$ for some positive integer $p$.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
For any $A \in M_{n \times n}(\C)$, define the norm of $A$ by
|
||||
|
||||
\[||A|| = \max \{|A_{ij}| : 1 \leq i, j \leq n\}.\]
|
||||
\end{definition}
|
||||
|
||||
|
||||
\section{The Minimal Polynomial}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
A polynomial $f(x)$ with coefficients from a field $\F$ is called \textbf{monic} if its leading coefficient is $1$. If $f(x)$ has positive degree and cannot be expressed as a product of polynomials with coefficients from $\F$ each having positive degree, then $f(x)$ is called \textbf{irreducible}.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space. A polynomial $p(t)$ is called a \textbf{minimal polynomial} of $T$ if $p(t)$ is a monic polynomial of least positive degree for which $p(T) = T_0$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $p(t)$ be a minimal polynomial of a linear operator $T$ on a finite-dimensional vector space $V$.
|
||||
|
||||
\begin{enumerate}
|
||||
\item For any polynomial $g(t)$, if $g(T) = T_0$, then $p(t)$ divides $g(t)$. In particular, $p(t)$ divides the characteristic polynomial of $T$.
|
||||
\item The minimal polynomial of $T$ is unique.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$. The \textbf{minimal polynomial} $p(t)$ of $A$ is the monic polynomial of least positive degree for which $p(A) = O$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $\beta$ be an ordered basis for $V$. Then the minimal polynomial of $T$ is the same as the minimal polynomial of $[T]_\beta$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
For any $A \in M_{n \times n}(\F)$, the minimal polynomial of $A$ is the same as the minimal polynomial of $L_A$.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $p(t)$ be the minimal polynomial of $T$. A scalar $\lambda$ is an eigenvalue of $T$ if and only if $p(\lambda) = 0$. Hence the characteristic polynomial and the minimal polynomial of $T$ have the same zeros.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ with minimal polynomial $p(t)$ and characteristic polynomial $f(t)$. Suppose that $f(t)$ factors as
|
||||
|
||||
\[f(t) = (\lambda_1 - t)^{n_1}(\lambda_2 - t)^{n_2} \dots(\lambda_k - t)^{n_k},\]
|
||||
|
||||
where $\lambda_1, \lambda_2, \dots, \lambda_k$ are the distinct eigenvalues of $T$. Then there exist integers $m_1, m_2, \dots, m_k$ such that $1 \leq m_i \leq n_i$ for all $i$ and
|
||||
|
||||
\[p(t) = (t - \lambda_1)^{m_1}(t - \lambda_2)^{m_2}\dots(t - \lambda_k)^{m_k}.\]
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}\label{Theorem 7.15}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on an $n$-dimensional vector space $V$ such that $V$ is a $T$-cyclic subspace of itself. Then the characteristic polynomial $f(t)$ and the minimal polynomial $p(t)$ have the same degree, and hence $f(t) = (-1)^np(t)$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$. Then $T$ is diagonalizable if and only if the minimal polynomial of $T$ is of the form
|
||||
|
||||
\[p(t) = (t - \lambda_1)(t - \lambda_2) \dots (t - \lambda_k),\]
|
||||
|
||||
where $\lambda_1, \lambda_2, \dots, \lambda_k$ are the distinct eigenvalues of $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $x$ be a nonzero vector in $V$. The polynomial $p(t)$ is called a $T$-\textbf{annihilator} of $x$ if $p(t)$ is a monic polynomial of least degree for which $p(T)(x) = 0$.
|
||||
\end{definition}
|
||||
|
||||
|
||||
\section{The Rational Canonical Form}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$ with characteristic polynomial
|
||||
|
||||
\[f(t) = (-1)^n(\phi_1(t))^{n_1}(\phi_2(t))^{n_2} \dots (\phi_k(t))^{n_k},\]
|
||||
|
||||
where the $\phi_i(t)$'s ($1 \leq i \leq k$) are distinct irreducible monic polynomials and the $n_i$'s are positive integers. For $1 \leq i \leq k$, we define the subset $K_{\phi_i}$ of $V$ by
|
||||
|
||||
\[K_{\phi_i} = \{x \in V : (\phi_i(T))^p(x) = 0\ \text{for some positive integer}\ p\}.\]
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $x$ be a nonzero vector in $V$. We use the notation $\mathsf{C}_x$ for the $T$-cyclic subspace generated by $x$. Recall \autoref{Theorem 5.22}, that if $\ldim{\mathsf{C}_x} = k$, then the set
|
||||
|
||||
\[\{x, T(x), T^2(x), \dots, T^{k-1}(x)\}\]
|
||||
|
||||
is an ordered basis for $\mathsf{C}_x$. To distinguish this basis from all other ordered bases for $\mathsf{C}_x$, we call it the $T$-\textbf{cyclic basis generated by \textit{x}} and denote it by $\beta_x$. Let $A$ be the matrix representation of the restriction of $T$ to $\mathsf{C}_x$ relative to the ordered basis $\beta_x$. Recall that
|
||||
|
||||
\[A = \begin{pmatrix}
|
||||
0 & 0 & \dots & 0 & -a_0 \\
|
||||
1 & 0 & \dots & 0 & -a_1 \\
|
||||
0 & 1 & \dots & 0 & -a_2 \\
|
||||
\vdots & \vdots & & \vdots & \vdots \\
|
||||
0 & 0 & \dots & 1 & -a_{k - 1}
|
||||
\end{pmatrix}\]
|
||||
|
||||
where
|
||||
|
||||
\[a_0x + a_1T(x) + \dots + a_{k-1}T^{k-1}(x) + T^k(x) = 0.\]
|
||||
|
||||
Furthermore, the characteristic polynomial of $A$ is given by
|
||||
|
||||
\[\det(A - tI) = (-1)^k(a_0 + a_1t + \dots + a_{k-1}t^{k-1} + t^k).\]
|
||||
|
||||
The matrix $A$ is called the \textbf{companion matrix} of the monic polynomial $h(t) = a_0 + a_1t + \dots + a_{k-1}t^{k-1} + t^k$. Every monic polynomial has a companion matrix, and the characteristic polynomial of the companion matrix of a monic polynomial $g(t)$ of degree $k$ is equal to $(-1)^kg(t)$. By \autoref{Theorem 7.15}, the monic polynomial $h(t)$ is also the minimal polynomial of $A$. Since $A$ is the matrix representation of the restriction of $T$ to $\mathsf{C}_x$, $h(t)$ is also the minimal polynomial of this restriction. Note that $h(t)$ is also the $T$-annihilator of $x$.
|
||||
|
||||
It is the object of this section to prove that for every linear operator $T$ on a finite-dimensional vector space $V$, there exists an ordered basis $\beta$ for $V$ such that the matrix representation $[T]_\beta$ is of the form
|
||||
|
||||
\[\begin{pmatrix}
|
||||
C_1 & O & \dots & O \\
|
||||
O & C_2 & \dots & O \\
|
||||
\vdots & \vdots & & \vdots \\
|
||||
O & O & \dots & C_r
|
||||
\end{pmatrix},\]
|
||||
|
||||
where each $C_i$ is the companion matrix of a polynomial $(\phi(t))^m$ such that $\phi(t)$ is a monic irreducible divisor of the characteristic polynomial of $T$ and $m$ is a positive integer. A matrix representation of this kind is called a \textbf{rational canonical form} of $T$. We call the accompanying basis a \textbf{rational canonical basis} for $T$.
|
||||
\end{definition}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, let $x$ be a nonzero vector in $V$, and suppose that the $T$-annihilator of $x$ is of the form $(\phi(t))^p$ for some irreducible monic polynomial $\phi(t)$. Then $\phi(t)$ divides the minimal polynomial of $T$, and $x \in K_\phi$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and let $\beta$ be an ordered basis for $V$. Then $\beta$ is a rational canonical basis for $T$ if and only if $\beta$ is the disjoint union of $T$-cyclic bases $\beta_{v_i}$, where each $v_i$ lies in $K_\phi$ for some irreducible monic divisor $\phi(t)$ of the characteristic polynomial of $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and suppose that
|
||||
|
||||
\[p(t) = (\phi_1(t))^{m_1} (\phi_2(t))^{m_2} \dots (\phi_k(t))^{m_k}\]
|
||||
|
||||
is the minimal polynomial of $T$, where the $\phi_i(t)$'s ($1 \leq i \leq k$) are the distinct irreducible monic factors of $p(t)$ and the $m_i$'s are positive integers. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $K_{\phi_i}$ is a nonzero $T$-invariant subspace of $V$ for each $i$.
|
||||
\item If $x$ is a nonzero vector in some $K_{\phi_i}$, then the $T$-annihilator of $x$ is of the form $(\phi_i(t))^p$ for some integer $p$.
|
||||
\item $K_{\phi_i} \cap K_{\phi_j} = \{0\}$ for $i \neq j$.
|
||||
\item $K_{\phi_i}$ is invariant under $\phi_j(T)$ for $i \neq j$, and the restriction of $\phi_j(T)$ to $K_{\phi_i}$ is one-to-one and onto.
|
||||
\item $K_{\phi_i} = \n{(\phi_i(T))^{m_i}}$ for each $i$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and suppose that
|
||||
|
||||
\[p(t) = (\phi_1(t))^{m_1} (\phi_2(t))^{m_2} \dots (\phi_k(t))^{m_k}\]
|
||||
|
||||
is the minimal polynomial of $T$, where the $\phi_i$'s ($1 \leq i \leq k$) are the distinct irreducible monic factors of $p(t)$ and the $m_i$'s are positive integers. For $1 \leq i \leq k$, let $v_i \in K_{\phi_i}$ be such that
|
||||
|
||||
\[v_1 + v_2 + \dots + v_k = 0.\]
|
||||
|
||||
Then $v_i = 0$ for all $i$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, and suppose that
|
||||
|
||||
\[p(t) = (\phi_1(t))^{m_1} (\phi_2(t))^{m_2} \dots (\phi_k(t))^{m_k}\]
|
||||
|
||||
is the minimal polynomial of $T$, where the $\phi_i$'s ($1 \leq i \leq k$) are the distinct irreducible monic factors of $p(t)$ and the $m_i$'s are positive integers. For $1 \leq i \leq k$, let $S_i$ be a linearly independent subset of $K_{\phi_i}$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $S_i \cap S_j = \emptyset$ for $i \neq j$.
|
||||
\item $S_1 \cup S_2 \cup \dots \cup S_k$ is linearly independent.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $v_1, v_2, \dots, v_k$ be distinct vectors in $K_\phi$ such that
|
||||
|
||||
\[S_1 = \beta_{v_1} \cup \beta_{v_2} \cup \dots \cup \beta_{v_k}\]
|
||||
|
||||
is linearly independent. For each $i$, choose $w_i \in V$ such that $\phi(T)(w_i) = v_i$. Then
|
||||
|
||||
\[S_2 = \beta_{w_1} \cup \beta_{w_2} \cup \dots \cup \beta_{w_k}\]
|
||||
|
||||
is also linearly independent.
|
||||
\end{theorem}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
Let $W$ be a $T$-invariant subspace of $K_\phi$, and let $\beta$ be a basis for $W$. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item Suppose that $x \in \n{\phi(T)}$, but $x \notin W$. Then $\beta \cup \beta_x$ is linearly independent.
|
||||
\item For some $w_1, w_2, \dots, w_s$ in $\n{\phi(T)}$, $\beta$ can be extended to the linearly independent set
|
||||
|
||||
\[\beta' = \beta \cup \beta_{w_1} \cup \beta_{w_2} \cup \dots \cup \beta_{w_s},\]
|
||||
|
||||
whose span contains $\n{\phi(T)}$.
|
||||
\end{enumerate}
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
If the minimal polynomial of $T$ is of the form $p(t) = (\phi(t))^m$, then there exists a rational canonical basis for $T$.
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
$K_\phi$ has a basis consisting of the union of $T$-cyclic bases.
|
||||
\end{corollary}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Every linear operator on a finite-dimensional vector space has a rational canonical basis and, hence, a rational canonical form.
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on an $n$-dimensional vector space $V$ with characteristic polynomial
|
||||
|
||||
\[f(t) = (-1)^n(\phi_1(t))^{n_1} (\phi_2(t))^{n_2} \dots (\phi_k(t))^{n_k},\]
|
||||
|
||||
where the $\phi_i(t)$'s ($1 \leq i \leq k$) are distinct irreducible monic polynomials and the $n_i$'s are positive integers. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $\phi_1(t), \phi_2(t), \dots, \phi_k(t)$ are the irreducible monic factors of the minimal polynomial.
|
||||
\item For each $i$, $\ldim{K_{\phi_i}} = d_in_i$, where $d_i$ is the degree of $\phi_i(t)$.
|
||||
\item If $\beta$ is a rational canonical basis for $T$, then $\beta_i = \beta \cap K_{\phi_i}$ is a basis for $K_{\phi_i}$ for each $i$.
|
||||
\item If $\gamma_i$ is a basis for $K_{\phi_i}$ for each $i$, then $\gamma = \gamma_1 \cup \gamma_2 \cup \dots \cup \gamma_k$ is a basis for $V$. In particular, if each $\gamma_i$ is a disjoint union of $T$-cyclic bases, then $\gamma$ is a rational canonical basis for $T$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $\beta$ be a rational canonical basis for $T$, and $\beta_{v_1}, \beta_{v_2}, \dots, \beta_{v_k}$ be the $T$-cyclic bases of $\beta$ that are contained in $K_\phi$. Consider these $T$-cyclic bases $\beta_{v_j}$, and suppose again that the $T$-annihilator of $v_j$ is $(\phi(t))^{p_j}$. Then $\beta_{v_j}$ consists of $dp_j$ vectors in $\beta$, where $d$ is the degree of $\phi(t)$. For $0 \leq i < d$, let $\gamma_i$ be the cycle of generalized eigenvectors of $U$ corresponding to $\lambda = 0$ with end vector $T^i(v_j)$, where $T^0(v_j) = v_j$. Then
|
||||
|
||||
\[\gamma_i = \{(\phi(T))^{p_j-1}T^i(v_j), (\phi(T))^{p_j-2}T^i(v_j), \dots, (\phi(T))T^i(v_j),T^i(v_j)\}.\]
|
||||
|
||||
By \autoref{Theorem 7.1}, $\gamma_i$ is a linearly independent subset of $\mathsf{C}_{v_j}$. Now let
|
||||
|
||||
\[\alpha_j = \gamma_0 \cup \gamma_1 \cup \dots \cup \gamma_{d - 1}.\]
|
||||
|
||||
Notice that $\alpha_j$ contains $p_jd$ vectors.
|
||||
\end{definition}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
$\alpha_j$ is an ordered basis for $\mathsf{C}_{v_j}$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{lemma}
|
||||
\hfill\\
|
||||
$\alpha$ is a Jordan canonical basis for $K_\phi$.
|
||||
\end{lemma}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$, let $\phi(t)$ be an irreducible monic divisor of the characteristic polynomial of $T$ of degree $d$, and let $r_i$ denote the number of dots in the $i$th row of the dot diagram for $\phi(t)$ with respect to a rational canonical basis for $T$. Then
|
||||
|
||||
\begin{enumerate}
|
||||
\item $r_1 = \displaystyle\frac{1}{d}[\ldim{V}-\rank{\phi(T)}]$
|
||||
\item $r_i = \displaystyle\frac{1}{d}[\rank{(\phi(T))^{i-1}} - \rank{(\phi(T))^i}]$ for $i > 1$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{corollary}
|
||||
\hfill\\
|
||||
Under the conventions described earlier, the rational canonical form of a linear operator is unique up to the arrangement of the irreducible monic divisors of the characteristic polynomial.
|
||||
\end{corollary}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Since the rational canonical form of a linear operator is unique, the polynomials corresponding to the companion matrices that determine this form are also unique. These polynomials, which are powers of the irreducible monic divisors, are called the \textbf{elementary divisors} of the linear operator. Since a companion matrix may occur more than once in a rational canonical form, the same is true for the elementary divisors. We call the number of such occurrences the \textbf{multiplicity} of the elementary divisor.
|
||||
|
||||
Conversely, the elementary divisors and their multiplicities determine the companion matrices and, therefore, the rational canonical form of a linear operator.
|
||||
\end{definition}
|
||||
|
||||
\begin{definition}
|
||||
\hfill\\
|
||||
Let $A \in M_{n \times n}(\F)$. The \textbf{rational canonical form} of $A$ is defined to be the rational canonical form of $L_A$. Likewise, for $A$, the \textbf{elementary divisors} and their \textbf{multiplicities} are the same as those of $L_A$.
|
||||
\end{definition}
|
||||
|
||||
\begin{theorem}[\textbf{Primary Decomposition Theorem}]
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on an $n$-dimensional vector space $V$ with characteristic polynomial
|
||||
|
||||
\[f(t) = (-1)^n(\phi_1(t))^{n_1} (\phi_2(t))^{n_2} \dots (\phi_k(t))^{n_k},\]
|
||||
|
||||
where the $\phi_i(t)$'s ($1 \leq i \leq k$) are distinct irreducible monic polynomials and the $n_i$'s are positive integers. Then the following statements are true.
|
||||
|
||||
\begin{enumerate}
|
||||
\item $V = K_{\phi_1} \oplus K_{\phi_2} \oplus \dots \oplus K_{\phi_k}$.
|
||||
\item If $T_i$ ($1 \leq i \leq k$) is the restriction of $T$ to $K_{\phi_i}$ and $C_i$ is the rational canonical form of $T_i$, then $C_1 \oplus C_2 \oplus \dots \oplus C_k$ is the rational canonical form of $T$.
|
||||
\end{enumerate}
|
||||
\end{theorem}
|
||||
|
||||
\begin{theorem}
|
||||
\hfill\\
|
||||
Let $T$ be a linear operator on a finite-dimensional vector space $V$. Then $V$ is a direct sum of $T$-cyclic subspaces $\mathsf{C}_{v_i}$, where each $v_i$ lies in $K_\phi$ for some irreducible monic divisor $\phi(t)$ of the characteristic polynomial of $T$.
|
||||
\end{theorem}
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
\newcommand{\Int}[1]{\text{int}\left(#1\right)}
|
||||
\newcommand{\cl}[1]{\text{cl}\left(#1\right)}
|
||||
\newcommand{\bd}[1]{\text{bd}\left(#1\right)}
|
||||
\newcommand{\lr}[1]{\left(#1\right)}
|
||||
\newcommand{\lr}[1]{\langle#1\rangle}
|
||||
\newcommand{\lspan}[1]{\text{span}\left(#1\right)}
|
||||
\newcommand{\ldim}[1]{\text{dim}\left(#1\right)}
|
||||
\newcommand{\nullity}[1]{\text{nullity}\left(#1\right)}
|
||||
|
||||