\newcommand{\fws}{\ensuremath{ f^{\text{ws}} }}
\chapter{Uniform Hardness vs.\ Randomness}
\label{lec:04}
\section{Uniform Circuits}
So far we have been studying PRGs for non-uniform circuits; consequently,
our results require problems in $\E$ that are hard for non-uniform models.
In this lecture we investigate the analogous questions for uniform models of
computation.
\begin{definition}[Uniform circuits]
A circuit family $\{C_n\}$ is \emph{uniform} if there exists a Turing machine that on input $1^n$ outputs $C_n$.
\end{definition}
\begin{definition}[$\eps$-PRG for uniform circuits]
An algorithm $G$ is an \emph{$\eps$-PRG for uniform circuits generated in
time $t(n)$} if for every Turing machine $D$ with running time $t(n)$ and
all large enough $n \in \N$,
\[
  \Pr\brac{D(1^n) \text{ is an $\eps$-distinguisher for } G(1^n, \overline{u}_{\ell(n)})} \leq \eps,
\]
where the probability is taken over the randomness of $D$.
\end{definition}
\begin{definition}\label{p-sample-dist}
Let $\overline{x} = \set{\overline{x}_n}$ be a sequence of distributions over
$\set{0, 1}^n$ and let $p : \N \to \N$. We say that $\overline{x}$ is
\emph{$p$-time samplable} if there is a probabilistic machine $S$ with
running time $p(n)$ such that $S(1^n)$ has the distribution $\overline{x}_n$.
\end{definition}
\begin{theorem}\label{thm:DR-on-average}
Assume that for all polynomials $p(n)$ there is an $\eps$-PRG $G$ for uniform
circuits generated in time $p(n)$ that stretches a $\log(n)$-sized seed. Then
for all $L \in \BPP$ and poly-time samplable distributions $\overline{x}$,%
\footnote{Think of $\overline{x}$ as an adversary (limited to polynomial
time) trying to generate hard inputs with $\geq \eps$ probability.} there
exists a language $L' \in \P$ such that
\[
\Pr_{x \sim \overline{x}}\brac{L(x) \neq L'(x)} \leq \eps.
\]
\end{theorem}
\begin{proof}
Let $S$ be the sampling algorithm for $\overline{x}$, and let $M = M_L$
be the $\BPP$ machine for $L$. We define $L'$ as follows: given $x$ with
$|x| = n$, enumerate over all seeds $s$ of $G$, and output
\[ \MAJ_{s} \set{M(x, G(1^n, s)) }. \]
Since the seed length $\ell(n)$ is $O(\log n)$, there are only
polynomially many seeds, so the algorithm above runs in polynomial time,
and hence $L' \in \P$.
Further, for each $x$, we define:
\[
\Delta(x) = \card{
\Pr_{r \sim \overline{u}_n} \brac{ M(x, r) = 1}
- \Pr_{s \sim \overline{u}_{\ell(n)}} \brac{ M(x, G(s)) = 1}
}.
\]
We suppose (towards a contradiction) that
$\Pr_{x \sim \overline{x}_n}\brac{L(x) \neq L'(x)} > \eps$. Since $M$ is a
$\BPP$ machine for $L$, it errs with probability at most $1/3$, so
whenever $\Delta(x) \leq \eps$ (say for $\eps < 1/6$) the majority vote
over seeds equals $L(x)$. Hence $L'(x) \neq L(x)$ implies
$\Delta(x) > \eps$, and therefore
$\Pr_{x \sim \overline{x}_n}\brac{\Delta(x) > \eps} > \eps$. Then consider
the algorithm $D$: on input $1^n$, sample $x \sim \overline{x}_n$ using
$S(1^n)$, and then output the circuit $D_x(r) = M(x, r)$.
Then
\begin{align*}
\Pr\brac{D \text{ prints a circuit that } \eps \text{-distinguishes } G(1^n, u_{\ell})}
&= \Pr\brac{\Delta(S(1^n)) > \eps}\\
&= \Pr_{x \sim \overline{x}_n}\brac{\Delta(x) > \eps}\\
&> \eps,
\end{align*}
which contradicts the fact that $G$ is an $\eps$-PRG.
\end{proof}
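For concreteness, the derandomization in the proof can be sketched as
follows. This is a minimal sketch, assuming hypothetical stand-ins
\texttt{M} (the $\BPP$ machine, as a predicate on an input and a random
string) and \texttt{G} (the PRG); neither interface is fixed by these
notes.
\begin{verbatim}
from itertools import product

def L_prime(x, M, G, seed_len):
    """Majority vote of M(x, G(s)) over all seeds s of length seed_len.

    M and G are hypothetical stand-ins for the BPP machine and the PRG.
    With seed_len = O(log n) there are only poly(n) seeds to enumerate.
    """
    votes, total = 0, 0
    for bits in product("01", repeat=seed_len):
        s = "".join(bits)
        votes += int(M(x, G(len(x), s)))
        total += 1
    return votes * 2 > total  # majority of the votes
\end{verbatim}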
This paradigm, combined with suitable hardness assumptions, yields the
following two results, which we state without proof.
\begin{theorem}
If
$\SPACE\brac{O(n)}$ is hard for $\BPTIME\brac{2^{\eps n}}$
then
$\RP = \P$ ``on average''.
\end{theorem}
\begin{theorem}
There exists a language $\fws \in \P^{\sharpp}$ such that if $\fws$
is hard for $\BPTIME\brac{2^{\eps n}}$ then
$\BPP = \P$ ``on average''.
\end{theorem}
The direct analogue of the hardness-to-derandomization result for
non-uniform circuits from Lecture~\ref{lec:03} is the following conjecture:
\begin{conjecture}
There exists an $\eps > 0$, such that $\E$ is hard for
$\BPTIME\brac{2^{\eps n}}$ if and only if for all polynomials $p(n)$ there
is a $(1 / n)$-PRG for uniform circuits generated in time $p(n)$ with
runtime $\poly(n)$ that stretches a $\log(n)$-sized seed.
\end{conjecture}
\section{Learning via Uniform Distinguishers}
Recall the Nisan--Wigderson PRG from Lecture~\ref{lec:02}: given a function
$f : \set{0, 1}^\ell \to \set{0, 1}$ and an $(\ell, \gamma \ell)$-design
$T_1, \dots, T_n$ over $[d]$, the generator maps a seed $s \in \set{0, 1}^d$
to
\[
  G^f(1^n, s) = \paren*{f(s|_{T_1}), \ldots, f(s|_{T_n})},
\]
where $s|_{T_i}$ denotes the projection of $s$ onto the coordinates in $T_i$.
\begin{definition}
Let $f_n : \set{0, 1}^n \to \set{0, 1}.$ A probabilistic algorithm $A$ with
oracle access to $f_n$ \emph{learns $f_n$ with success $\delta$ and advantage
$\eps$} if on input $1^n$, $A$ produces a circuit $C_n$ such that
$\Pr_{x \sim \set{0, 1}^n} \brac{C_n(x) = f_n(x)} \geq 1/2 + \eps$ with
probability at least $\delta$.
\end{definition}
\begin{proposition}\label{prop:learning}
Let $\ell = \log{n}$, let $\gamma \in (0, 1/2)$, and let $T_1, \dots, T_n$ be
an $(\ell, \gamma \ell)$-design over $[O(\ell)]$. Finally, let $f :
\set{0,1}^\ell \to \set{0, 1}$ and let $G^f$ be the Nisan-Wigderson PRG with
oracle $f$ and design $T_1, \dots, T_n$. If $D$ is a uniform circuit on $n$
inputs generated in time $\poly(n)$ that is not $(1/n)$-fooled by $G^f$, then
there is a $\poly(n)$-time algorithm to learn $f$ with advantage $1/n^2$ and
success $1/n$.
\end{proposition}
\begin{proof}
By definition, if $D$ is not $(1/n)$-fooled by $G^f$, then
\[
  \card{\Pr_{s}\brac{D(G^f(1^n, s)) = 1} - \Pr\brac{D(\overline{u}_n) = 1}} > 1/n.
\]
As in the hybrid argument (see Lecture~\ref{lec:02}), we define the
distributions
\begin{align*}
\overline{H}_0 &= (f(x_1), \ldots, f(x_n)),\\
\overline{H}_i &= (\overline{u}_i, f(x_{i + 1}), \ldots, f(x_n)),\\
\overline{H}_n &= \overline{u}_n,
\end{align*}
where $x_i$ is the projection of the seed given to the PRG onto the
coordinates $T_i$. Recall the (easy) lemma that there is an $i$ such that:
\[
\card{\Pr\brac{D(\overline{H}_{i - 1}) = 1} - \Pr\brac{D(\overline{H}_i) = 1}} > 1 / n^2.
\]
Roughly speaking, the learning algorithm will guess the value of this $i$ and
exploit the fact that $D$ can differentiate the bit $f(x_i)$ from a uniformly
random bit. More concretely, we describe the algorithm $A$:
\begin{enumerate}
\item Construct an $(\ell, \gamma \ell)$-design $T_1, \dots, T_n$ in
time $\poly(n)$ using Theorem~\ref{thm:design} from
Lecture~\ref{lec:02}.
\item Choose a random $i \in \brac{n}$.
\item Choose a random $z^* \in \set{0, 1}^{d - \ell}$, where $d = O(\ell)$
      is the seed length of the PRG. Here $s$ (the seed of the PRG)
      projected to $T_i$ will be equal to $x_i$ (the input of the circuit
      we construct), and the remaining coordinates will be filled by $z^*$.
\item Choose a random bit $\sigma$.
\item Query the oracle for $f$ on all possible inputs
      $x_{i + 1}, \ldots , x_n$. By the design property, at most
      $\gamma \ell$ bits of each of these $x_j$'s (namely those indexed by
      $T_i \cap T_j$) are unknown after fixing $z^*$, so there are at most
      $n \cdot 2^{\gamma \ell} \leq n \cdot \sqrt{n}$ queries to the oracle.
\item Choose a random $u^* \in \set{0, 1}^{i - 1}$.
\item Output the circuit $C_n$ that takes input $x_i$, simulates
      $D$ on $(u^*, \sigma, f(x_{i + 1}), \ldots , f(x_n))$ (with the
      oracle answers from the previous step hardwired as lookup tables in
      the bits of $x_i$ indexed by $T_i \cap T_j$), and outputs
      $\lnot \sigma$ if $D$ accepts, and $\sigma$ otherwise.
\end{enumerate}
To complete the proof, we claim:
\begin{claim}
With probability at least $1 / n$, $\Pr_{x}\brac{C_n(x) = f_n(x)} > 1/2 + 1/n^2$.
\end{claim}
To see the claim, suppose without loss of generality that the probability
that $D$ accepts on an input sampled from $\overline{H}_{i}$ is larger
than the probability it accepts on one from $\overline{H}_{i - 1}$.
For brevity, define the distribution $\overline{H}'_{i - 1}$ which differs
from $\overline{H}_{i - 1}$ only in the $i$-th coordinate, where it is
$\lnot f(x_i)$.
Then note that since the distributions $\overline{H}_i$ and
$\overline{H}_{i - 1}$ differ only in their $i$-th bit, and this bit is
either $f(x_i)$ or $\lnot f(x_i)$ with equal probability in
$\overline{H}_i$:
\[
\Pr\brac{D(\overline{H}_{i}) = 1}
= \frac 12 \Pr\brac{D(\overline{H}_{i - 1}) = 1}
+ \frac 12 \Pr\brac{D(\overline{H}'_{i - 1}) = 1}.
\]
But this means that $\Pr\brac{D(\overline{H}'_{i - 1}) = 1} >
\Pr\brac{D(\overline{H}_{i - 1}) = 1} + 2/n^2$.
Now, $C_n$ outputs $f(x_i)$ correctly if either:
\begin{itemize}
\item $\sigma = f(x_i)$, and $D$ rejects, which happens with
probability $1/2 \cdot \Pr\brac{D(\overline{H}_{i - 1}) = 0}$.
\item $\sigma \neq f(x_i)$ and $D$ accepts, which happens with
probability $1/2 \cdot \Pr\brac{D(\overline{H}'_{i - 1}) = 1}$.
\end{itemize}
Hence the probability that $C_n(x) = f_n(x)$ is
\[
  \frac 12 \paren*{
    \Pr\brac{D(\overline{H}_{i - 1}) = 0} +
    \Pr\brac{D(\overline{H}'_{i - 1}) = 1}
  }
  = \frac 12 \paren*{
    1 + \Pr\brac{D(\overline{H}'_{i - 1}) = 1}
      - \Pr\brac{D(\overline{H}_{i - 1}) = 1}
  }
  > \frac 12 + \frac 1{n^2}.
\]
\end{proof}
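To make the construction concrete, here is a minimal sketch of one run of
the learner $A$, under simplifying assumptions: the distinguisher
\texttt{D}, the oracle \texttt{f}, and the design \texttt{T} are abstract
stand-ins (none of their interfaces is fixed by these notes), and the
output ``circuit'' is represented as a closure that queries the oracle
directly rather than hardwiring lookup tables.
\begin{verbatim}
import random

def learner(D, f, T, d, n):
    """One run of A: guess a hybrid index i and build a predictor for f.

    D: distinguisher on n-bit tuples; f: oracle on ell-bit tuples;
    T: list of n index sets (the design) over range(d); d: seed length.
    All of these are hypothetical stand-ins.
    """
    i = random.randrange(1, n + 1)                    # step 2: guess i
    z = [random.randint(0, 1) for _ in range(d)]      # step 3: fix z*
    sigma = random.randint(0, 1)                      # step 4: random bit
    u = tuple(random.randint(0, 1) for _ in range(i - 1))  # step 6: u*

    def predictor(x_i):
        # Seed coordinates in T_i come from the input; the rest from z*.
        seed = list(z)
        for pos, coord in enumerate(T[i - 1]):
            seed[coord] = x_i[pos]
        # Blocks f(x_{i+1}), ..., f(x_n); the proof hardwires these as
        # small lookup tables, here we simply query the oracle.
        suffix = tuple(f(tuple(seed[c] for c in T[j])) for j in range(i, n))
        accepted = D(u + (sigma,) + suffix)
        return 1 - sigma if accepted else sigma       # step 7
    return predictor
\end{verbatim}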
\begin{corollary}
\label{corr:learning-boost-success}
If there is a $(1/n)$-distinguisher for the NW-PRG $G^f$, then there is an
algorithm that learns $f$ with success $1 - \delta$ and advantage
$1/(2n^2)$ that runs in time $\poly(n, \log(1/\delta))$.
\end{corollary}
\begin{proof}
Let $A$ be the learning algorithm for $f$ with success $1/n$ and advantage
$1/n^2$ promised by Proposition~\ref{prop:learning}. By running $A$
independently $n \log(1/\delta)$ times, the probability that at least one
of the circuits produced has advantage $1/n^2$ is
$1 - (1 - 1/n)^{n\log(1/\delta)} > 1 - \delta$. We can estimate the
agreement of each of the $n\log(1/\delta)$ circuits with $f$ to within an
additive error of $1/(2n^2)$ with probability $1 - \delta$ by evaluating it
against the oracle on $O(n^4 \log(1/\delta))$ random inputs. Hence if such
a circuit exists, we will find it in time $\poly(n, \log(1/\delta))$.
Reparameterizing $\delta$ gives the result.
\end{proof}
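A minimal sketch of this boosting procedure, assuming hypothetical
stand-ins \texttt{learn\_once} (one run of $A$), \texttt{f} (the oracle),
and \texttt{sample} (a source of uniform inputs); none of these interfaces
is fixed by the notes.
\begin{verbatim}
import math

def boost(learn_once, f, sample, n, delta):
    """Repeat the weak learner and keep a hypothesis whose estimated
    agreement with f exceeds 1/2 + 1/(2n^2).

    learn_once() returns a candidate predictor, f is the oracle, and
    sample() draws a uniform input; all are hypothetical stand-ins.
    """
    trials = max(1, math.ceil(n * math.log(1 / delta)))
    # O(n^4 log(1/delta)) samples estimate agreement to within 1/(2n^2).
    tests = math.ceil(16 * n ** 4 * math.log(trials / delta))
    for _ in range(trials):
        h = learn_once()
        xs = [sample() for _ in range(tests)]
        agreement = sum(h(x) == f(x) for x in xs) / tests
        if agreement >= 0.5 + 1 / (2 * n ** 2):
            return h
    return None  # no good hypothesis found (probability at most delta)
\end{verbatim}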
\section{Bootstrapping via Downward Self-Reducibility and LLDCs}
\begin{definition}
A function family $f_n : \set{0, 1}^n \to \set{0, 1}$ is \emph{downward
self-reducible (DSR)} if there exists an algorithm $A$ such that on input
$x \in \set{0, 1}^n$ and with oracle access to $f_{n-1}$, $A$ computes
$f_n(x)$ in linear time.%
\footnote{Linear time is not critical; it can be relaxed to, e.g.,
quadratic time, though it is unclear whether an arbitrary polynomial
running time suffices.}
\end{definition}
For example, $\SAT$ is DSR (under an encoding in which fixing a variable
does not increase the instance length): given a formula $\varphi$,
$\SAT(\varphi) = \SAT(\varphi|_{x_1 = 0}) \lor \SAT(\varphi|_{x_1 = 1})$.
Similarly, $\PERM$ is DSR via cofactor expansion.
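As an illustration, here is a minimal sketch of the $\SAT$ self-reduction;
the formula interface (\texttt{restrict}) is hypothetical and not part of
any fixed library.
\begin{verbatim}
def sat_dsr(phi, oracle):
    """Decide SAT for phi by querying an oracle for shorter instances.

    phi is a hypothetical formula object with a restrict(var, bit)
    method; oracle decides SAT on the restricted (shorter) instances.
    """
    return oracle(phi.restrict(1, 0)) or oracle(phi.restrict(1, 1))
\end{verbatim}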
\begin{proposition}
There exists a function $\fws \in \P^{\sharpp}$ such that:
\begin{enumerate}
\item $\fws$ is DSR.
\item For all $\ell \in \N$, the truth table of $\fws_\ell$ (the
restriction of $\fws$ to inputs of length $\ell$) is a codeword
which is:
\begin{enumerate}
\item Locally List Decodable from agreement $\eps(\ell) = 2^{-\ell /
100}$ in time $\poly(1/\eps)$ with list size $\poly(1 / \eps)$.
\item Uniquely Decodable from agreement $0.99$ in time $\poly(n)$.
\end{enumerate}
\end{enumerate}
\end{proposition}
\begin{theorem}\label{thm:uniform-hard-random-sharpP}
If $\fws$ is hard for $\BPTIME\brac{2^{\eps n}}$ then for all polynomials
$p(n)$ there is a $(1 / n)$-PRG for uniform circuits generated in time $p(n)$
with a $\log(n)$-sized seed and polynomial running time.
\end{theorem}
\begin{proofsk}
The proof is by reconstruction: Assume there is some uniform distinguisher
$D$ that is not fooled by the PRG. We will use $D$ to give a
$\BPTIME\brac{2^{\eps n}}$ algorithm for $\fws$.
Let $x \in \set{0, 1}^n$; we want to compute $\fws(x)$ efficiently. The
algorithm iteratively constructs circuits $F_i$, where $F_i$ computes
$\fws$ on all inputs of length $i$. We can compute $F_1$ in constant time.
We now describe how to construct $F_i$ from $F_{i-1}$:
\begin{enumerate}
\item Assume that $F_{i - 1}$ computes $\fws_{i - 1}$.
\item Let $A$ be the learning algorithm for
Corollary~\ref{corr:learning-boost-success} with $\delta = 1/n^2$. Run
$A$ on input $1^i$, answering queries $\fws_i(x)$ using the downward
self-reducibility of $\fws$ and the circuit $F_{i - 1}$.
\item With high probability, $A$ returns a circuit that agrees with
$\fws_i$ on a $1/2 + 1/i^2$ fraction of the inputs.
\item Compute a list of circuits via the LLDC decoder, treating the
      circuit from the previous step as the corrupted codeword.
\item Using random sampling, the downward self-reducibility of $\fws$,
      and $F_{i - 1}$, find a circuit in the list that agrees with
      $\fws_i$ on a $0.99$ fraction of the inputs.
\item Compose this circuit with the local unique decoder to obtain
      $F_i$, which computes $\fws_i$ on every input.
\end{enumerate}
To tie this up, observe that there are $n$ iterations of the loop above,
and in iteration $i$ we run in time $\poly(2^{i / c})$ and output a circuit
of size $\poly(2^{i / c})$, where $c$ is a constant under our control.
Taking $c$ large enough, the total running time is at most $2^{\eps n}$,
as required.
\end{proofsk}
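A minimal sketch of the bootstrapping loop follows; every argument is an
abstract stand-in for a component of the proof (the base case, the
downward self-reduction, the boosted learner, the LLDC decoder, an
agreement test, and the local unique decoder), none of which is specified
concretely in these notes.
\begin{verbatim}
def reconstruct(n, base_case, dsr, learn, list_decode, agreement,
                unique_decode):
    """Build circuits F_1, ..., F_n for f^ws level by level.

    All arguments are hypothetical stand-ins: base_case() returns F_1;
    dsr(y, F_prev) computes f^ws_i(y) from an F_{i-1} circuit; learn
    returns a circuit with 1/2 + 1/i^2 agreement; list_decode returns
    the LLDC candidate list; agreement estimates agreement by sampling;
    unique_decode composes with the local unique decoder.
    """
    F = base_case()  # F_1, computable in constant time
    for i in range(2, n + 1):
        oracle = lambda y, F_prev=F: dsr(y, F_prev)  # f^ws_i via DSR
        weak = learn(i, oracle)            # 1/2 + 1/i^2 agreement w.h.p.
        candidates = list_decode(weak)     # poly-size list of circuits
        good = next(C for C in candidates if agreement(C, oracle) >= 0.99)
        F = unique_decode(good)            # F_i: correct on every input
    return F
\end{verbatim}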
\section*{The Upshot}
\begin{enumerate}
\item Uniform distinguishers for the NW-PRG with hard function $f$ can be
used to learn $f$ with $1/\poly(n)$ advantage with high probability.
\item If the hard function $f$ has sufficient structure, the $1/\poly(n)$
advantage can be used to bootstrap a reconstruction of $f$.
\item LLDCs and downward self-reducibility play a crucial role in the
bootstrapping algorithm.
\end{enumerate}