\documentclass[reqno]{amsart} \usepackage{hyperref} \AtBeginDocument{{\noindent\small \emph{Electronic Journal of Differential Equations}, Vol. 2010(2010), No. 97, pp. 1--13.\newline ISSN: 1072-6691. URL: http://ejde.math.txstate.edu or http://ejde.math.unt.edu \newline ftp ejde.math.txstate.edu} \thanks{\copyright 2010 Texas State University - San Marcos.} \vspace{9mm}}
\begin{document}
\title[\hfilneg EJDE-2010/97\hfil Filippov approach in stochastic maximum principle] {Filippov approach in stochastic maximum principle without differentiability assumptions}
\author[M. Hafayed\hfil EJDE-2010/97\hfilneg] {Mokhtar Hafayed}
\address{Mokhtar Hafayed \newline Laboratory of Applied Mathematics, University of Med-Khider, PO Box 145, Biskra (7000), Algeria}
\email{hafa.mokh@yahoo.com}
\thanks{Submitted April 15, 2010. Published July 15, 2010.}
\subjclass[2000]{60H10, 34F05}
\keywords{Stochastic differential equation; generalized Filippov's solutions; \hfill\break\indent optimal control; maximum principle; Ekeland's variational principle}
\begin{abstract}
In this article, we establish necessary conditions for optimality in stochastic control of systems governed by stochastic differential equations with nonsmooth coefficients. The approach used is based on the approximation of the nonsmooth coefficients by smooth ones, which generates a sequence of smooth control problems. Ekeland's variational principle is then applied to obtain a sequence of nearly optimal controls which satisfy necessary conditions for near optimality. By using the generalized notion of Filippov solutions and stable convergence, we obtain an explicit formula for the adjoint process and the inequality between the Hamiltonians, on a good extension of the initial filtered probability space.
\end{abstract}
\maketitle
\numberwithin{equation}{section}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{remark}[theorem]{Remark}
\allowdisplaybreaks

\section{Introduction}
We study a stochastic control problem where the system is governed by a nonlinear stochastic differential equation (SDE for short) of the form
\begin{equation}
\begin{gathered}
dX_t=b(t,X_t,u_t)dt+\sigma (t,X_t)dB_t,\\
X_0=x,
\end{gathered} \label{e1.1}
\end{equation}
where $B_t$ is a $d$-dimensional Brownian motion defined on the filtered probability space $(\Omega ,\mathcal{F} ,\mathcal{F}_t,\mathbb{P})$. The finite horizon cost function to be minimized over admissible controls is given by
\begin{equation}
J(u)=\mathbb{E}(g(X_T)) \label{e1.2}
\end{equation}
where $u$ is an admissible control and $X_T$ is the value at the terminal time $T$ of the diffusion process solving \eqref{e1.1}. A control $\hat{u}\in \mathcal{U}_{\rm ad}$ is called optimal if it satisfies $J(\hat{u})=\inf_{u\in \mathcal{U}_{\rm ad}}\{ J(u)\} $. The corresponding state trajectory $\hat{X}$ and the pair $(\hat{X},\hat{u})$ are called an optimal state trajectory and an optimal pair, respectively.

The stochastic maximum principle (SMP in short) has been and remains an important tool in many areas in which optimal control plays a role. Pontryagin \textit{et al} \cite{p2} were the first to announce the maximum principle. Kushner \cite{k1} employed the spike variation and Neustadt's variational principle to derive a stochastic maximum principle.
On the other hand, Haussmann \cite{h1} extensively investigated necessary conditions for stochastic optimal state feedback controls based on Girsanov's transformation.

The case of stochastic systems with nonsmooth coefficients has been treated in \cite{b1,b2,m1,w1}. Bahlali \textit{et al} \cite{b1} employed Krylov's inequality to derive a stochastic maximum principle with nonsmooth coefficients and nondegenerate diffusion. Necessary conditions for optimality for degenerate diffusions with nonsmooth coefficients were established by Bahlali \textit{et al} \cite{b2}. Necessary conditions for optimality for diffusions with nonsmooth drift were obtained by Mezerdi \cite{m1}, using Clarke's generalized gradient and the stable convergence of probability measures. A further difficulty arises when the diffusion coefficient $\sigma $ contains the control variable $u$; among the works treating this case one can see \cite{a2,b3,p1}. Peng \cite{p1} introduced the second-order adjoint equation and obtained a maximum principle in which the control enters both the drift and the diffusion coefficients and the set of controls is not necessarily convex. A good account and an extensive list of references on the maximum principle and optimal stochastic control can be found in Yong \textit{et al} \cite{y1}. Filippov \cite{f1} developed a solution concept for ordinary differential equations (ODEs in short) with a discontinuous right-hand side. When a function $V$ is locally Lipschitz continuous, the Filippov set-valued map associated with its gradient coincides with Clarke's generalized gradient of $V$.

The main contribution of the present paper is to extend the stochastic maximum principle to the case where the drift and the diffusion coefficients are nonsmooth, in the sense that they are only Lipschitz continuous and satisfy a linear growth condition. Our approach is to express a generalized derivative of $b$ and $\sigma $ through a Filippov differential inclusion type argument, in terms of well defined smooth approximations, and to use the stable convergence of probability measures to characterize the first order adjoint equation. A similar type of stochastic maximum principle was derived by Mezerdi \cite{m1} for a non-differentiable drift, using Clarke's generalized gradient. The novelty of our maximum principle lies in the advantage of the Filippov approach, which allows one to express the generalized gradient in terms of the underlying approximating sequences $b_{x}^n$ and $\sigma_{x}^n$ constructed in Section 5, a property that is not explicit in Clarke's approach.

The rest of the paper is organized as follows. In Section 2, we present the formulation of the problem. Section 3 is devoted to the classical maximum principle. In Section 4, we give some properties of the Filippov set-valued map. Section 5 contains our main result, where we give a generalized stochastic maximum principle for our stochastic control problem.

\section{Problem Formulation and Preliminaries}
Throughout this paper, we assume $(\Omega ,\mathcal{F} ,\mathcal{F}_t,\mathbb{P})$ is a filtered probability space and $B_t$ a Brownian motion with values in $\mathbb{R}^d $. Let $\mathbb{A}$ be a Borel subset of $\mathbb{R}^n$. A control $u_t$ is called admissible if it is measurable and $\mathcal{F}_t$-adapted with values in $\mathbb{A}$. We denote by $\mathcal{U}_{\rm ad}$ the space of admissible controls, by $B_t^{j}$ the $j$th component of $B_t$ and by $\sigma ^{j}$ the $j$th column of the matrix $\sigma$.
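As a simple illustration of the class of problems considered here (the coefficients below are chosen purely for illustration), take $d=n=1$, $\mathbb{A}=[-1,1]$ and
\begin{gather*}
dX_t=(u_t-|X_t|)\,dt+dB_t,\quad X_0=x, \\
J(u)=\mathbb{E}\big(g(X_T)\big),
\end{gather*}
with $g$ satisfying the conditions stated below. The drift $b(t,x,u)=u-|x|$ is $1$-Lipschitz in $x$, has linear growth and is continuous in $u$, so it satisfies assumptions \eqref{e2.1}--\eqref{e2.3} below; however it is not differentiable at $x=0$, so the classical maximum principle of Section 3 does not apply directly.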
Let $b:[ 0,T] \times \mathbb{R}^d \times \mathbb{A}\to \mathbb{R}^d $ and the diffusion matrix $\sigma :[0,T]\times \mathbb{R}^d \to \mathbb{R}^d \otimes \mathbb{R}^d $ be Borel functions such that there exist positive constants $K$ and $c$ with, for all $(t,x,y,u)\in [0,T] \times \mathbb{R}^d \times \mathbb{R}^d \times \mathbb{A}$,
\begin{gather}
|\sigma (t,x)-\sigma (t,y)| +|b(t,x,u)-b(t,y,u)|\leq K|x-y|, \label{e2.1} \\
|\sigma (t,x)|+|b(t,x,u)|\leq c(1+|x|), \label{e2.2} \\
b(t,x,.):\mathbb{A}\to \mathbb{R}^d \text{ is continuous.} \label{e2.3}
\end{gather}
From assumptions \eqref{e2.1} and \eqref{e2.2} it is easy to see that equation \eqref{e1.1} satisfies the usual It\^{o} conditions; therefore it has a unique strong solution such that for any $q\geq 1$,
\[
\mathbb{E}[\sup_{t\leq T} |X_t|^{q}]<+\infty .
\]
The cost function to be minimized, defined by \eqref{e1.2}, satisfies the condition
\begin{equation}
g:\mathbb{R}^d \to \mathbb{R}\text{ is continuously differentiable,} \label{e2.4}
\end{equation}
with $|g(x)|\leq c[1+| x|]$ and $|g_{x}(x)|\leq M$, where $g_{x}$ denotes the gradient of $g$. Finally, throughout this paper we assume that an optimal control $\hat{u}$ exists.

\section{Classical stochastic maximum principle}
In the regular case, the control problem consists in finding an admissible control $\hat{u}$ which minimizes the cost $J(u)$. The necessary conditions satisfied by the control $\hat{u}$, which is assumed to exist, are called the stochastic maximum principle. In this case we assume
\begin{equation}
b(t,.,u),\sigma ^{j}(t,.):\mathbb{R} ^d \to \mathbb{R}^d \text{ are continuously differentiable.} \label{e3.1}
\end{equation}
To obtain these necessary conditions for optimality, we compare $\hat{u}$ with controls which are strong perturbations defined by
\[
u_{h}(t)=\begin{cases} v &\text{if }t\in [t_0,t_0+h], \\ \hat{u}_t &\text{otherwise.} \end{cases}
\]
We define the Hamiltonian $H(t,x,u,p):=\langle p,b(t,x,u)\rangle$, where $\langle p,b(t,x,u)\rangle$ is the scalar product in $\mathbb{R}^d $.

\begin{lemma} \label{lem1}
(1) Let $X_{h}$ be the trajectory corresponding to $u_{h}$; then
\[
\mathbb{E}\big(\sup_{t\leq T} | X_t^{h}-\hat{X}_t| \big)^2\leq Kh^2.
\]
(2) Let $\Phi (t)$ be the solution of the linear stochastic differential equation
\begin{equation}
\begin{gathered}
d\Phi (t)=b_{x}(t,\hat{X}_t,\hat{u}_t)\Phi (t) dt+\sum_{1\leq j\leq d} \sigma_{x}^{j}(t,\hat{X}_t)\Phi ( t)dB_t^{j}, \\
\Phi_0=b(t,\hat{X}_t,v)-b(t,\hat{X}_t,\hat{u}_t),
\end{gathered} \label{e3.2}
\end{equation}
where $b_{x}$ and $\sigma_{x}^{j}$ are the derivatives of $b$ and $\sigma ^{j}$ ($j=1,\dots ,d$) with respect to the state variable $x$. Then
\[
\lim_{h\to 0} \mathbb{E}\Big(\Big|\frac{X_T^{h}-\hat{X}_T}{h}-\Phi (T)\Big|^2\Big)=0.
\]
(3) $\frac{d}{dh}\{ J(u_{h})\} \big|_{h=0} =\mathbb{E}[H(t,\hat{X}_t,\hat{u}_t,p_t)]-\mathbb{E}[ H(t,\hat{X}_t,v,p_t)]$.
\end{lemma}

See Bensoussan \cite{b3} or Mezerdi \cite{m1} for a detailed proof of the above lemma. Under the differentiability assumptions \eqref{e3.1}, the regular version of the stochastic maximum principle is given by the following lemma.

\begin{lemma} \label{lem2}
Let $(\hat{X},\hat{u})$ be an optimal pair. Then there exists an adapted process $p(t)$ satisfying
\begin{gather}
p(t)=-\mathbb{E}[\Phi ^{\ast }(T,t)g_{x}( \hat{X}_T)| \mathcal{F}_t], \label{e3.3}\\
H(t,\hat{X}_t,\hat{u}_t,p_t)=\max_{v\in \mathbb{A}}H(t,\hat{X}_t,v,p_t) \quad dt\text{-a.e.,}
\; \mathbb{P}\text{-a.s.}, \label{e3.4}
\end{gather}
where $\Phi ^{\ast }(T,t)$ is the transpose of $\Phi (T,t)$, the solution of \eqref{e3.2}.
\end{lemma}

See Mezerdi \cite{m1} or Yong \textit{et al} \cite{y1} for a detailed proof of the above lemma. We call $p(t)$ the adjoint process, \eqref{e3.3} the adjoint equation and \eqref{e3.4} the maximum condition. For every $\varepsilon >0$ there exists a control $u^{\varepsilon }$, called near optimal, such that
\[
J(u^{\varepsilon })\leq \inf \{ J(u):u\in \mathcal{U} _{\rm ad}\} +\varepsilon .
\]
In this part we establish necessary conditions of near optimality satisfied by a sequence of nearly optimal strict controls. This result is based on Ekeland's variational principle, which is given as follows.

\begin{lemma}[Ekeland's Lemma] \label{lem3}
Let $(E,d)$ be a complete metric space and $f:E\to \overline{\mathbb{R}}$ be lower semicontinuous and bounded from below. Let $\varepsilon >0$ and $u^{\varepsilon }\in E$ satisfy $f(u^{\varepsilon })\leq \inf (f)+\varepsilon $. Then for any $\lambda >0$, there exists $v\in E$ such that
\begin{itemize}
\item[(i)] $f(v)\leq f(u^{\varepsilon})$.
\item[(ii)] $d(u^{\varepsilon },v)\leq \lambda $.
\item[(iii)] $f(v)<f(w)+\frac{\varepsilon}{\lambda}d(v,w)$ for all $w\neq v$.
\end{itemize}
\end{lemma}

\begin{lemma} \label{lem4}
For every $\varepsilon >0$ there exist a near optimal control $u_t^{\varepsilon }$ and an adapted process $p^{\varepsilon }(t)$, defined as in \eqref{e3.3} with $(\hat{X},\hat{u})$ replaced by $(X^{\varepsilon },u^{\varepsilon })$, such that for all $v\in \mathbb{A}$,
\[
\mathbb{E}[\langle p^{\varepsilon }(t),b( t;X_t^{\varepsilon },v)\rangle ]\leq \mathbb{E}[ \langle p^{\varepsilon }(t),b(t;X_t^{\varepsilon },u_t^{\varepsilon })\rangle ]+\varepsilon \quad dt\text{-a.e.}
\]
\end{lemma}

\begin{proof}
Since $u^{\varepsilon }$ is optimal for the cost $J_{\varepsilon }(u)=J(u)+\varepsilon d(u,u^{\varepsilon})$, we can apply the preceding results to derive the adjoint process and the inequality between the Hamiltonians. Notice that if $u_{h}^{\varepsilon }$ denotes a strong perturbation of $u^{\varepsilon }$, then $d(u_{h}^{\varepsilon },u^{\varepsilon })=h$ (see Mezerdi \cite{m1}, Bensoussan \cite{b3}).
\end{proof}

\section{Filippov's set-valued map and Generalized gradient}

\subsection{Filippov's set-valued map}
We give in this section some basic notions and concepts concerning the generalized Filippov set-valued map, which is described briefly as follows. Let us consider a function $b:\mathbb{R}^n\to \mathbb{R}^n$ to which we associate the following set-valued map, called the Filippov regularization of $b$,
\begin{equation}
F_{b}(x):=\cap_{\lambda (N)=0} \cap_{\delta >0} \overline{\mathop{\rm co}} b((x+\delta B) -N), \label{e4.1}
\end{equation}
where $\overline{\mathop{\rm co}}(A)$ means the closure of the convex hull of $A$. The first intersection $\cap_{\lambda (N)=0}$ is taken over all subsets $N$ of $\mathbb{R}^n$ which are negligible with respect to the Lebesgue measure $\lambda $, and $B$ is the closed unit ball. To the function $b:\mathbb{R}^n\to \mathbb{R}^n$ we also associate the ordinary differential equation
\begin{equation}
x'(t)=b(x(t)),\quad t\geq 0,\; x(0) =x. \label{e4.2}
\end{equation}
Without regularity assumptions on $b$ (such as Lipschitz continuity), it is well known that neither existence nor uniqueness holds in general. An absolutely continuous function $t\in [0,+\infty ) \mapsto x(t)\in \mathbb{R}^n$ is a Filippov solution of the ODE \eqref{e4.2} if and only if it is a solution of the differential inclusion
\begin{equation}
x'(t)\in F_{b}(x(t)),\quad t\geq 0,\; x(0)=x. \label{e4.3}
\end{equation}
The set-valued map $F_{b}$ is upper semicontinuous with compact convex values.
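To illustrate these notions on a standard example (included here only for the reader's convenience), consider in dimension one the field $b(x)=-\operatorname{sgn}(x)$ for $x\neq 0$, with an arbitrary value $b(0)\in \{-1,1\}$. Starting from $x(0)=0$, the ODE \eqref{e4.2} has no classical (nor Carath\'eodory) solution: any solution would have to leave $0$ with speed $\pm 1$ and is immediately driven back towards $0$. On the other hand,
\[
F_{b}(0)=[-1,1],
\]
since for every Lebesgue-negligible set $N$ and every $\delta >0$ the set $b\big((0+\delta B)-N\big)$ contains both values $-1$ and $1$, so its closed convex hull is $[-1,1]$. In particular $0\in F_{b}(0)$, and $x(t)\equiv 0$ is a Filippov solution of \eqref{e4.3}, regardless of the value assigned to $b(0)$.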
The upper semicontinuity of $F_{b}$, together with the compactness and convexity of its values, implies that the differential inclusion \eqref{e4.3} has a nonempty set of (local) solutions (Aubin \cite{a1}). In the following proposition we summarize some of its properties.

\begin{proposition} \label{prop1}
Let $b:\mathbb{R}^n\to \mathbb{R}^n$ be a measurable and bounded function. Then we have:
\begin{itemize}
\item[(1)] There exists a Lebesgue-negligible set $N_{b}$ such that for any $x\in \mathbb{R}^n$,
\begin{equation}
F_{b}(x)=\cap_{\delta >0} \overline{\mathop{\rm co}}b( (x+\delta B)-N_{b}), \label{e4.4}
\end{equation}
\item[(2)] For almost all $x\in \mathbb{R}^n$, we have $b(x)\in F_{b}(x)$.
\item[(3)] The set-valued map $F_{b}$ is the smallest upper semicontinuous set-valued map $F$ with closed convex values such that $b(x)\in F(x)$, for almost all $x\in \mathbb{R}^n$.
\item[(4)] The set-valued map $F_{b}$ is single-valued if and only if there exists a continuous function $g$ which coincides almost everywhere with $b$. In this case we have $F_{b}(x)=\{ g(x)\} $ for almost all $x\in \mathbb{R}^n$.
\item[(5)] If a function $\tilde{b}$ coincides almost everywhere with $b$, then $F_{b}(x)=F_{\tilde{b}}(x)$ for all $x\in \mathbb{R}^n$.
\item[(6)] There exists a function $\bar{b}$ which is equal almost everywhere to $b$ and such that
\[
F_{b}(x)=\cap_{\delta >0} \overline{\mathop{\rm co}}\bar{b} (x+\delta B).
\]
\item[(7)] We have $F_{b}(x)=\cap_{b=\bar{b}}\cap_{\delta >0} \overline{\mathop{\rm co}}\bar{b}(x+\delta B)$, where the first intersection is taken over all functions $\bar{b}$ equal to $b$ almost everywhere.
\end{itemize}
\end{proposition}

See Buckdahn \textit{et al} \cite{b6} for a proof of the above proposition. As an example, in the one dimensional case $(n=1)$, for $b:\mathbb{R} \to \mathbb{R}$ one can check that, for all $x\in \mathbb{R}$,
\[
F_{b}(x)=[ \underline{m}_{b}(x),\overline{m}_{b}(x)]
\]
where
\[
\underline{m}_{b}(x):=\sup_{\delta >0} (\mathop{\rm ess\,inf} _{[x-\delta ,x+\delta ]} b),\quad \overline{m}_{b}(x):=\inf_{\delta >0} (\mathop{\rm ess\,sup}_{[x-\delta ,x+\delta ]} b).
\]
In the case where $b(x)=\operatorname{sgn}(x)$, we have $F_{b}(0)=[-1,1]$.

\subsection{Connection between Filippov's approach and Clarke's generalized gradient}
We give in this subsection the connection between Filippov's differential inclusion and Clarke's generalized gradient.

\noindent\textbf{Clarke's generalized gradient.}
Let $V:\mathbb{R}^n\to \mathbb{R}$ be locally Lipschitz continuous. We define the generalized gradient of $V$ as
\begin{equation}
\partial_{c}V(x)=\overline{\mathop{\rm co}}\{ \lim_{x_i\to x}\nabla V(x_i),\; x_i\notin \Omega_{V}\cup N\}, \label{e4.5}
\end{equation}
where $\Omega_{V}$ is the set of Lebesgue measure zero where $\nabla V$ does not exist and $N$ is an arbitrary set of measure zero.

\begin{lemma} \label{lem6}
The map $F:\{ b:\mathbb{R}^{m}\to \mathbb{R}^n\} \to \{ g:\mathbb{R}^{m}\to 2^{\mathbb{R}^n}\} $ has the following properties:

(1) Assume that $b:\mathbb{R}^{m}\to \mathbb{R}^n$ is locally bounded. Then there exists $N_{b}\subset \mathbb{R}^{m}$ with $\lambda (N_{b})=0$ such that for all $N\subset \mathbb{R}^{m}$ with $\lambda (N)=0$,
\[
F_{b}(x)=\overline{\mathop{\rm co}}\{ \lim_{x_i\to x}b( x_i),\;x_i\notin N_{b}\cup N\} .
\]
(2) Assume that $b,f:\mathbb{R}^{m}\to \mathbb{R}^n$ are locally bounded; then
\[
F_{(b+f)}(x)\subset F_{b}(x) +F_{f}(x).
\]
(3) Assume that $b_{j}:\mathbb{R}^{m}\to \mathbb{R} ^{n_{j}}$, $j\in \{ 1,2,\dots ,N\}$, are locally bounded; then
\[
F_{\prod_{j=1}^{N}b_{j}}(x)\subset \prod_{j=1}^{N}F_{b_{j}}(x).
\]
(4) Let $g:\mathbb{R}^{m}\to \mathbb{R}^n$ be $C^1$ with $\operatorname{rank} Dg(x)=n$, and let $b:\mathbb{R}^n\to \mathbb{R}^{p}$ be locally bounded; then
\[
F_{b\circ g}(x)=F_{b}(g(x)).
\]
(5) Let $g:\mathbb{R}^{m}\to \mathbb{R}^{p\times n}$ (i.e. matrix valued) be $C^{0}$ and $b:\mathbb{R}^{m}\to \mathbb{R}^n$ be locally bounded; then $F_{gb}(x)=g(x) F_{b}(x)$, where $gb(x):=g(x)b( x)$.

(6) Let $V:\mathbb{R}^{m}\to \mathbb{R}$ be locally Lipschitz continuous; then $F_{\nabla V}(x)=\partial_{c}V(x)$.
\end{lemma}

The proof of the above lemma can be found in Paden \textit{et al} \cite{p3}.

\begin{remark} \label{rmk1} \rm
(i) Since $V$ is locally Lipschitz, $\nabla V$ is defined almost everywhere and locally bounded (Rademacher's Theorem). By using Lemma \ref{lem6} we have $F_{\nabla V}(x)=\partial_{c}V(x)$. \\
(ii) In particular, if $V$ is Fr\'echet-differentiable at $x$, then
\[
F_{\nabla V}(x)=\partial_{c}V(x)=\{V'(x)\} .
\]
\end{remark}

\section{Main results}
In this section we establish a generalized stochastic maximum principle for diffusions without differentiability assumptions on the coefficients: $b$ and $\sigma ^{j}$ are only required to satisfy assumptions \eqref{e2.1} and \eqref{e2.2}; we thus weaken the differentiability assumptions on these coefficients. The method is described briefly as follows. Let $E$ be a Banach space, $E^{\ast }$ its dual, and let $f:V\to \mathbb{R}^d $, where $V$ is a closed subset of $E$, satisfy the following conditions:
\begin{itemize}
\item[(H1)] There exist $\lambda_n >0$ and $f^n:V\to \mathbb{R}^d $, G\^{a}teaux-differentiable in the ball $(y+\lambda_n B)$, with $f^n(y)=f(y)$.
\item[(H2)] There exists $\varepsilon_n >0$ such that $\frac{\varepsilon_n }{\lambda_n }\to 0$ as $n\to +\infty $, $f^n$ is continuous and $|f^n(x)-f(x)| \leq \varepsilon_n $ for $x\in (y+\lambda_n B)$.
\end{itemize}
We shall approximate the drift $b$ and the diffusion $\sigma ^{j}$ by sequences of smooth functions $b^n$ and $\sigma ^{j,n}$ by using the following regularization. Let $\xi_n :\mathbb{R}\to \mathbb{R}$ be a positive $C^{\infty }$ function vanishing outside the interval $[-\varepsilon_n ,\varepsilon_n ]$ such that $\int_{\mathbb{R}}\xi_n (x)dx=1$, where $\varepsilon_n \to 0$ as $n\to +\infty $. We set $\rho_n (x)=\prod_{j=1}^{d} \xi_n (x^{j})$, where $x=(x^1,\dots ,x^d )\in \mathbb{R}^d $; $\rho_n $ is a $C^{\infty }$ function with compact support. We define the smooth functions $b^n=b\ast \rho_n $ and $\sigma ^{j,n}=\sigma ^{j}\ast \rho_n $, obtained by convolving each component of $b$ and $\sigma ^{j}$ with $\rho_n $ in the state variable. The next lemma gives the properties satisfied by these functions.

\begin{lemma} \label{lem7}
(1) $b^n:[0,T]\times \mathbb{R}^d \times \mathbb{A} \to \mathbb{R}^d $ and $\sigma ^{j,n}:[0,T]\times \mathbb{R}^d \to \mathbb{R}^d $ are Borel functions.

(2) $b^n$ and $\sigma ^{j,n}$ are $k$-Lipschitz in the second variable $x$ and have linear growth.

(3) $b^n$ and $\sigma ^{j,n}$ are $\mathcal{C}^{\infty}$ in $x$, and for all $(t,x,u)\in [0,T]\times \mathbb{R}^d \times \mathbb{A}$: $|b^n(t,x,u)-b(t,x,u) |\leq k\varepsilon_n $ and $|\sigma ^{j,n}(t,x)-\sigma ^{j}(t,x)|\leq k\varepsilon_n $.
\end{lemma}

The statements in the above lemma are classical facts; see Frankowska \cite{f2} and Mezerdi \cite{m1}.
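For completeness, let us recall the standard computation behind statement (3) of the above lemma; we include it only as a sketch, the dimensional constant being absorbed into $k$. Since $\rho_n \geq 0$, $\int_{\mathbb{R}^d}\rho_n (y)dy=1$ and $\rho_n $ vanishes outside $[-\varepsilon_n ,\varepsilon_n ]^d$, the Lipschitz condition \eqref{e2.1} gives
\begin{align*}
|b^n(t,x,u)-b(t,x,u)|
&=\Big|\int_{\mathbb{R}^d}\big(b(t,x-y,u)-b(t,x,u)\big)\rho_n (y)\,dy\Big| \\
&\leq \int_{\mathbb{R}^d}K|y|\,\rho_n (y)\,dy
\leq K\sqrt{d}\,\varepsilon_n ,
\end{align*}
and the same estimate holds for $|\sigma ^{j,n}(t,x)-\sigma ^{j}(t,x)|$.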
Note that $b^n$ and $\sigma ^{j,n}$ satisfy conditions (H1) and (H2) with $\lambda_n =\sqrt{\varepsilon_n }$. So we can define
\begin{gather*}
\partial_{c}b(t,y,u)=\cap_{n\geq 0} \overline{\mathop{\rm co}} \cup_{k\geq n} [b_{x}^{k}( t,x,u): x\in (y+\lambda_n B)], \\
\partial_{c}\sigma ^{j}(t,y)=\cap_{n\geq 0} \overline{\mathop{\rm co}}\cup_{k\geq n} [\sigma _{x}^{j,k}(t,x): x\in (y+\lambda_n B)].
\end{gather*}
Let $X^n$ be the solution of
\begin{equation}
\begin{gathered}
dX_t^n=b^n(t,X_t^n,u_t^n)dt+\sigma ^n(t,X_t^n)dB_t, \\
X_0^n=x.
\end{gathered} \label{e5.1}
\end{equation}
Let $\Phi_n (s,t)$ denote the fundamental solution, for $s\geq t$, of the linear equation
\begin{equation}
\begin{gathered}
d\Phi_n (t)=b_{x}^n(t,X_t^n,u_t^n)\Phi _n (t)dt+\sum_{1\leq j\leq d} \sigma _{x}^{j,n}(t,X_t^n)\Phi_n (t)dB_t^{j},\\
\Phi_n (s,s)=I_{d}.
\end{gathered} \label{e5.2}
\end{equation}
The following lemma will play an important role below.

\begin{lemma} \label{lem8}
There exist $u^n\in \mathcal{U}_{\rm ad}$ and $\lambda_n =\sqrt{\delta_n }$ such that
\begin{itemize}
\item[(i)] $d(u^n,u^{\ast })\leq \lambda_n $.
\item[(ii)] $\mathbb{E}[H_n (t;X_t^n,u_t^n,p_t^n)]\geq \mathbb{E}[ H_n (t;X_t^n,v,p_t^n)]-\lambda_n $ for all $v\in \mathbb{A}$, $dt$-a.e. The associated adjoint process is given by
\[
p_n (t)=-\mathbb{E}\{ \Phi_n ^{\ast }(T,t) g_{x}(X_T^n)|_{\mathcal{F}_t}\} .
\]
\end{itemize}
\end{lemma}

\begin{proof}
Since $u_t^n$ is optimal for the cost $J_n (u)+(\delta_n )^{1/2}d(u,u^n)$, we proceed as in Lemma \ref{lem2} to derive a maximum principle for $u_t^n$. The rest of the proof is similar to that of the approximate maximum principle; see Yong \textit{et al} \cite{y1}.
\end{proof}

Notice that, since the gradient $g_{x}$ is continuous, we have $g_{x}(X_T^n)\to g_{x}(\hat{X}_T)$ $\mathbb{P}$-a.s. as $n\to +\infty $.

\subsection{Weak limit solution with stable convergence}
The passage to the limit in $\Phi_n $ is carried out by using the stable convergence of probability measures introduced by Jacod \textit{et al} \cite{j1}. This convergence is contained between convergence in law and convergence in probability. We shall make use of the notion of a good extension of a filtered probability space.

\begin{definition} \label{def2} \rm
The space $(\overline{\Omega },\overline{\mathcal{F}},\overline{\mathcal{F}}_t,\overline{\mathbb{P}} )$ is a good extension of $(\Omega ,\mathcal{F},\mathcal{F}_t,\mathbb{P})$ if the following conditions are satisfied:
\begin{itemize}
\item[(1)] $\overline{\Omega } =\Omega \times \hat{\Omega}$ where $\hat{\Omega}$ is an auxiliary space.
\item[(2)] $\mathcal{F} \subset \overline{\mathcal{F}}$ in the sense that $A\times \hat{\Omega}\in \overline{\mathcal{F}}$ for $A\in \mathcal{F} $, and similarly $\mathcal{F}_t\subset \overline{\mathcal{F}}_t$.
\item[(3)] $\overline{\mathbb{P}}(A\times \hat{\Omega} )=\mathbb{P}(A)$ for $A\in \mathcal{F} $.
\item[(4)] Each $(\mathcal{F}_t-\mathbb{P})$ martingale is an $(\overline{\mathcal{F}}_t-\overline{\mathbb{P}})$ martingale.
\end{itemize}
\end{definition}

Clearly, since $b^n$ and $\sigma ^{j,n}$ are $k$-Lipschitz in $x$ and continuously differentiable, the matrices of partial derivatives $b_{x}^n$ and $\sigma_{x}^{j,n}$ are bounded by the Lipschitz constant $k$.
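As a simple illustration of this boundedness and of the sets $\partial_{c}b$ defined above (the computation is included only as an illustration, for the drift $b(t,x,u)=u-|x|$ of Section 2, with $d=1$), the regularized drift $b^n(t,\cdot ,u)=b(t,\cdot ,u)\ast \rho_n $ has derivative
\[
b_{x}^n(t,x,u)=-\int_{\mathbb{R}}\operatorname{sgn}(x-y)\,\rho_n (y)\,dy\in [-1,1],
\]
so it is indeed bounded by the Lipschitz constant; moreover, letting $n\to +\infty $ in the intersection defining $\partial_{c}b$ yields $\partial_{c}b(t,0,u)=[-1,1]$, which coincides with Clarke's generalized gradient of $x\mapsto u-|x|$ at $x=0$, in accordance with Lemma \ref{lem6}(6).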
Let us define the canonical spaces associated with the processes $b_{x}^n(t,X_t^n,u_t^n)$, $\sigma_{x}^{j,n}(t,X_t^n)$, $(\sigma_{x}^n)^{\ast }(\sigma_{x}^n)$ and $\Phi_n (t)$.

(1) Let $\Omega_1$ be the canonical space of $b_{x}^n(t,X_t^n,u_t^n)$, defined as follows. Let $D_1=\{ \beta_1:[0,T]\to \mathbb{R}^d \otimes \mathbb{R}^d \text{ measurable such that }\| \beta_1\| \leq c\} $. It is clear that $b_{x}^n$ takes values in $D_1$, which is a uniformly integrable subset of $\mathbb{L}^1([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )$, hence a relatively compact subset with respect to the weak topology $\sigma (\mathbb{L}^1,\mathbb{L}^{\infty })$ (Dunford-Pettis Theorem). Let $\Omega_1=\overline{D}_1$ (weak or strong closure of $D_1$, because $D_1$ is convex). We define $\mathcal{F}^1$ as the filtration of the coordinates generated by the subsets of the form
\[
A=\{ \beta_1\in \Omega_1:\int_0^{t}\langle \beta_1,f(s)\rangle ds\leq c,\text{ where }c\in \mathbb{R},\;f\in L^{\infty }([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )\} .
\]
$(\Omega_1, \mathcal{F}^1, \mathcal{F}_t^1)$ is the canonical space associated with the process $b_{x}^n(t,X_t^n,u_t^n)$.

(2) Let $D_2=\{ \beta_2^{j} :[0,T]\to \mathbb{R}^d \otimes \mathbb{R}^d \text{ measurable such that }\| \beta_2^{j}\| \leq c\} $. It is clear that $\sigma_{x}^{j,n}(t,X_t^n)$ takes values in $D_2$, which is a uniformly integrable subset of $\mathbb{L}^1([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )$, hence a relatively compact subset with respect to the weak topology $\sigma (\mathbb{L}^1, \mathbb{L}^{\infty })$ (Dunford-Pettis Theorem). Let $\Omega_2= \overline{D}_2$ (weak or strong closure of $D_2$, because $D_2$ is convex). Then $\Omega_1$ and $\Omega_2$ are compact metrizable spaces. We define $\mathcal{F}^2$ as the filtration of the coordinates generated by the subsets of the form
\[
B=\{ \beta_2^{j}:\int_0^{t}\langle \beta _2^{j},h(s)\rangle ds\leq c,\text{ where }c\in \mathbb{R},\;h\in L^{\infty }([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )\} ,
\]
and $(\Omega_2, \mathcal{F}^2, \mathcal{F}_t^2)$ is the canonical space associated with the process $\sigma_{x}^{j,n}(t,X_t^n)$.

(3) Let $\Omega_3=\{ a\in \mathbb{L}^2([0,T])\text{ such that }\| a\| \leq c\} $; then $(\Omega_3,\mathcal{F}^{3},\mathcal{F}_t^{3})$ is the canonical space associated with the process $a_n =(\sigma_{x}^{j,n}( t,X_t^n))^{\ast }\sigma_{x}^{j,n}(t,X_t^n)$.

(4) $\Phi_n $ has continuous trajectories, so $\Phi_n :(\Omega ,\mathcal{F},\mathcal{F} _t,\mathbb{P})\to \Omega_{4}$, where $\Omega_{4}$ is the space of continuous functions from $[0,T]$ to $\mathbb{R}^d \otimes \mathbb{R}^d $ equipped with the topology of uniform convergence, and $\mathcal{F}_t^{4}$ is its coordinate filtration.

With these definitions, we introduce the product space
\[
\overline{\Omega }=\Omega \times \Omega_1\times \Omega_2\times \Omega _3\times \Omega_{4},
\]
equipped with the filtration
\[
\overline{\mathcal{F}}_t =\cap_{s\geq t} \mathcal{F}_{s}\otimes \mathcal{F}_{s}^1\otimes \mathcal{F}_{s}^2\otimes \mathcal{F}_{s}^{3}\otimes \mathcal{F}_{s}^{4}.
\]
We associate with $(b_{x}^n(.,X^n,u^n),\sigma _{x}^{j,n}(.,X^n),\;a_n (.,X^n),\Phi_n )$ the probability measure $\overline{\mathbb{P}}_n $ defined on $(\overline{\Omega },\overline{ \mathcal{F}})$ by
\[
\overline{\mathbb{P}}_n (dw,dw_1,dw_2,dw_3,dw_{4})=\mathbb{P} (dw)\,\delta_{b_{x}^n}(dw_1)\,\delta_{\sigma_{x}^{j,n}}(dw_2)\,\delta _{a_n }(dw_3)\,\delta_{\Phi_n }(dw_{4}),
\]
where $\delta_{x}$ denotes the \textit{Dirac measure} at $x$ and $\Phi_n $ is the solution of \eqref{e5.2}.

\begin{theorem} \label{thm1}
The space $(\bar{\Omega},\overline{\mathcal{F}},\overline{ \mathcal{F}}_t,\overline{\mathbb{P}}_n )$ is a good extension of the space $(\Omega ,\mathcal{F},\mathcal{F}_t,\mathbb{P})$; moreover the canonical process $\Phi _t(w,w_1,w_2,w_3,w_{4})=w_{4}(t)$ is a solution of the stochastic differential equation
\begin{gather*}
d\Phi (t)=\beta_1(t)\Phi (t) dt+\sum_{1\leq j\leq d}\beta_2^{j}(t)\Phi (t)dB_t^{j}, \\
\Phi (0)=I_{d},
\end{gather*}
on the space $(\overline{\Omega },\overline{\mathcal{F}}, \overline{\mathcal{F}}_t,\overline{\mathbb{P}}_n )$.
\end{theorem}

\begin{proof}
Let $\mathbb{E}$, $\overline{\mathbb{E}}_n $ and $\overline{ \mathbb{E}}$ denote the expectations with respect to $\mathbb{P}$, $\overline{\mathbb{P}}_n $ and $\overline{\mathbb{P}}$ respectively. It is sufficient to verify that every $(\mathcal{F}_t-\mathbb{P})$ martingale is an $(\overline{\mathcal{F}}_t-\overline{\mathbb{P}}_n )$ martingale.
\end{proof}

The sequence $\overline{\mathbb{P}}_n $ converges stably to a limit $\overline{\mathbb{P}}$ if and only if
\[
\lim_{n\to +\infty } \overline{\mathbb{P}}_n [ g(w,w_1,w_2,w_3,w_{4})]=\overline{\mathbb{P}}[ g(w,w_1,w_2,w_3,w_{4})]
\]
for every bounded measurable function $g: \overline{\Omega }\to \mathbb{R}$ such that $g(w,.,.,.,.)$ is continuous for all $w\in \Omega $. To prove that the sequence $\overline{\mathbb{P}}_n $ is relatively compact with respect to stable convergence, it is sufficient to prove that the projections of $\overline{\mathbb{P}}_n $ on $\Omega_1$, $\Omega_2$, $\Omega_3$ and $\Omega_{4}$ are relatively compact in the topology of narrow convergence.

\begin{lemma} \label{lem9}
(i) Let $\Phi_n $ be the solution of \eqref{e5.2}; then there exists a positive constant $M$ such that for all $n\in \mathbb{N}$ and $s,t\in [0,T]$,
\[
\mathbb{E}(\| \Phi_n (t)-\Phi_n ( s)\| ^{4})\leq M| t-s| ^2.
\]
(ii) The sequence $\overline{\mathbb{P}}_n $ is relatively compact with respect to the topology of stable convergence.
\end{lemma}

\begin{proof}
Statement (i) follows from the Cauchy-Schwarz and Burkholder-Davis-Gundy inequalities.

(ii) Since $\Omega_1$ (resp. $\Omega_2$) is compact, the sequence of projections of $\overline{\mathbb{P}}_n $ on $\Omega_1$ (resp. $\Omega_2$) is tight, hence relatively compact (Prokhorov's Theorem). Moreover, the projection of $\overline{\mathbb{P}}_n $ on $\Omega_{4}$ coincides with the distribution of $\Phi_n $, which satisfies (i) of Lemma \ref{lem9}; hence it is relatively compact with respect to the topology of stable convergence.
\end{proof}

\begin{theorem} \label{thm2}
Let $\overline{\mathbb{P}}$ be a limit of $\overline{\mathbb{P}}_n $ (in the sense of stable convergence); then $(\overline{\Omega }, \overline{\mathcal{F}},\overline{\mathcal{F}}_t,\overline{\mathbb{P}} )$ is a good extension of the space $(\Omega , \mathcal{F},\mathcal{F}_t,\mathbb{P})$. Moreover the canonical process $\Phi_t(w,w_1,w_2,w_3,w_{4})=w_{4}(t)$ satisfies
\begin{equation}
\begin{gathered}
d\Phi (t)=\beta_1(t)\Phi (t) dt+\sum_{1\leq j\leq d}\beta_2^{j}(t)\Phi ( t)dB_t^{j}+\sum_{1\leq j\leq d}\hat{\beta}_2^{j}( t)\Phi (t)d\hat{B}_t^{j}, \\
\Phi (s,s)=I_{d},
\end{gathered} \label{e5.3}
\end{equation}
where $\hat{B}_t$ is a Brownian motion which is independent of $B_t$.
\end{theorem}

\begin{proof}
We use the techniques of Mezerdi \cite{m1} and Jacod \textit{et al} \cite{j1}; it is sufficient to prove that every $(\mathcal{F}_t-\mathbb{P} )$ martingale is an $(\overline{\mathcal{F}}_t-\overline{ \mathbb{P}})$ martingale. Let $M_t$ be an $(\mathcal{F}_t-\mathbb{P})$ martingale, and $\mathcal{Z}$ a bounded random variable, $\overline{\mathcal{F}}_{s}$-measurable and such that $(w_1,w_2,w_3,w_{4})\to \mathcal{Z}(w_1,w_2,w_3,w_{4})$ is continuous. According to Theorem \ref{thm1}, the space $(\bar{\Omega},\overline{ \mathcal{F}},\overline{\mathcal{F}}_t,\overline{ \mathbb{P}}_n )$ is a good extension of $(\Omega ,\mathcal{F},\mathcal{F}_t,\mathbb{P})$. Then, for $s\leq t$, $\overline{\mathbb{E}}_n [M_t\mathcal{Z}]= \overline{\mathbb{E}}_n [M_{s}\mathcal{Z}]$ for all $n\in \mathbb{N}$. Since $\overline{\mathbb{P}}$ is a limit of $\overline{\mathbb{P}}_n $ we have
\[
\overline{\mathbb{E}}[M_t\mathcal{Z}] =\lim_{n\to +\infty } \overline{\mathbb{E}}_n [M_t\mathcal{Z}]= \lim_{n\to +\infty } \overline{\mathbb{E}}_n [M_{s} \mathcal{Z}]=\overline{\mathbb{E}}[M_{s}\mathcal{Z}].
\]
The set of bounded $\overline{\mathcal{F}}_{s}$-measurable random variables $\mathcal{Z}$ which are continuous in $(w_1,w_2,w_3,w_{4})$ generates a $\sigma$-field contained between $\overline{\mathcal{F}}_{s^{-}}$ and $\overline{\mathcal{F}}_{s}$. Then $\overline{\mathbb{E}}[( M_t-M_{s})| \overline{\mathcal{F}}_{s^{-}}]=0$, and the right continuity of $M_t$ implies that $\overline{\mathbb{E}} [(M_t-M_{s})| \overline{\mathcal{F}}_{s} ]=0$.

$\Phi_t$ takes values in $\mathbb{R}^d \otimes \mathbb{R}^d $; it remains to show that $\Phi_t$ is a solution of \eqref{e5.3}. We apply similar techniques to those in \cite{j1}, where it is sufficient to prove that for all $(h_1,h_2)\in \mathbb{R}^d \times \mathbb{R}^d $,
\[
M_t(h_1,h_2)=h_1B_t+h_2\Big(\Phi_{t}-\Phi_0 -\int_0^t \beta_1(s)\Phi_{s}ds\Big)
\]
is an $(\overline{\mathcal{F} }_t-\overline{\mathbb{P}} )$ martingale with quadratic variation of the form
\[
A_t(h_1,h_2)=h_1^2t+2h_1h_2\int_0^{t}\beta_1(s)\Phi_{s}ds+h_2^2\int_0^{t}a(s)\Phi_{s}^2ds.
\]
Note that $M_t$ is an $(\overline{\mathcal{F}}_t-\overline{\mathbb{P}}_n )$ martingale for all $n\in \mathbb{N}$ and $(w,w_1,w_2)\to M_t(h_1,h_2)$ is continuous. To pass to the limit, we must show that $M_t$ is sufficiently integrable. Since $\beta_1(t)$ and $\beta _2^{j}(t)$ are bounded and $\mathbb{E}[(\sup_{t\leq T}| \Phi_t| )^{p}]<+\infty $ for all $p\geq 1$, we deduce that $\sup_n \overline{\mathbb{E}}_n [| M_t(h_1,h_2)| ^2]<+\infty $. Then if $\mathcal{Z}$ is a bounded $\overline{\mathcal{F}}_{s}$-measurable random variable continuous in $(w_1,w_2,w_3,w_{4})$, we have $\overline{\mathbb{E}}_n [(M_t-M_{s})\mathcal{Z} ]\to \overline{\mathbb{E}}[(M_t-M_{s}) \mathcal{Z}]$ as $n\to +\infty $.
Hence $M_t( h_1,h_2)$ is an $(\overline{\mathcal{F}}_t- \overline{\mathbb{P}})$ martingale. The extra term $\sum_{1\leq j\leq d}\hat{\beta} _2^{j}(t)\Phi_td\hat{B}_t^{j}$ comes from the It\^{o} decomposition theorem for martingales adapted to a filtration supporting a Brownian motion.
\end{proof}

The same method can be applied to $M_t^2(h_1,h_2)-A_t(h_1,h_2)$. Now we are ready to state our main result.

\begin{theorem} \label{thm3}
Let $\hat{u}$ be an optimal control and $\hat{X}$ the corresponding trajectory. Then there exists a probability $\overline{\mathbb{P}}$ on the space $(\overline{\Omega },\overline{\mathcal{F}},\overline{\mathcal{F}} _t)$ such that
\begin{itemize}
\item[(i)] $\overline{\mathbb{E}}[H(t,\hat{X}_t,\hat{u} _t,\overline{p}_t)]=\max_{v\in \mathbb{A}} \overline{\mathbb{E}}[H(t,\hat{X}_t,v,\overline{p}_t)]$, $dt$-a.e.
\item[(ii)] $\overline{p}_t=-\overline{\mathbb{E}}[\Phi ^{\ast }(T,t)g_{x}(\hat{X}_T)|_{\overline{\mathcal{F}}_t}]$, where $\Phi ^{\ast }(T,t)$ is the transpose of $\Phi (T,t)$ given by \eqref{e5.3}.
\end{itemize}
\end{theorem}

\begin{proof}
According to Lemma \ref{lem8} there exists a control $u_t^n$ such that $d(u_t^n,\hat{u}_t)\leq \lambda_n $. So it is sufficient to prove that
\[
\lim_{n\to +\infty } \mathbb{E}[H(t,X_t^n,u_t^n,p_n (t))] =\overline{\mathbb{E}}[H(t,\hat{X}_t,\hat{u}_t,\overline{p}(t))].
\]
\end{proof}

\subsection{Filippov approach and the support of a limit $\overline{\mathbb{P}}$}
Our goal in this subsection is to prove a stochastic maximum principle for optimal controls without differentiability assumptions. Let $\overline{\mathbb{P}}$ be a stable limit of $(\overline{ \mathbb{P}}_n )$; we now give the connection between the support of $\overline{\mathbb{P}}$ and the generalized Filippov sets of $b$ and $\sigma ^{j}$ at $(\hat{X}_t,\hat{u}_t)$. Let $\widetilde{\Omega }=\Omega \times \Omega_1\times \Omega _2\times \Omega_3$, $\widetilde{\mathcal{F}}_t=\cap_{s\geq t} \mathcal{F}_{s}\otimes \mathcal{F} _{s}^1\otimes \mathcal{F}_{s}^2\otimes \mathcal{F} _{s}^{3}$, and let $\widetilde{\mathbb{P}}$ be the projection of $\overline{\mathbb{P}}$ on $\widetilde{\Omega }$; then we have
\[
\widetilde{\mathbb{P}}(dw,dw_1,dw_2,dw_3)=\overline{ \mathbb{P}}(dw,dw_1,dw_2,dw_3,\Omega_{4}).
\]
Here $(\widetilde{\Omega },\widetilde{\mathcal{F}}, \widetilde{\mathcal{F}}_t,\widetilde{\mathbb{P}})$ is a good extension of $(\Omega ,\mathcal{F},\mathcal{F} _t,\mathbb{P})$ and $\widetilde{\mathbb{P}}$ is a stable limit of $\widetilde{\mathbb{P}}_n $, where $\widetilde{\mathbb{P}}_n $ denotes the projection of $\overline{\mathbb{P}}_n $ on $\widetilde{\Omega }$. Moreover, if we consider equation \eqref{e5.3} on the space $(\widetilde{\Omega },\widetilde{\mathcal{F}}, \widetilde{\mathcal{F}}_t,\widetilde{\mathbb{P}})$, then it has a unique strong solution. If we denote by $\widetilde{\Phi }_t( w,w_1,w_2,w_3)$ the solution on $(\widetilde{\Omega }, \widetilde{\mathcal{F}},\widetilde{\mathcal{F}}_t, \widetilde{\mathbb{P}})$, then $\widetilde{\Phi }_t=\Phi_t$, $\overline{\mathbb{P}}$-a.s., and
\[
\overline{\mathbb{P}}(dw,dw_1,dw_2,dw_3,dw_{4})= \widetilde{\mathbb{P}}(dw,dw_1,dw_2,dw_3)\delta_{ \widetilde{\Phi }_t(w,w_1,w_2,w_3)}(dw_{4}).
\]
If $D$ denotes a support of the probability $\widetilde{\mathbb{P}} (dw,dw_1,dw_2,dw_3)$, then according to Jacod \textit{et al} \cite{j1} there exists a subsequence along which, $\mathbb{P}$-a.s., the following sets are defined for fixed $w$:

$D_{w}^1$: the set of limit points of the subsequence $b_{x}^n(t,X^n(w),u^n(w))$;
$D_{w}^2$: the set of limit points of the subsequence $\sigma _{x}^{j,n}(t,X^n(w))$.

The Filippov differential inclusion allows us to express the generalized derivatives of $b$ and $\sigma $ in terms of well defined smooth approximations. This advantage enables us to state the following theorem.

\begin{theorem} \label{thm4}
(i) For almost all $w$, there exist subsequences $b_{x}^n( t,X_t^n(w),\hat{u}_t(w))$ and $\sigma _{x}^{j,n}(t,X_t^n(w))$ such that
\begin{gather*}
\lim_{n\to +\infty }b_{x}^n(t,X_t^n(w),\hat{ u}_t(w))=\beta_1(t)\quad \text{in } \mathbb{L}^1(dt), \\
\lim_{n\to +\infty }\sigma_{x}^{j,n}(t,X_t^n( w))=\beta_2^{j}(t)\quad \text{in }\mathbb{L}^1(dt).
\end{gather*}
(ii) For almost every $t\in [0,T]$, we have $\beta _1(t)\in F_{\nabla b}(t,\hat{X}_t,\hat{u}_t)$ and $\beta_2^{j}(t)\in F_{\nabla \sigma ^{j}}(t,\hat{X}_t)$.
\end{theorem}

\begin{proof}
(i) According to the definition of $D_{w}^1$ there exists a subsequence $b_{x}^n(t,X_t^n(w),u_t^n(w))$ which converges weakly in $\mathbb{L}^1(dt)$ to $\beta_1(t)$. Moreover,
\[
\mathbb{E}\Big[\int_0^{T}|b_{x}^n(t,X_t^n,u_t^n)-b_{x}^n(t,X_t^n,\hat{u}_t) |dt\Big]\leq Md(u^n,\hat{u}).
\]
Then there exists a subsequence such that
\[
\int_0^{T}|b_{x}^n( t,X_t^n,u_t^n)-b_{x}^n(t,X_t^n,\hat{u}_t) |dt\to 0\quad \text{as }n\to +\infty ,\; \widetilde{\mathbb{P}}\text{-a.s.}
\]
The convergence $\lim_{n\to +\infty }\sigma _{x}^{j,n}(t,X_t^n(w))=\beta_2^{j}(t)$ is obtained by a similar argument.

(ii) Let $\beta_1(t)\in D_{w}^1$. According to part (i), there exists a subsequence $b_{x}^n(t,X_t^n,\hat{u}_t)$ which converges to $\beta_1(t)$ in $\mathbb{L}^1(dt)$. Moreover, we have
\[
\lim_{n\to +\infty } \mathbb{E}[\sup_{t\leq T} |X_t^n-\hat{X}_t|^2]=0,
\]
so we can extract a subsequence such that $\sup_{t\leq T} |X_t^n-\hat{X}_t|\leq \lambda_n $, $\mathbb{P}$-a.s.; then there exists $n\in \mathbb{N}$ such that
\[
b_{x}^n(t,X_t^n,u_t^n)\in \cup_{k\geq n} [b_{x}^{k}(t,X_t,\hat{u}_t):X_t\in (\hat{X} _t+\lambda_n B)].
\]
According to Mazur's Lemma, there exists a sequence of convex combinations which converges strongly in $\mathbb{L}^1(dt)$ to $\beta_1(t)$. Then we have
\[
\beta_1(t)\in \cap_{n\geq 0} \overline{\mathop{\rm co}}\cup_{k\geq n} [b_{x}^{k}( t,x,\hat{u}_t): x\in (\hat{X}_t+\lambda_n B)]\quad dt\text{-a.e.}
\]
According to property (6) of Lemma \ref{lem6} we have
\[
\beta_1(t)\in F_{\nabla b}(t,\hat{X}_t,\hat{u}_t)\quad dt\text{-a.e.}
\]
A similar argument gives $\beta_2^{j}(t)\in F_{\nabla \sigma ^{j}}(t,\hat{X}_t)$. This completes the proof.
\end{proof}

\begin{remark} \label{rmk2} \rm
Using the same method of proof, we can obtain a more general nonsmooth stochastic maximum principle, without differentiability assumptions, in which the control enters both the drift and the diffusion coefficients, provided that the set of controls is convex.
\end{remark}

\subsection*{Acknowledgements}
The author thanks the anonymous referee who offered many useful remarks and suggestions that improved the first version of this manuscript. The author would like to thank Prof. Julio G. Dix, Texas State University - San Marcos, and Prof. Y. Ouknine, Marrakech University, for their valuable remarks and kind help.

\begin{thebibliography}{00}

\bibitem{a1} J. P. Aubin, A. Cellina; \emph{Differential inclusions}. Grundlehren der Mathematischen Wissenschaften, Volume 264, Springer-Verlag, Berlin (1984).

\bibitem{a2} V. I. Arkin, M. T. Saksonov; \emph{Necessary optimality conditions for stochastic differential equations}, Soviet. Math. Dokl. 20 (1979), pp. 1-5.

\bibitem{b1} K.
Bahlali, B. Mezerdi, Y. Ouknine; \emph{The maximum principle for optimal control of diffusion with non-smooth coefficients}. Stochastics, Vol. 57 (1996), pp. 303-316.

\bibitem{b2} K. Bahlali, B. Djehiche, B. Mezerdi; \emph{On the stochastic maximum principle in optimal control of degenerate diffusions with Lipschitz coefficients}, Appl. Math. Optim., Vol. 56 (2007), pp. 364-378.

\bibitem{b3} A. Bensoussan; \emph{Lectures on stochastic control}. In Lect. Notes in Math. 972, Springer-Verlag (1983), pp. 1-62.

\bibitem{b4} J. M. Bismut; \emph{An introductory approach to duality in optimal stochastic control}, SIAM Rev., Vol. 20, no. 1 (1978).

\bibitem{b5} V. Borkar; \emph{Controlled diffusion processes}. Probability Surveys, Vol. 2 (2005), pp. 213-244.

\bibitem{b6} R. Buckdahn, Y. Ouknine, M. Quincampoix; \emph{On limiting values of stochastic differential equations with small noise intensity tending to zero}. Bull. Sci. Math. 133 (2009), pp. 229-237.

\bibitem{e1} I. Ekeland; \emph{On the variational principle}, J. Math. Anal. Appl. 47 (1974), pp. 443-474.

\bibitem{f1} A. F. Filippov; \emph{Differential equations with discontinuous right-hand sides}. Mathematics and Its Applications: Soviet Series, 18. Kluwer Academic Publishers, Dordrecht (1988).

\bibitem{f2} H. Frankowska; \emph{The first order necessary conditions for optimality for nonsmooth variational and control problems}, SIAM J. Control Optim., Vol. 22, no. 1 (1984), pp. 1-12.

\bibitem{h1} U. G. Haussmann; \emph{A stochastic maximum principle for optimal control of diffusions}, Pitman Research Notes in Math. Series, 151 (1986).

\bibitem{j1} J. Jacod, J. Memin; \emph{Sur un type de convergence interm\'{e}diaire entre la convergence en loi et la convergence en probabilit\'{e}}. Seminar on Probability XV, Lecture Notes in Math. 850 (1981), pp. 529-546. Springer-Verlag, Berlin.

\bibitem{k1} H. J. Kushner; \emph{Necessary conditions for continuous parameter stochastic optimization problems}, SIAM J. Control Optim., Vol. 10 (1972), pp. 550-565.

\bibitem{m1} B. Mezerdi; \emph{Necessary conditions for optimality for a diffusion with a non-smooth drift}. Stochastics, Vol. 24 (1988), pp. 305-326.

\bibitem{p1} S. Peng; \emph{A general stochastic maximum principle for optimal control problems}. SIAM J. Control Optim. 28, no. 4 (1990), pp. 966-979.

\bibitem{p2} L. S. Pontryagin, V. G. Boltyanskii, R. V. Gamkrelidze; \emph{The mathematical theory of optimal processes}. Interscience, New York (1962).

\bibitem{p3} B. E. Paden, S. S. Sastry; \emph{A calculus for computing Filippov's differential inclusion with application to the variable structure control of robot manipulators}. IEEE Trans. Circuits and Systems, Vol. 34, No. 1 (1987).

\bibitem{w1} J. Warga; \emph{Necessary conditions without differentiability assumptions in optimal control}, J. Differential Equations 18 (1975), pp. 41-62.

\bibitem{y1} J. Yong, X. Y. Zhou; \emph{Stochastic Controls. Hamiltonian Systems and HJB Equations}. Springer-Verlag, New York (1999).

\end{thebibliography}
\end{document}