\documentclass[reqno]{amsart} \usepackage{hyperref} \AtBeginDocument{{\noindent\small \emph{Electronic Journal of Differential Equations}, Vol. 2010(2010), No. 97, pp. 1--13.\newline ISSN: 1072-6691. URL: http://ejde.math.txstate.edu or http://ejde.math.unt.edu \newline ftp ejde.math.txstate.edu} \thanks{\copyright 2010 Texas State University - San Marcos.} \vspace{9mm}}
\begin{document}
\title[\hfilneg EJDE-2010/97\hfil Filippov approach in stochastic maximum principle] {Filippov approach in stochastic maximum principle without differentiability assumptions}
\author[M. Hafayed\hfil EJDE-2010/97\hfilneg] {Mokhtar Hafayed}
\address{Mokhtar Hafayed \newline Laboratory of Applied Mathematics, University of Med-Khider, PO Box 145, Biskra (7000), Algeria}
\email{hafa.mokh@yahoo.com}
\thanks{Submitted April 15, 2010. Published July 15, 2010.}
\subjclass[2000]{60H10, 34F05}
\keywords{Stochastic differential equation; generalized Filippov's solutions; \hfill\break\indent optimal control; maximum principle; Ekeland's variational principle}
\begin{abstract}
In this article, we establish necessary conditions for optimality in stochastic control of systems governed by stochastic differential equations with nonsmooth coefficients. The approach used is based on the approximation of the nonsmooth coefficients by smooth ones, which generates a sequence of smooth control problems. Ekeland's variational principle is then applied to obtain a sequence of nearly optimal controls which satisfy necessary conditions for near optimality. By using the generalized notion of Filippov solutions and stable convergence, we obtain an explicit formula for the adjoint process and the inequality between the Hamiltonians, on a good extension of the initial filtered probability space.
\end{abstract}
\maketitle
\numberwithin{equation}{section}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{remark}[theorem]{Remark}
\allowdisplaybreaks

\section{Introduction}
We study a stochastic control problem where the system is governed by a nonlinear stochastic differential equation (SDE for short) of the form
\begin{equation}
\begin{gathered}
dX_t=b(t,X_t,u_t)dt+\sigma (t,X_t)dB_t,\\
X_0=x,
\end{gathered} \label{e1.1}
\end{equation}
where $B_t$ is a $d$-dimensional Brownian motion defined on the filtered probability space $(\Omega ,\mathcal{F} ,\mathcal{F}_t,\mathbb{P})$. The finite horizon cost function to be minimized over admissible controls is given by
\begin{equation}
J(u)=\mathbb{E}(g(X_T)) \label{e1.2}
\end{equation}
where $u$ is an admissible control and $X_T$ is the value at the terminal time $T$ of the diffusion process solving \eqref{e1.1}. A control $\hat{u}\in \mathcal{U}_{\rm ad}$ is called optimal if it satisfies $J(\hat{u})=\inf_{u\in \mathcal{U}_{\rm ad}}\{ J(u)\} $. The corresponding state trajectory $\hat{X}$ and the pair $(\hat{X},\hat{u})$ are called an optimal state trajectory and an optimal pair, respectively.

The stochastic maximum principle (SMP in short) has been and remains an important tool in many areas in which optimal control plays a role. Pontryagin \textit{et al} \cite{p2} were the first to announce the maximum principle. Kushner \cite{k1} employed the spike variation and Neustadt's variational principle to derive a stochastic maximum principle.
On the other hand, Haussmann \cite{h1} extensively investigated necessary conditions for stochastic optimal state feedback controls based on Girsanov's transformation.

The case of stochastic systems with nonsmooth coefficients has been treated in \cite{b1,b2,m1,w1}. Bahlali \textit{et al} \cite{b1} employed Krylov's inequality to derive a stochastic maximum principle with nonsmooth coefficients and nondegenerate diffusion. Necessary conditions for optimality for degenerate diffusions with nonsmooth coefficients were established by Bahlali \textit{et al} \cite{b2}. Necessary conditions for optimality for diffusions with nonsmooth drift were obtained by Mezerdi \cite{m1}, using Clarke's generalized gradient and the stable convergence of probability measures. A further difficulty arises when the diffusion coefficient $\sigma $ contains the control variable $u$; among the works treating this case one can see \cite{a2,b3,p1}. Peng \cite{p1} introduced the second-order adjoint equation and obtained a maximum principle in which the control enters both the drift and the diffusion coefficients and the set of controls is not necessarily convex. A good account and an extensive list of references on the maximum principle and optimal stochastic control can be found in Yong \textit{et al} \cite{y1}. Filippov \cite{f1} developed a solution concept for ordinary differential equations (ODEs in short) with a discontinuous right-hand side. When a function $V$ is locally Lipschitz continuous, the Filippov set-valued map associated with its gradient coincides with Clarke's generalized gradient of $V$.

The main contribution of the present paper is to extend the stochastic maximum principle to the case where the drift and the diffusion coefficients are nonsmooth, in the sense that they are only Lipschitz continuous and satisfy a linear growth condition. Our approach is to express a generalized derivative of $b$ and $\sigma $ through a Filippov differential inclusion type argument, in terms of well defined smooth approximations, and to use the stable convergence of probability measures to characterize the first order adjoint equation. A similar type of stochastic maximum principle was derived by Mezerdi \cite{m1} for a non-differentiable drift, using Clarke's generalized gradient. The novelty of our maximum principle lies in the advantage of the Filippov approach, which allows one to express the generalized gradient in terms of the underlying approximating sequences $b_{x}^n$ and $\sigma_{x}^n$ constructed in Section 5, a property that is not explicit in Clarke's approach.

The rest of the paper is organized as follows. In Section 2, we present the formulation of the problem. Section 3 is devoted to the classical maximum principle. In Section 4, we give some properties of the Filippov set-valued map. Section 5 contains our main result, where we give a generalized stochastic maximum principle for our stochastic control problem.

\section{Problem Formulation and Preliminaries}
Throughout this paper, we assume $(\Omega ,\mathcal{F} ,\mathcal{F}_t,\mathbb{P})$ is a filtered probability space and $B_t$ a Brownian motion with values in $\mathbb{R}^d $. Let $\mathbb{A}$ be a Borel subset of $\mathbb{R}^n$. A control $u_t$ is called admissible if it is measurable and $\mathcal{F}_t$-adapted with values in $\mathbb{A}$. We denote by $\mathcal{U}_{\rm ad}$ the space of admissible controls, by $B_t^{j}$ the $j$th component of $B_t$ and by $\sigma ^{j}$ the $j$th column of the matrix $\sigma$.
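As a simple illustration of the class of problems considered here (the coefficients below are chosen purely for illustration), take $d=n=1$, $\mathbb{A}=[-1,1]$ and
\begin{gather*}
dX_t=(u_t-|X_t|)\,dt+dB_t,\quad X_0=x, \\
J(u)=\mathbb{E}\big(g(X_T)\big),
\end{gather*}
with $g$ satisfying the conditions stated below. The drift $b(t,x,u)=u-|x|$ is $1$-Lipschitz in $x$, has linear growth and is continuous in $u$, so it satisfies assumptions \eqref{e2.1}--\eqref{e2.3} below; however it is not differentiable at $x=0$, so the classical maximum principle of Section 3 does not apply directly.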
Let $b:[ 0,T] \times \mathbb{R}^d \times \mathbb{A}\to \mathbb{R}^d $ and the diffusion matrix $\sigma :[0,T]\times \mathbb{R}^d \to \mathbb{R}^d \otimes \mathbb{R}^d $ be Borel functions such that there exist positive constants $K$ and $c$ with, for all $(t,x,y,u)\in [0,T] \times \mathbb{R}^d \times \mathbb{R}^d \times \mathbb{A}$,
\begin{gather}
|\sigma (t,x)-\sigma (t,y)| +|b(t,x,u)-b(t,y,u)|\leq K|x-y|, \label{e2.1} \\
|\sigma (t,x)|+|b(t,x,u)|\leq c(1+|x|), \label{e2.2} \\
b(t,x,.):\mathbb{A}\to \mathbb{R}^d \text{ is continuous.} \label{e2.3}
\end{gather}
From assumptions \eqref{e2.1} and \eqref{e2.2} it is easy to see that equation \eqref{e1.1} satisfies the usual It\^{o} conditions; therefore it has a unique strong solution such that for any $q\geq 1$,
\[
\mathbb{E}[\sup_{t\leq T} |X_t|^{q}]<+\infty .
\]
The cost function to be minimized, defined by \eqref{e1.2}, satisfies the condition
\begin{equation}
g:\mathbb{R}^d \to \mathbb{R}\text{ is continuously differentiable,} \label{e2.4}
\end{equation}
with $|g(x)|\leq c[1+| x|]$ and $|g_{x}(x)|\leq M$, where $g_{x}$ denotes the gradient of $g$. Finally, throughout this paper we assume that an optimal control $\hat{u}$ exists.

\section{Classical stochastic maximum principle}
In the regular case, the control problem consists in finding an admissible control $\hat{u}$ which minimizes the cost $J(u)$. The necessary conditions satisfied by the control $\hat{u}$, which is assumed to exist, are called the stochastic maximum principle. In this case we assume
\begin{equation}
b(t,.,u),\sigma ^{j}(t,.):\mathbb{R} ^d \to \mathbb{R}^d \text{ are continuously differentiable.} \label{e3.1}
\end{equation}
To obtain these necessary conditions for optimality, we compare $\hat{u}$ with controls which are strong perturbations defined by
\[
u_{h}(t)=\begin{cases} v &\text{if }t\in [t_0,t_0+h], \\ \hat{u}_t &\text{otherwise.} \end{cases}
\]
We define the Hamiltonian $H(t,x,u,p):=\langle p,b(t,x,u)\rangle$, where $\langle p,b(t,x,u)\rangle$ is the scalar product in $\mathbb{R}^d $.

\begin{lemma} \label{lem1}
(1) Let $X_{h}$ be the trajectory corresponding to $u_{h}$; then
\[
\mathbb{E}\big(\sup_{t\leq T} | X_t^{h}-\hat{X}_t| \big)^2\leq Kh^2.
\]
(2) Let $\Phi (t)$ be the solution of the linear stochastic differential equation
\begin{equation}
\begin{gathered}
d\Phi (t)=b_{x}(t,\hat{X}_t,\hat{u}_t)\Phi (t) dt+\sum_{1\leq j\leq d} \sigma_{x}^{j}(t,\hat{X}_t)\Phi ( t)dB_t^{j}, \\
\Phi_0=b(t,\hat{X}_t,v)-b(t,\hat{X}_t,\hat{u}_t),
\end{gathered} \label{e3.2}
\end{equation}
where $b_{x}$ and $\sigma_{x}^{j}$ are the derivatives of $b$ and $\sigma ^{j}$ ($j=1,\dots ,d$) with respect to the state variable $x$. Then
\[
\lim_{h\to 0} \mathbb{E}\Big(\Big|\frac{X_T^{h}-\hat{X}_T}{h}-\Phi (T)\Big|^2\Big)=0.
\]
(3) $\frac{d}{dh}\{ J(u_{h})\} \big|_{h=0} =\mathbb{E}[H(t,\hat{X}_t,\hat{u}_t,p_t)]-\mathbb{E}[ H(t,\hat{X}_t,v,p_t)]$.
\end{lemma}

See Bensoussan \cite{b3} or Mezerdi \cite{m1} for a detailed proof of the above lemma. Under the differentiability assumptions \eqref{e3.1}, the regular version of the stochastic maximum principle is given by the following lemma.

\begin{lemma} \label{lem2}
Let $(\hat{X},\hat{u})$ be an optimal pair. Then there exists an adapted process $p(t)$ satisfying
\begin{gather}
p(t)=-\mathbb{E}[\Phi ^{\ast }(T,t)g_{x}( \hat{X}_T)| \mathcal{F}_t], \label{e3.3}\\
H(t,\hat{X}_t,\hat{u}_t,p_t)=\max_{v\in \mathbb{A}}H(t,\hat{X}_t,v,p_t) \quad dt\text{-a.e.,}
\; \mathbb{P}\text{-a.s.}, \label{e3.4}
\end{gather}
where $\Phi ^{\ast }(T,t)$ is the transpose of $\Phi (T,t)$, the solution of \eqref{e3.2}.
\end{lemma}

See Mezerdi \cite{m1} or Yong \textit{et al} \cite{y1} for a detailed proof of the above lemma. We call $p(t)$ the adjoint process, \eqref{e3.3} the adjoint equation and \eqref{e3.4} the maximum condition. For every $\varepsilon >0$ there exists a control $u^{\varepsilon }$, called near optimal, such that
\[
J(u^{\varepsilon })\leq \inf \{ J(u):u\in \mathcal{U} _{\rm ad}\} +\varepsilon .
\]
In this part we establish necessary conditions of near optimality satisfied by a sequence of nearly optimal strict controls. This result is based on Ekeland's variational principle, which is given as follows.

\begin{lemma}[Ekeland's Lemma] \label{lem3}
Let $(E,d)$ be a complete metric space and $f:E\to \overline{\mathbb{R}}$ be lower semicontinuous and bounded from below. Let $\varepsilon >0$ and $u^{\varepsilon }\in E$ satisfy $f(u^{\varepsilon })\leq \inf (f)+\varepsilon $. Then for any $\lambda >0$, there exists $v\in E$ such that
\begin{itemize}
\item[(i)] $f(v)\leq f(u^{\varepsilon})$.
\item[(ii)] $d(u^{\varepsilon },v)\leq \lambda $.
\item[(iii)] $f(v)<f(w)+\frac{\varepsilon}{\lambda}d(v,w)$ for all $w\neq v$.
\end{itemize}
\end{lemma}

\begin{lemma} \label{lem4}
For every $\varepsilon >0$ there exist a near optimal control $u_t^{\varepsilon }$ and an adapted process $p^{\varepsilon }(t)$, defined as in \eqref{e3.3} with $(\hat{X},\hat{u})$ replaced by $(X^{\varepsilon },u^{\varepsilon })$, such that for all $v\in \mathbb{A}$,
\[
\mathbb{E}[\langle p^{\varepsilon }(t),b( t;X_t^{\varepsilon },v)\rangle ]\leq \mathbb{E}[ \langle p^{\varepsilon }(t),b(t;X_t^{\varepsilon },u_t^{\varepsilon })\rangle ]+\varepsilon \quad dt\text{-a.e.}
\]
\end{lemma}

\begin{proof}
Since $u^{\varepsilon }$ is optimal for the cost $J_{\varepsilon }(u)=J(u)+\varepsilon d(u,u^{\varepsilon})$, we can apply the preceding results to derive the adjoint process and the inequality between the Hamiltonians. Notice that if $u_{h}^{\varepsilon }$ denotes a strong perturbation of $u^{\varepsilon }$, then $d(u_{h}^{\varepsilon },u^{\varepsilon })=h$ (see Mezerdi \cite{m1}, Bensoussan \cite{b3}).
\end{proof}

\section{Filippov's set-valued map and Generalized gradient}

\subsection{Filippov's set-valued map}
We give in this section some basic notions and concepts concerning the generalized Filippov set-valued map, which is described briefly as follows. Let us consider a function $b:\mathbb{R}^n\to \mathbb{R}^n$ to which we associate the following set-valued map, called the Filippov regularization of $b$,
\begin{equation}
F_{b}(x):=\cap_{\lambda (N)=0} \cap_{\delta >0} \overline{\mathop{\rm co}} b((x+\delta B) -N), \label{e4.1}
\end{equation}
where $\overline{\mathop{\rm co}}(A)$ means the closure of the convex hull of $A$. The first intersection $\cap_{\lambda (N)=0}$ is taken over all subsets $N$ of $\mathbb{R}^n$ which are negligible with respect to the Lebesgue measure $\lambda $, and $B$ is the closed unit ball. To the function $b:\mathbb{R}^n\to \mathbb{R}^n$ we also associate the ordinary differential equation
\begin{equation}
x'(t)=b(x(t)),\quad t\geq 0,\; x(0) =x. \label{e4.2}
\end{equation}
Without regularity assumptions on $b$ (such as Lipschitz continuity), it is well known that neither existence nor uniqueness holds in general. An absolutely continuous function $t\in [0,+\infty ) \mapsto x(t)\in \mathbb{R}^n$ is a Filippov solution of the ODE \eqref{e4.2} if and only if it is a solution of the differential inclusion
\begin{equation}
x'(t)\in F_{b}(x(t)),\quad t\geq 0,\; x(0)=x. \label{e4.3}
\end{equation}
The set-valued map $F_{b}$ is upper semicontinuous with compact convex values.
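To illustrate these notions on a standard example (included here only for the reader's convenience), consider in dimension one the field $b(x)=-\operatorname{sgn}(x)$ for $x\neq 0$, with an arbitrary value $b(0)\in \{-1,1\}$. Starting from $x(0)=0$, the ODE \eqref{e4.2} has no classical (nor Carath\'eodory) solution: any solution would have to leave $0$ with speed $\pm 1$ and is immediately driven back towards $0$. On the other hand,
\[
F_{b}(0)=[-1,1],
\]
since for every Lebesgue-negligible set $N$ and every $\delta >0$ the set $b\big((0+\delta B)-N\big)$ contains both values $-1$ and $1$, so its closed convex hull is $[-1,1]$. In particular $0\in F_{b}(0)$, and $x(t)\equiv 0$ is a Filippov solution of \eqref{e4.3}, regardless of the value assigned to $b(0)$.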
The upper semicontinuity of $F_{b}$, together with the compactness and convexity of its values, implies that the differential inclusion \eqref{e4.3} has a nonempty set of (local) solutions (Aubin \cite{a1}). In the following proposition we summarize some of its properties.

\begin{proposition} \label{prop1}
Let $b:\mathbb{R}^n\to \mathbb{R}^n$ be a measurable and bounded function. Then we have:
\begin{itemize}
\item[(1)] There exists a Lebesgue-negligible set $N_{b}$ such that for any $x\in \mathbb{R}^n$,
\begin{equation}
F_{b}(x)=\cap_{\delta >0} \overline{\mathop{\rm co}}b( (x+\delta B)-N_{b}), \label{e4.4}
\end{equation}
\item[(2)] For almost all $x\in \mathbb{R}^n$, we have $b(x)\in F_{b}(x)$.
\item[(3)] The set-valued map $F_{b}$ is the smallest upper semicontinuous set-valued map $F$ with closed convex values such that $b(x)\in F(x)$, for almost all $x\in \mathbb{R}^n$.
\item[(4)] The set-valued map $F_{b}$ is single-valued if and only if there exists a continuous function $g$ which coincides almost everywhere with $b$. In this case we have $F_{b}(x)=\{ g(x)\} $ for almost all $x\in \mathbb{R}^n$.
\item[(5)] If a function $\tilde{b}$ coincides almost everywhere with $b$, then $F_{b}(x)=F_{\tilde{b}}(x)$ for all $x\in \mathbb{R}^n$.
\item[(6)] There exists a function $\bar{b}$ which is equal almost everywhere to $b$ and such that
\[
F_{b}(x)=\cap_{\delta >0} \overline{\mathop{\rm co}}\bar{b} (x+\delta B).
\]
\item[(7)] We have $F_{b}(x)=\cap_{b=\bar{b}}\cap_{\delta >0} \overline{\mathop{\rm co}}\bar{b}(x+\delta B)$, where the first intersection is taken over all functions $\bar{b}$ equal to $b$ almost everywhere.
\end{itemize}
\end{proposition}

See Buckdahn \textit{et al} \cite{b6} for a proof of the above proposition. As an example, in the one dimensional case $(n=1)$, for $b:\mathbb{R} \to \mathbb{R}$ one can check that, for all $x\in \mathbb{R}$,
\[
F_{b}(x)=[ \underline{m}_{b}(x),\overline{m}_{b}(x)]
\]
where
\[
\underline{m}_{b}(x):=\sup_{\delta >0} (\mathop{\rm ess\,inf} _{[x-\delta ,x+\delta ]} b),\quad \overline{m}_{b}(x):=\inf_{\delta >0} (\mathop{\rm ess\,sup}_{[x-\delta ,x+\delta ]} b).
\]
In the case where $b(x)=\operatorname{sgn}(x)$, we have $F_{b}(0)=[-1,1]$.

\subsection{Connection between Filippov's approach and Clarke's generalized gradient}
We give in this subsection the connection between Filippov's differential inclusion and Clarke's generalized gradient.

\noindent\textbf{Clarke's generalized gradient.}
Let $V:\mathbb{R}^n\to \mathbb{R}$ be locally Lipschitz continuous. We define the generalized gradient of $V$ as
\begin{equation}
\partial_{c}V(x)=\overline{\mathop{\rm co}}\{ \lim_{x_i\to x}\nabla V(x_i),\; x_i\notin \Omega_{V}\cup N\}, \label{e4.5}
\end{equation}
where $\Omega_{V}$ is the set of Lebesgue measure zero where $\nabla V$ does not exist and $N$ is an arbitrary set of measure zero.

\begin{lemma} \label{lem6}
The map $F:\{ b:\mathbb{R}^{m}\to \mathbb{R}^n\} \to \{ g:\mathbb{R}^{m}\to 2^{\mathbb{R}^n}\} $ has the following properties:

(1) Assume that $b:\mathbb{R}^{m}\to \mathbb{R}^n$ is locally bounded. Then there exists $N_{b}\subset \mathbb{R}^{m}$ with $\lambda (N_{b})=0$ such that for all $N\subset \mathbb{R}^{m}$ with $\lambda (N)=0$,
\[
F_{b}(x)=\overline{\mathop{\rm co}}\{ \lim_{x_i\to x}b( x_i),\;x_i\notin N_{b}\cup N\} .
\]
(2) Assume that $b,f:\mathbb{R}^{m}\to \mathbb{R}^n$ are locally bounded; then
\[
F_{(b+f)}(x)\subset F_{b}(x) +F_{f}(x).
\]
(3) Assume that $b_{j}:\mathbb{R}^{m}\to \mathbb{R} ^{n_{j}}$, $j\in \{ 1,2,\dots ,N\}$, are locally bounded; then
\[
F_{\prod_{j=1}^{N}b_{j}}(x)\subset \prod_{j=1}^{N}F_{b_{j}}(x).
\]
(4) Let $g:\mathbb{R}^{m}\to \mathbb{R}^n$ be $C^1$ with $\operatorname{rank} Dg(x)=n$, and let $b:\mathbb{R}^n\to \mathbb{R}^{p}$ be locally bounded; then
\[
F_{b\circ g}(x)=F_{b}(g(x)).
\]
(5) Let $g:\mathbb{R}^{m}\to \mathbb{R}^{p\times n}$ (i.e. matrix valued) be $C^{0}$ and $b:\mathbb{R}^{m}\to \mathbb{R}^n$ be locally bounded; then $F_{gb}(x)=g(x) F_{b}(x)$, where $gb(x):=g(x)b( x)$.

(6) Let $V:\mathbb{R}^{m}\to \mathbb{R}$ be locally Lipschitz continuous; then $F_{\nabla V}(x)=\partial_{c}V(x)$.
\end{lemma}

The proof of the above lemma can be found in Paden \textit{et al} \cite{p3}.

\begin{remark} \label{rmk1} \rm
(i) Since $V$ is locally Lipschitz, $\nabla V$ is defined almost everywhere and locally bounded (Rademacher's Theorem). By using Lemma \ref{lem6} we have $F_{\nabla V}(x)=\partial_{c}V(x)$. \\
(ii) In particular, if $V$ is Fr\'echet-differentiable at $x$, then
\[
F_{\nabla V}(x)=\partial_{c}V(x)=\{V'(x)\} .
\]
\end{remark}

\section{Main results}
In this section we establish a generalized stochastic maximum principle for diffusions without differentiability assumptions on the coefficients: $b$ and $\sigma ^{j}$ are only required to satisfy assumptions \eqref{e2.1} and \eqref{e2.2}; we thus weaken the differentiability assumptions on these coefficients. The method is described briefly as follows. Let $E$ be a Banach space, $E^{\ast }$ its dual, and let $f:V\to \mathbb{R}^d $, where $V$ is a closed subset of $E$, satisfy the following conditions:
\begin{itemize}
\item[(H1)] There exist $\lambda_n >0$ and $f^n:V\to \mathbb{R}^d $, G\^{a}teaux-differentiable in the ball $(y+\lambda_n B)$, with $f^n(y)=f(y)$.
\item[(H2)] There exists $\varepsilon_n >0$ such that $\frac{\varepsilon_n }{\lambda_n }\to 0$ as $n\to +\infty $, $f^n$ is continuous and $|f^n(x)-f(x)| \leq \varepsilon_n $ for $x\in (y+\lambda_n B)$.
\end{itemize}
We shall approximate the drift $b$ and the diffusion $\sigma ^{j}$ by sequences of smooth functions $b^n$ and $\sigma ^{j,n}$ by using the following regularization. Let $\xi_n :\mathbb{R}\to \mathbb{R}$ be a positive $C^{\infty }$ function vanishing outside the interval $[-\varepsilon_n ,\varepsilon_n ]$ such that $\int_{\mathbb{R}}\xi_n (x)dx=1$, where $\varepsilon_n \to 0$ as $n\to +\infty $. We set $\rho_n (x)=\prod_{j=1}^{d} \xi_n (x^{j})$, where $x=(x^1,\dots ,x^d )\in \mathbb{R}^d $; $\rho_n $ is a $C^{\infty }$ function with compact support. We define the smooth functions $b^n=b\ast \rho_n $ and $\sigma ^{j,n}=\sigma ^{j}\ast \rho_n $, obtained by convolving each component of $b$ and $\sigma ^{j}$ with $\rho_n $ in the state variable. The next lemma gives the properties satisfied by these functions.

\begin{lemma} \label{lem7}
(1) $b^n:[0,T]\times \mathbb{R}^d \times \mathbb{A} \to \mathbb{R}^d $ and $\sigma ^{j,n}:[0,T]\times \mathbb{R}^d \to \mathbb{R}^d $ are Borel functions.

(2) $b^n$ and $\sigma ^{j,n}$ are $k$-Lipschitz in the second variable $x$ and have linear growth.

(3) $b^n$ and $\sigma ^{j,n}$ are $\mathcal{C}^{\infty}$ in $x$, and for all $(t,x,u)\in [0,T]\times \mathbb{R}^d \times \mathbb{A}$: $|b^n(t,x,u)-b(t,x,u) |\leq k\varepsilon_n $ and $|\sigma ^{j,n}(t,x)-\sigma ^{j}(t,x)|\leq k\varepsilon_n $.
\end{lemma}

The statements in the above lemma are classical facts; see Frankowska \cite{f2} and Mezerdi \cite{m1}.
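For completeness, let us recall the standard computation behind statement (3) of the above lemma; we include it only as a sketch, the dimensional constant being absorbed into $k$. Since $\rho_n \geq 0$, $\int_{\mathbb{R}^d}\rho_n (y)dy=1$ and $\rho_n $ vanishes outside $[-\varepsilon_n ,\varepsilon_n ]^d$, the Lipschitz condition \eqref{e2.1} gives
\begin{align*}
|b^n(t,x,u)-b(t,x,u)|
&=\Big|\int_{\mathbb{R}^d}\big(b(t,x-y,u)-b(t,x,u)\big)\rho_n (y)\,dy\Big| \\
&\leq \int_{\mathbb{R}^d}K|y|\,\rho_n (y)\,dy
\leq K\sqrt{d}\,\varepsilon_n ,
\end{align*}
and the same estimate holds for $|\sigma ^{j,n}(t,x)-\sigma ^{j}(t,x)|$.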
Note that $b^n$ and $\sigma ^{j,n}$ satisfy conditions (H1) and (H2) with $\lambda_n =\sqrt{\varepsilon_n }$. So we can define
\begin{gather*}
\partial_{c}b(t,y,u)=\cap_{n\geq 0} \overline{\mathop{\rm co}} \cup_{k\geq n} [b_{x}^{k}( t,x,u): x\in (y+\lambda_n B)], \\
\partial_{c}\sigma ^{j}(t,y)=\cap_{n\geq 0} \overline{\mathop{\rm co}}\cup_{k\geq n} [\sigma _{x}^{j,k}(t,x): x\in (y+\lambda_n B)].
\end{gather*}
Let $X^n$ be the solution of
\begin{equation}
\begin{gathered}
dX_t^n=b^n(t,X_t^n,u_t^n)dt+\sigma ^n(t,X_t^n)dB_t, \\
X_0^n=x.
\end{gathered} \label{e5.1}
\end{equation}
Let $\Phi_n (s,t)$ denote the fundamental solution, for $s\geq t$, of the linear equation
\begin{equation}
\begin{gathered}
d\Phi_n (t)=b_{x}^n(t,X_t^n,u_t^n)\Phi _n (t)dt+\sum_{1\leq j\leq d} \sigma _{x}^{j,n}(t,X_t^n)\Phi_n (t)dB_t^{j},\\
\Phi_n (s,s)=I_{d}.
\end{gathered} \label{e5.2}
\end{equation}
The following lemma will play an important role below.

\begin{lemma} \label{lem8}
There exist $u^n\in \mathcal{U}_{\rm ad}$ and $\lambda_n =\sqrt{\delta_n }$ such that
\begin{itemize}
\item[(i)] $d(u^n,u^{\ast })\leq \lambda_n $.
\item[(ii)] $\mathbb{E}[H_n (t;X_t^n,u_t^n,p_t^n)]\geq \mathbb{E}[ H_n (t;X_t^n,v,p_t^n)]-\lambda_n $ for all $v\in \mathbb{A}$, $dt$-a.e. The associated adjoint process is given by
\[
p_n (t)=-\mathbb{E}\{ \Phi_n ^{\ast }(T,t) g_{x}(X_T^n)|_{\mathcal{F}_t}\} .
\]
\end{itemize}
\end{lemma}

\begin{proof}
Since $u_t^n$ is optimal for the cost $J_n (u)+(\delta_n )^{1/2}d(u,u^n)$, we proceed as in Lemma \ref{lem2} to derive a maximum principle for $u_t^n$. The rest of the proof is similar to that of the approximate maximum principle; see Yong \textit{et al} \cite{y1}.
\end{proof}

Notice that, since the gradient $g_{x}$ is continuous, we have $g_{x}(X_T^n)\to g_{x}(\hat{X}_T)$ $\mathbb{P}$-a.s. as $n\to +\infty $.

\subsection{Weak limit solution with stable convergence}
The passage to the limit in $\Phi_n $ is carried out by using the stable convergence of probability measures introduced by Jacod \textit{et al} \cite{j1}. This convergence is contained between convergence in law and convergence in probability. We shall make use of the notion of a good extension of a filtered probability space.

\begin{definition} \label{def2} \rm
The space $(\overline{\Omega },\overline{\mathcal{F}},\overline{\mathcal{F}}_t,\overline{\mathbb{P}} )$ is a good extension of $(\Omega ,\mathcal{F},\mathcal{F}_t,\mathbb{P})$ if the following conditions are satisfied:
\begin{itemize}
\item[(1)] $\overline{\Omega } =\Omega \times \hat{\Omega}$ where $\hat{\Omega}$ is an auxiliary space.
\item[(2)] $\mathcal{F} \subset \overline{\mathcal{F}}$ in the sense that $A\times \hat{\Omega}\in \overline{\mathcal{F}}$ for $A\in \mathcal{F} $, and similarly $\mathcal{F}_t\subset \overline{\mathcal{F}}_t$.
\item[(3)] $\overline{\mathbb{P}}(A\times \hat{\Omega} )=\mathbb{P}(A)$ for $A\in \mathcal{F} $.
\item[(4)] Each $(\mathcal{F}_t-\mathbb{P})$ martingale is an $(\overline{\mathcal{F}}_t-\overline{\mathbb{P}})$ martingale.
\end{itemize}
\end{definition}

Clearly, since $b^n$ and $\sigma ^{j,n}$ are $k$-Lipschitz in $x$ and continuously differentiable, the matrices of partial derivatives $b_{x}^n$ and $\sigma_{x}^{j,n}$ are bounded by the Lipschitz constant $k$.
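As a simple illustration of this boundedness and of the sets $\partial_{c}b$ defined above (the computation is included only as an illustration, for the drift $b(t,x,u)=u-|x|$ of Section 2, with $d=1$), the regularized drift $b^n(t,\cdot ,u)=b(t,\cdot ,u)\ast \rho_n $ has derivative
\[
b_{x}^n(t,x,u)=-\int_{\mathbb{R}}\operatorname{sgn}(x-y)\,\rho_n (y)\,dy\in [-1,1],
\]
so it is indeed bounded by the Lipschitz constant; moreover, letting $n\to +\infty $ in the intersection defining $\partial_{c}b$ yields $\partial_{c}b(t,0,u)=[-1,1]$, which coincides with Clarke's generalized gradient of $x\mapsto u-|x|$ at $x=0$, in accordance with Lemma \ref{lem6}(6).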
Let us define the canonical spaces associated with the processes $b_{x}^n(t,X_t^n,u_t^n)$, $\sigma_{x}^{j,n}(t,X_t^n)$, $(\sigma_{x}^n)^{\ast }(\sigma_{x}^n)$ and $\Phi_n (t)$.

(1) Let $\Omega_1$ be the canonical space of $b_{x}^n(t,X_t^n,u_t^n)$, defined as follows. Let $D_1=\{ \beta_1:[0,T]\to \mathbb{R}^d \otimes \mathbb{R}^d \text{ measurable such that }\| \beta_1\| \leq c\} $. It is clear that $b_{x}^n$ takes values in $D_1$, which is a uniformly integrable subset of $\mathbb{L}^1([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )$, hence a relatively compact subset with respect to the weak topology $\sigma (\mathbb{L}^1,\mathbb{L}^{\infty })$ (Dunford-Pettis Theorem). Let $\Omega_1=\overline{D}_1$ (weak or strong closure of $D_1$, because $D_1$ is convex). We define $\mathcal{F}^1$ as the filtration of the coordinates generated by the subsets of the form
\[
A=\{ \beta_1\in \Omega_1:\int_0^{t}\langle \beta_1,f(s)\rangle ds\leq c,\text{ where }c\in \mathbb{R},\;f\in L^{\infty }([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )\} .
\]
$(\Omega_1, \mathcal{F}^1, \mathcal{F}_t^1)$ is the canonical space associated with the process $b_{x}^n(t,X_t^n,u_t^n)$.

(2) Let $D_2=\{ \beta_2^{j} :[0,T]\to \mathbb{R}^d \otimes \mathbb{R}^d \text{ measurable such that }\| \beta_2^{j}\| \leq c\} $. It is clear that $\sigma_{x}^{j,n}(t,X_t^n)$ takes values in $D_2$, which is a uniformly integrable subset of $\mathbb{L}^1([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )$, hence a relatively compact subset with respect to the weak topology $\sigma (\mathbb{L}^1, \mathbb{L}^{\infty })$ (Dunford-Pettis Theorem). Let $\Omega_2= \overline{D}_2$ (weak or strong closure of $D_2$, because $D_2$ is convex). Then $\Omega_1$ and $\Omega_2$ are compact metrizable spaces. We define $\mathcal{F}^2$ as the filtration of the coordinates generated by the subsets of the form
\[
B=\{ \beta_2^{j}:\int_0^{t}\langle \beta _2^{j},h(s)\rangle ds\leq c,\text{ where }c\in \mathbb{R},\;h\in L^{\infty }([0,T],\mathbb{R}^d \otimes \mathbb{R}^d )\} ,
\]
and $(\Omega_2, \mathcal{F}^2, \mathcal{F}_t^2)$ is the canonical space associated with the process $\sigma_{x}^{j,n}(t,X_t^n)$.

(3) Let $\Omega_3=\{ a\in \mathbb{L}^2([0,T])\text{ such that }\| a\| \leq c\} $; then $(\Omega_3,\mathcal{F}^{3},\mathcal{F}_t^{3})$ is the canonical space associated with the process $a_n =(\sigma_{x}^{j,n}( t,X_t^n))^{\ast }\sigma_{x}^{j,n}(t,X_t^n)$.

(4) $\Phi_n $ has continuous trajectories, so $\Phi_n :(\Omega ,\mathcal{F},\mathcal{F} _t,\mathbb{P})\to \Omega_{4}$, where $\Omega_{4}$ is the space of continuous functions from $[0,T]$ to $\mathbb{R}^d \otimes \mathbb{R}^d $ equipped with the topology of uniform convergence, and $\mathcal{F}_t^{4}$ is its coordinate filtration.

With these definitions, we introduce the product space
\[
\overline{\Omega }=\Omega \times \Omega_1\times \Omega_2\times \Omega _3\times \Omega_{4},
\]
equipped with the filtration
\[
\overline{\mathcal{F}}_t =\cap_{s\geq t} \mathcal{F}_{s}\otimes \mathcal{F}_{s}^1\otimes \mathcal{F}_{s}^2\otimes \mathcal{F}_{s}^{3}\otimes \mathcal{F}_{s}^{4}.
\]
We associate with $(b_{x}^n(.,X^n,u^n),\sigma _{x}^{j,n}(.,X^n),\;a_n (.,X^n),\Phi_n )$ the probability measure $\overline{\mathbb{P}}_n $ defined on $(\overline{\Omega },\overline{ \mathcal{F}})$ by
\[
\overline{\mathbb{P}}_n (dw,dw_1,dw_2,dw_3,dw_{4})=\mathbb{P} (dw)\,\delta_{b_{x}^n}(dw_1)\,\delta_{\sigma_{x}^{j,n}}(dw_2)\,\delta _{a_n }(dw_3)\,\delta_{\Phi_n }(dw_{4}),
\]
where $\delta_{x}$ denotes the \textit{Dirac measure} at $x$ and $\Phi_n $ is the solution of \eqref{e5.2}.

\begin{theorem} \label{thm1}
The space $(\bar{\Omega},\overline{\mathcal{F}},\overline{ \mathcal{F}}_t,\overline{\mathbb{P}}_n )$ is a good extension of the space $(\Omega ,\mathcal{F},\mathcal{F}_t,\mathbb{P})$; moreover the canonical process $\Phi _t(w,w_1,w_2,w_3,w_{4})=w_{4}(t)$ is a solution of the stochastic differential equation
\begin{gather*}
d\Phi (t)=\beta_1(t)\Phi (t) dt+\sum_{1\leq j\leq d}\beta_2^{j}(t)\Phi (t)dB_t^{j}, \\
\Phi (0)=I_{d},
\end{gather*}
on the space $(\overline{\Omega },\overline{\mathcal{F}}, \overline{\mathcal{F}}_t,\overline{\mathbb{P}}_n )$.
\end{theorem}

\begin{proof}
Let $\mathbb{E}$, $\overline{\mathbb{E}}_n $ and $\overline{ \mathbb{E}}$ denote the expectations with respect to $\mathbb{P}$, $\overline{\mathbb{P}}_n $ and $\overline{\mathbb{P}}$ respectively. It is sufficient to verify that every $(\mathcal{F}_t-\mathbb{P})$ martingale is an $(\overline{\mathcal{F}}_t-\overline{\mathbb{P}}_n )$ martingale.
\end{proof}

The sequence $\overline{\mathbb{P}}_n $ converges stably to a limit $\overline{\mathbb{P}}$ if and only if
\[
\lim_{n\to +\infty } \overline{\mathbb{P}}_n [ g(w,w_1,w_2,w_3,w_{4})]=\overline{\mathbb{P}}[ g(w,w_1,w_2,w_3,w_{4})]
\]
for every bounded measurable function $g: \overline{\Omega }\to \mathbb{R}$ such that $g(w,.,.,.,.)$ is continuous for all $w\in \Omega $. To prove that the sequence $\overline{\mathbb{P}}_n $ is relatively compact with respect to stable convergence, it is sufficient to prove that the projections of $\overline{\mathbb{P}}_n $ on $\Omega_1$, $\Omega_2$, $\Omega_3$ and $\Omega_{4}$ are relatively compact in the topology of narrow convergence.

\begin{lemma} \label{lem9}
(i) Let $\Phi_n $ be the solution of \eqref{e5.2}; then there exists a positive constant $M$ such that for all $n\in \mathbb{N}$ and $s,t\in [0,T]$,
\[
\mathbb{E}(\| \Phi_n (t)-\Phi_n ( s)\| ^{4})\leq M| t-s| ^2.
\]
(ii) The sequence $\overline{\mathbb{P}}_n $ is relatively compact with respect to the topology of stable convergence.
\end{lemma}

\begin{proof}
Statement (i) follows from the Cauchy-Schwarz and Burkholder-Davis-Gundy inequalities.

(ii) Since $\Omega_1$ (resp. $\Omega_2$) is compact, the sequence of projections of $\overline{\mathbb{P}}_n $ on $\Omega_1$ (resp. $\Omega_2$) is tight, hence relatively compact (Prokhorov's Theorem). Moreover, the projection of $\overline{\mathbb{P}}_n $ on $\Omega_{4}$ coincides with the distribution of $\Phi_n $, which satisfies (i) of Lemma \ref{lem9}; hence it is relatively compact with respect to the topology of stable convergence.
\end{proof}

\begin{theorem} \label{thm2}
Let $\overline{\mathbb{P}}$ be a limit of $\overline{\mathbb{P}}_n $ (in the sense of stable convergence); then $(\overline{\Omega }, \overline{\mathcal{F}},\overline{\mathcal{F}}_t,\overline{\mathbb{P}} )$ is a good extension of the space $(\Omega , \mathcal{F},\mathcal{F}_t,\mathbb{P})$. Moreover the canonical process $\Phi_t(w,w_1,w_2,w_3,w_{4})=w_{4}(t)$ satisfies
\begin{equation}
\begin{gathered}
d\Phi (t)=\beta_1(t)\Phi (t) dt+\sum_{1\leq j\leq d}\beta_2^{j}(t)\Phi ( t)dB_t^{j}+\sum_{1\leq j\leq d}\hat{\beta}_2^{j}( t)\Phi (t)d\hat{B}_t^{j}, \\
\Phi (s,s)=I_{d},
\end{gathered} \label{e5.3}
\end{equation}
where $\hat{B}_t$ is a Brownian motion which is independent of $B_t$.
\end{theorem}

\begin{proof}
We use the techniques of Mezerdi \cite{m1} and Jacod \textit{et al} \cite{j1}; it is sufficient to prove that every $(\mathcal{F}_t-\mathbb{P} )$ martingale is an $(\overline{\mathcal{F}}_t-\overline{ \mathbb{P}})$ martingale. Let $M_t$ be an $(\mathcal{F}_t-\mathbb{P})$ martingale, and $\mathcal{Z}$ a bounded random variable, $\overline{\mathcal{F}}_{s}$-measurable and such that $(w_1,w_2,w_3,w_{4})\to \mathcal{Z}(w_1,w_2,w_3,w_{4})$ is continuous. According to Theorem \ref{thm1}, the space $(\bar{\Omega},\overline{ \mathcal{F}},\overline{\mathcal{F}}_t,\overline{ \mathbb{P}}_n )$ is a good extension of $(\Omega ,\mathcal{F},\mathcal{F}_t,\mathbb{P})$. Then, for $s\leq t$, $\overline{\mathbb{E}}_n [M_t\mathcal{Z}]= \overline{\mathbb{E}}_n [M_{s}\mathcal{Z}]$ for all $n\in \mathbb{N}$. Since $\overline{\mathbb{P}}$ is a limit of $\overline{\mathbb{P}}_n $ we have
\[
\overline{\mathbb{E}}[M_t\mathcal{Z}] =\lim_{n\to +\infty } \overline{\mathbb{E}}_n [M_t\mathcal{Z}]= \lim_{n\to +\infty } \overline{\mathbb{E}}_n [M_{s} \mathcal{Z}]=\overline{\mathbb{E}}[M_{s}\mathcal{Z}].
\]
The set of bounded $\overline{\mathcal{F}}_{s}$-measurable random variables $\mathcal{Z}$ which are continuous in $(w_1,w_2,w_3,w_{4})$ generates a $\sigma$-field contained between $\overline{\mathcal{F}}_{s^{-}}$ and $\overline{\mathcal{F}}_{s}$. Then $\overline{\mathbb{E}}[( M_t-M_{s})| \overline{\mathcal{F}}_{s^{-}}]=0$, and the right continuity of $M_t$ implies that $\overline{\mathbb{E}} [(M_t-M_{s})| \overline{\mathcal{F}}_{s} ]=0$.

$\Phi_t$ takes values in $\mathbb{R}^d \otimes \mathbb{R}^d $; it remains to show that $\Phi_t$ is a solution of \eqref{e5.3}. We apply similar techniques to those in \cite{j1}, where it is sufficient to prove that for all $(h_1,h_2)\in \mathbb{R}^d \times \mathbb{R}^d $,
\[
M_t(h_1,h_2)=h_1B_t+h_2\Big(\Phi_{t}-\Phi_0 -\int_0^t \beta_1(s)\Phi_{s}ds\Big)
\]
is an $(\overline{\mathcal{F} }_t-\overline{\mathbb{P}} )$ martingale with quadratic variation of the form
\[
A_t(h_1,h_2)=h_1^2t+2h_1h_2\int_0^{t}\beta_1(s)\Phi_{s}ds+h_2^2\int_0^{t}a(s)\Phi_{s}^2ds.
\]
Note that $M_t$ is an $(\overline{\mathcal{F}}_t-\overline{\mathbb{P}}_n )$ martingale for all $n\in \mathbb{N}$ and $(w,w_1,w_2)\to M_t(h_1,h_2)$ is continuous. To pass to the limit, we must show that $M_t$ is sufficiently integrable. Since $\beta_1(t)$ and $\beta _2^{j}(t)$ are bounded and $\mathbb{E}[(\sup_{t\leq T}| \Phi_t| )^{p}]<+\infty $ for all $p\geq 1$, we deduce that $\sup_n \overline{\mathbb{E}}_n [| M_t(h_1,h_2)| ^2]<+\infty $. Then if $\mathcal{Z}$ is a bounded $\overline{\mathcal{F}}_{s}$-measurable random variable continuous in $(w_1,w_2,w_3,w_{4})$, we have $\overline{\mathbb{E}}_n [(M_t-M_{s})\mathcal{Z} ]\to \overline{\mathbb{E}}[(M_t-M_{s}) \mathcal{Z}]$ as $n\to +\infty $.
Hence $M_t( h_1,h_2)$ is an $(\overline{\mathcal{F}}_t- \overline{\mathbb{P}})$ martingale. The extra term $\sum_{1\leq j\leq d}\hat{\beta} _2^{j}(t)\Phi_td\hat{B}_t^{j}$ comes from the It\^{o} decomposition theorem for martingales adapted to a filtration supporting a Brownian motion.
\end{proof}

The same method can be applied to $M_t^2(h_1,h_2)-A_t(h_1,h_2)$. Now we are ready to state our main result.

\begin{theorem} \label{thm3}
Let $\hat{u}$ be an optimal control and $\hat{X}$ the corresponding trajectory. Then there exists a probability $\overline{\mathbb{P}}$ on the space $(\overline{\Omega },\overline{\mathcal{F}},\overline{\mathcal{F}} _t)$ such that
\begin{itemize}
\item[(i)] $\overline{\mathbb{E}}[H(t,\hat{X}_t,\hat{u} _t,\overline{p}_t)]=\max_{v\in \mathbb{A}} \overline{\mathbb{E}}[H(t,\hat{X}_t,v,\overline{p}_t)]$, $dt$-a.e.
\item[(ii)] $\overline{p}_t=-\overline{\mathbb{E}}[\Phi ^{\ast }(T,t)g_{x}(\hat{X}_T)|_{\overline{\mathcal{F}}_t}]$, where $\Phi ^{\ast }(T,t)$ is the transpose of $\Phi (T,t)$ given by \eqref{e5.3}.
\end{itemize}
\end{theorem}

\begin{proof}
According to Lemma \ref{lem8} there exists a control $u_t^n$ such that $d(u_t^n,\hat{u}_t)\leq \lambda_n $. So it is sufficient to prove that
\[
\lim_{n\to +\infty } \mathbb{E}[H(t,X_t^n,u_t^n,p_n (t))] =\overline{\mathbb{E}}[H(t,\hat{X}_t,\hat{u}_t,\overline{p}(t))].
\]
\end{proof}

\subsection{Filippov approach and the support of a limit $\overline{\mathbb{P}}$}
Our goal in this subsection is to prove a stochastic maximum principle for optimal controls without differentiability assumptions. Let $\overline{\mathbb{P}}$ be a stable limit of $(\overline{ \mathbb{P}}_n )$; we now give the connection between the support of $\overline{\mathbb{P}}$ and the generalized Filippov sets of $b$ and $\sigma ^{j}$ at $(\hat{X}_t,\hat{u}_t)$. Let $\widetilde{\Omega }=\Omega \times \Omega_1\times \Omega _2\times \Omega_3$, $\widetilde{\mathcal{F}}_t=\cap_{s\geq t} \mathcal{F}_{s}\otimes \mathcal{F} _{s}^1\otimes \mathcal{F}_{s}^2\otimes \mathcal{F} _{s}^{3}$, and let $\widetilde{\mathbb{P}}$ be the projection of $\overline{\mathbb{P}}$ on $\widetilde{\Omega }$; then we have
\[
\widetilde{\mathbb{P}}(dw,dw_1,dw_2,dw_3)=\overline{ \mathbb{P}}(dw,dw_1,dw_2,dw_3,\Omega_{4}).
\]
Here $(\widetilde{\Omega },\widetilde{\mathcal{F}}, \widetilde{\mathcal{F}}_t,\widetilde{\mathbb{P}})$ is a good extension of $(\Omega ,\mathcal{F},\mathcal{F} _t,\mathbb{P})$ and $\widetilde{\mathbb{P}}$ is a stable limit of $\widetilde{\mathbb{P}}_n $, where $\widetilde{\mathbb{P}}_n $ denotes the projection of $\overline{\mathbb{P}}_n $ on $\widetilde{\Omega }$. Moreover, if we consider equation \eqref{e5.3} on the space $(\widetilde{\Omega },\widetilde{\mathcal{F}}, \widetilde{\mathcal{F}}_t,\widetilde{\mathbb{P}})$, then it has a unique strong solution. If we denote by $\widetilde{\Phi }_t( w,w_1,w_2,w_3)$ the solution on $(\widetilde{\Omega }, \widetilde{\mathcal{F}},\widetilde{\mathcal{F}}_t, \widetilde{\mathbb{P}})$, then $\widetilde{\Phi }_t=\Phi_t$, $\overline{\mathbb{P}}$-a.s., and
\[
\overline{\mathbb{P}}(dw,dw_1,dw_2,dw_3,dw_{4})= \widetilde{\mathbb{P}}(dw,dw_1,dw_2,dw_3)\delta_{ \widetilde{\Phi }_t(w,w_1,w_2,w_3)}(dw_{4}).
\]
If $D$ denotes a support of the probability $\widetilde{\mathbb{P}} (dw,dw_1,dw_2,dw_3)$, then according to Jacod \textit{et al} \cite{j1} there exists a subsequence along which, $\mathbb{P}$-a.s., the following sets are defined for fixed $w$:

$D_{w}^1$: the set of limit points of the subsequence $b_{x}^n(t,X^n(w),u^n(w))$;
$D_{w}^2$: the set of limit points of the subsequence $\sigma _{x}^{j,n}(t,X^n(w))$.

The Filippov differential inclusion allows us to express the generalized derivatives of $b$ and $\sigma $ in terms of well defined smooth approximations. This advantage enables us to state the following theorem.

\begin{theorem} \label{thm4}
(i) For almost all $w$, there exist subsequences $b_{x}^n( t,X_t^n(w),\hat{u}_t(w))$ and $\sigma _{x}^{j,n}(t,X_t^n(w))$ such that
\begin{gather*}
\lim_{n\to +\infty }b_{x}^n(t,X_t^n(w),\hat{ u}_t(w))=\beta_1(t)\quad \text{in } \mathbb{L}^1(dt), \\
\lim_{n\to +\infty }\sigma_{x}^{j,n}(t,X_t^n( w))=\beta_2^{j}(t)\quad \text{in }\mathbb{L}^1(dt).
\end{gather*}
(ii) For almost every $t\in [0,T]$, we have $\beta _1(t)\in F_{\nabla b}(t,\hat{X}_t,\hat{u}_t)$ and $\beta_2^{j}(t)\in F_{\nabla \sigma ^{j}}(t,\hat{X}_t)$.
\end{theorem}

\begin{proof}
(i) According to the definition of $D_{w}^1$ there exists a subsequence $b_{x}^n(t,X_t^n(w),u_t^n(w))$ which converges weakly in $\mathbb{L}^1(dt)$ to $\beta_1(t)$. Moreover,
\[
\mathbb{E}\Big[\int_0^{T}|b_{x}^n(t,X_t^n,u_t^n)-b_{x}^n(t,X_t^n,\hat{u}_t) |dt\Big]\leq Md(u^n,\hat{u}).
\]
Then there exists a subsequence such that
\[
\int_0^{T}|b_{x}^n( t,X_t^n,u_t^n)-b_{x}^n(t,X_t^n,\hat{u}_t) |dt\to 0\quad \text{as }n\to +\infty ,\; \widetilde{\mathbb{P}}\text{-a.s.}
\]
The convergence $\lim_{n\to +\infty }\sigma _{x}^{j,n}(t,X_t^n(w))=\beta_2^{j}(t)$ is obtained by a similar argument.

(ii) Let $\beta_1(t)\in D_{w}^1$. According to part (i), there exists a subsequence $b_{x}^n(t,X_t^n,\hat{u}_t)$ which converges to $\beta_1(t)$ in $\mathbb{L}^1(dt)$. Moreover, we have
\[
\lim_{n\to +\infty } \mathbb{E}[\sup_{t\leq T} |X_t^n-\hat{X}_t|^2]=0,
\]
so we can extract a subsequence such that $\sup_{t\leq T} |X_t^n-\hat{X}_t|\leq \lambda_n $, $\mathbb{P}$-a.s.; then there exists $n\in \mathbb{N}$ such that
\[
b_{x}^n(t,X_t^n,u_t^n)\in \cup_{k\geq n} [b_{x}^{k}(t,X_t,\hat{u}_t):X_t\in (\hat{X} _t+\lambda_n B)].
\]
According to Mazur's Lemma, there exists a sequence of convex combinations which converges strongly in $\mathbb{L}^1(dt)$ to $\beta_1(t)$. Then we have
\[
\beta_1(t)\in \cap_{n\geq 0} \overline{\mathop{\rm co}}\cup_{k\geq n} [b_{x}^{k}( t,x,\hat{u}_t): x\in (\hat{X}_t+\lambda_n B)]\quad dt\text{-a.e.}
\]
According to property (6) of Lemma \ref{lem6} we have
\[
\beta_1(t)\in F_{\nabla b}(t,\hat{X}_t,\hat{u}_t)\quad dt\text{-a.e.}
\]
A similar argument gives $\beta_2^{j}(t)\in F_{\nabla \sigma ^{j}}(t,\hat{X}_t)$. This completes the proof.
\end{proof}

\begin{remark} \label{rmk2} \rm
Using the same method of proof, we can obtain a more general nonsmooth stochastic maximum principle, without differentiability assumptions, in which the control enters both the drift and the diffusion coefficients, provided that the set of controls is convex.
\end{remark}

\subsection*{Acknowledgements}
The author thanks the anonymous referee who offered many useful remarks and suggestions that improved the first version of this manuscript. The author would like to thank Prof. Julio G. Dix, Texas State University - San Marcos, and Prof. Y. Ouknine, Marrakech University, for their valuable remarks and kind help.

\begin{thebibliography}{00}

\bibitem{a1} J. P. Aubin, A. Cellina; \emph{Differential inclusions}. Grundlehren der Mathematischen Wissenschaften, Volume 264, Springer-Verlag, Berlin (1984).

\bibitem{a2} V. I. Arkin, M. T. Saksonov; \emph{Necessary optimality conditions for stochastic differential equations}, Soviet. Math. Dokl. 20 (1979), pp. 1-5.

\bibitem{b1} K.
Bahlali, B. Mezerdi, Y. Ouknine; \emph{The maximum principle for optimal control of diffusion with non-smooth coefficients}. Stochastics, Vol. 57 (1996), pp. 303-316.

\bibitem{b2} K. Bahlali, B. Djehiche, B. Mezerdi; \emph{On the stochastic maximum principle in optimal control of degenerate diffusions with Lipschitz coefficients}, Appl. Math. Optim., Vol. 56 (2007), pp. 364-378.

\bibitem{b3} A. Bensoussan; \emph{Lectures on stochastic control}. In Lect. Notes in Math. 972, Springer-Verlag (1983), pp. 1-62.

\bibitem{b4} J. M. Bismut; \emph{An introductory approach to duality in optimal stochastic control}, SIAM Rev., Vol. 20, no. 1 (1978).

\bibitem{b5} V. Borkar; \emph{Controlled diffusion processes}. Probability Surveys, Vol. 2 (2005), pp. 213-244.

\bibitem{b6} R. Buckdahn, Y. Ouknine, M. Quincampoix; \emph{On limiting values of stochastic differential equations with small noise intensity tending to zero}. Bull. Sci. Math. 133 (2009), pp. 229-237.

\bibitem{e1} I. Ekeland; \emph{On the variational principle}, J. Math. Anal. Appl. 47 (1974), pp. 443-474.

\bibitem{f1} A. F. Filippov; \emph{Differential equations with discontinuous right-hand sides}. Mathematics and Its Applications: Soviet Series, 18. Kluwer Academic Publishers, Dordrecht (1988).

\bibitem{f2} H. Frankowska; \emph{The first order necessary conditions for optimality for nonsmooth variational and control problems}, SIAM J. Control Optim., Vol. 22, no. 1 (1984), pp. 1-12.

\bibitem{h1} U. G. Haussmann; \emph{A stochastic maximum principle for optimal control of diffusions}, Pitman Research Notes in Math. Series, 151 (1986).

\bibitem{j1} J. Jacod, J. Memin; \emph{Sur un type de convergence interm\'{e}diaire entre la convergence en loi et la convergence en probabilit\'{e}}. Seminar on Probability XV, Lecture Notes in Math. 850 (1981), pp. 529-546. Springer-Verlag, Berlin.

\bibitem{k1} H. J. Kushner; \emph{Necessary conditions for continuous parameter stochastic optimization problems}, SIAM J. Control Optim., Vol. 10 (1972), pp. 550-565.

\bibitem{m1} B. Mezerdi; \emph{Necessary conditions for optimality for a diffusion with a non-smooth drift}. Stochastics, Vol. 24 (1988), pp. 305-326.

\bibitem{p1} S. Peng; \emph{A general stochastic maximum principle for optimal control problems}. SIAM J. Control Optim. 28, no. 4 (1990), pp. 966-979.

\bibitem{p2} L. S. Pontryagin, V. G. Boltyanskii, R. V. Gamkrelidze; \emph{The mathematical theory of optimal processes}. Interscience, New York (1962).

\bibitem{p3} B. E. Paden, S. S. Sastry; \emph{A calculus for computing Filippov's differential inclusion with application to the variable structure control of robot manipulators}. IEEE Trans. Circuits and Systems, Vol. 34, No. 1 (1987).

\bibitem{w1} J. Warga; \emph{Necessary conditions without differentiability assumptions in optimal control}, J. Differential Equations 18 (1975), pp. 41-62.

\bibitem{y1} J. Yong, X. Y. Zhou; \emph{Stochastic Controls. Hamiltonian Systems and HJB Equations}. Springer-Verlag, New York (1999).

\end{thebibliography}
\end{document}