%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\documentclass[11pt]{article}
%\documentclass[11pt]{elsarticle}
%\renewcommand{\baselinestretch}{2}
\topmargin 0in
\oddsidemargin 0in
\evensidemargin 0in
\textwidth 6.2in
\textheight 8.5in
\parskip .1in
\usepackage{graphicx}
%\renewcommand{\baselinestretch}{2}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newcommand{\1}{\'{\i}}
\newcommand{\bea}{\begin{eqnarray}}
\newcommand{\eea}{\end{eqnarray}}
\newcommand{\be}{\begin{equation}}
\newcommand{\ee}{\end{equation}}
\newcommand{\tras}{^{{\mbox{\footnotesize\sc t}}}}
\newcommand\ba {{\bf{a}}}
\newcommand\bb {{\bf{b}}}
\newcommand\bc {{\bf{c}}}
\newcommand\bd {{\bf{d}}}
\newcommand\befe {{\bf{f}}}
\newcommand\bh {{\bf{h}}}
\newcommand\bo {{\bf{o}}}
\newcommand\bt {{\bf{t}}}
\newcommand\bu {{\bf{u}}}
\newcommand\bv {{\bf{v}}}
\newcommand\bw {{\bf{w}}}
\newcommand\bx {{\bf{x}}}
%\newcommand\bx {X}
\newcommand\by {{\bf{y}}}
\newcommand\bz {{\bf{z}}}
\newcommand\bA {{{A}}}
\newcommand\bB {{\bf{B}}}
\newcommand\bC {{\bf{C}}}
\newcommand\bD {{\bf{D}}}
\newcommand\bF {{\bf{F}}}
\newcommand\bG {{\bf{G}}}
\newcommand\bI {{\bf{I}}}
\newcommand\bL {{{L}}}
\newcommand\bM {{\bf{M}}}
\newcommand\bP {{\bf{P}}}
\newcommand\bR {{\bf{R}}}
\newcommand\bS {{\bf{S}}}
\newcommand\bU {{\bf{U}}}
\newcommand\bV {{\bf{V}}}
\newcommand\bW {{\bf{W}}}
\newcommand\bX {{\bf{x}}}
\newcommand\bY {{\bf{Y}}}
\newcommand\bZ {{\bf{Z}}}
\newcommand\wbZ {\widehat{\bZ}}
\newcommand\wefe {\widehat{\befe}}
\newcommand\wf {\widehat{f}}
\newcommand\wF {\widehat{F}}
\newcommand\wg {\widehat{g}}
\newcommand\whrob{\widehat{h}_{\mbox{\sc r}}}
\newcommand\werre {\widehat{r}}
\newcommand\wese {\widehat{s}}
\newcommand\wbz {\widehat{\bz}}
\newcommand\wz {\widehat{z}}
\newcommand\wy {\widehat{y}}
\newcommand\wbh {\widehat{\bh}}
\newcommand\wh {\widehat{h}}
\newcommand\wH {\widehat{H}}
\newcommand\wB {\widehat{B}}
\newcommand\wC {\widehat{C}}
\newcommand\wD {\widehat{D}}
\newcommand\wN {\widehat{N}}
\newcommand\wcL {\widehat{\cal L}}
\newcommand\wcR {\widehat{\cal R}}
\newcommand\wL {\widehat{L}}
\newcommand\wR {\widehat{R}}
\newcommand\wu {\widehat{u}}
\newcommand\wtbefe {\widetilde{\befe}}
\newcommand\wtbA {\widetilde{\bA}}
\newcommand\wtbU {\widetilde{\bU}}
\newcommand\wtf {\widetilde{f}}
\newcommand\wtu {\widetilde{u}}
\newcommand\wtY {\widetilde{Y}}
\newcommand\wty {\widetilde{y}}
\newcommand\bmu {\mbox{\boldmath $\mu$}}
\newcommand\bla {\mbox{\boldmath $\lambda$}}
\newcommand\bdelta {\mbox{\boldmath $\delta$}}
\newcommand\boldeta {\mbox{\boldmath $\eta$}}
\newcommand\brho {\mbox{\boldmath $\rho$}}
\newcommand\bbe {\mbox{$\bbeta$}}
\newcommand\bLam {\mbox{\boldmath $\Lambda$}}
\newcommand\bGa {\mbox{\boldmath $\Gamma$}}
\newcommand\bphi {\mbox{\boldmath $\phi$}}
\newcommand\bSi {\mbox{\boldmath $\Sigma$}}
\newcommand\bDelta {\mbox{\boldmath $\Delta$}}
\newcommand\bSiet {\mbox{$\bSi_{\mbox{\small$\boldeta$}}$}}
\newcommand\bSieti {\mbox{$\bSi_{\mbox{\small$\boldeta$}}^{-1}$}}
\newcommand\bSidelta {\mbox{ $\bSi_{\mbox{\small$\bDelta$}}$}}
\newcommand\bSideltainv {\mbox{ $\bSi_{\mbox{\small$\bDelta$}}^{-1}$}}
\newcommand\bthe {\mbox{\boldmath $\theta$}}
\newcommand\bxi {\mbox{\boldmath $\xi$}}
\newcommand\wbe {\widehat{\bbe}}
\newcommand\bbeta {\mbox{\boldmath $\beta$}}
\newcommand\etab {\mbox{\boldmath $\eta$}}
\newcommand\weta {\widehat{\eta}}
\newcommand\wteta {\widetilde{\eta}}
\newcommand\wetab {\widehat{\etab}}
\newcommand\wwbeta {\widehat{\beta}}
\newcommand\wbeta {\widehat{\mbox{\boldmath $\beta$}}}
\newcommand\wbetar {\wbeta_{\mbox{\sc r}}}
\newcommand\wdelta {\widehat{\delta}}
\newcommand\wtbeta {\widetilde{\bbeta}}
\newcommand\wlam {\widehat{\lambda}}
\newcommand\wLam {\widehat{\bLam}}
\newcommand\wbphi {\widehat{\bphi}}
\newcommand\wphi {\widehat{\phi}}
\newcommand\wrho {\widehat{\rho}}
\newcommand{\wsigma} {\widehat{\sigma}}
\newcommand\wtheta {\widehat{\theta}}
\newcommand\wbSidelta {\mbox{$\widehat{\bSi}\left(\bDelta_1,\dots,\bDelta_{n-1}\right)$}}
\newcommand\wbSideltainv {\mbox{ $\widehat{\bSi}^{-1}\left(\bDelta_1,\dots,\bDelta_n\right)$}}
\newcommand\wbSiet {\mbox{$\widehat{\bSi}_{\mbox{\small$\boldeta$}}$}}
\newcommand\wbSieti {\mbox{$\widehat{\bSi}_{\mbox{\small$\boldeta$}}^{-1}$}}
\newcommand\mm {\small \sc mm}
\newcommand\gm {\small \sc gm}
\newcommand\m {\small \sc m}
\newcommand\lts {\small \sc lts}
\newcommand\lms {\small \sc lms}
\newcommand\mH {\mbox{{\sc h}}}
\newcommand\mI {\mbox{{\sc i}}}
\newcommand\mO {\mbox{{\sc o}}}
\newcommand\mP {\mbox{{\sc p}}}
\newcommand\mQ {\mbox{{\sc q}}}
\newcommand{\alnu}{{\alpha}_{\nu}}
\newcommand{\kest}{K_*}
\newcommand{\weps} {\widehat{\varepsilon}}
\newcommand{\wfi} {\widehat{\phi}}
\newcommand{\wvarphi} {\widehat{\varphi}}
\newcommand{\wbfi} {\widehat{\mbox{\boldmath $\phi$}}}
\newcommand\wtX {\widetilde{\bX}}
\newcommand\wgama {\widehat{\gamma}}
\newcommand\wbgama {\widehat{\mbox{\boldmath $\gamma$} }}
\newcommand\wtgama {\widetilde{\gamma}}
\newcommand\wtx {\widetilde{\bx}}
\newcommand\wtgo {{\widetilde{g}}_0}
\newcommand\wtg {\widetilde{g}}
\newcommand\wtfi {\widetilde{\phi}}
\newcommand\wtbfi {\widetilde{\wbfi}}
\def\median{\mathop{\rm median}}
\def\mad{\mathop{\rm mad}}
\def\med{\mathop{\rm med}}
\def\var{\mathop{\rm var}}
\def\MSE{\mathop{\rm MSE}}
\def\real{\hbox{$\displaystyle I\hskip -3pt R$}}
\def\racional{\hbox{$\displaystyle I\hskip -7pt Q$}}
\def\realito{\tiny{\real}}
\def\racionalito{\tiny{\racional}}
\newcommand{\convpp}{ \buildrel{a.s.}\over\longrightarrow}
\newcommand{\convprob}{ \buildrel{p}\over\longrightarrow}
\newcommand{\convdist}{ \buildrel{{\cal D}}\over\longrightarrow}
\newcommand{\EIF}{{\mbox{EIF}}}
\def\dst{\displaystyle}
\def\noi{\noindent}
\def\new{\newline}
\def\arroba{\hbox{$\;\displaystyle\mbox{\em a}\hskip -7pt \bigcirc$}}
\def\square{\ifmmode\sqr\else{$\sqr$}\fi}
\def\sqr{\vcenter{
\hrule height.1mm
\hbox{\vrule width.1mm height2.2mm\kern2.18mm
\vrule width.1mm}
\hrule height.1mm}}
\usepackage{color}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Teoremas y proposiciones
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\newtheorem{teo}{Theorem}
\newtheorem{prop}[teo]{Proposition}
\newtheorem{lema}[teo]{Lemma}
\newtheorem{obs}{Remark}
\newtheorem{coro}[teo]{Corollary}
\usepackage{natbib}
%\usepackage{TandF-styles}
%\usepackage{INTAQ-V1.1}
\newenvironment{keywords}{\par}{\par}
\begin{document}
\title{Robust estimators in partly linear regression models on Riemannian manifolds}
\author{Guillermo Henry and Daniela Rodriguez}
\markboth{Henry and Rodriguez}{Partly linear regression models}
\maketitle
\begin{abstract}
Under a partly linear model we study a family of robust estimates for the regression parameter and the regression function when some of the predictors take values on a Riemannian manifold. We obtain the consistency and the asymptotic normality of the proposed estimators. Simulations and an application to a real dataset show the good performance of our proposal under small samples and contamination.
\end{abstract}
\begin{keywords}
Non parametric estimation, Partly linear models, Riemannian manifolds, Robustness.
\end{keywords}
\section{Introduction}
Partly linear regression models (PLM) assume that the regression function can be modeled linearly on
some covariates, while it depends non parametrically on some others. To be more precise, assume
that we have a response $y_i \in \real$ and covariates $ (\bx_i, t_i)$ such that $\bx_i \in \real^p, t_i \in [0, 1]$ satisfying
\begin{equation}
y_i=\bx^{\tras}_i \bbeta+g(t_i)+\varepsilon_i \quad\quad 1\leq i \leq n\;,
\label{modelo}
\end{equation}
where the errors $\varepsilon_{i}$ are independent and independent of $(\bx_i^{\tras},t_i)$. Since the introductory work by \cite{eng}, partly linear models have become an important tool in the modeling of econometric or biometric data, combining the flexibility of non parametric models and the simple interpretations of the linear ones. However, in many applications, some of the predictor variables take values on a Riemannian manifold rather than in a Euclidean space, and this structure of the variables needs to be taken into account in the estimation procedure.
\section{head one}
In a recent paper \cite{whr}, PLM are studied when the explanatory variables corresponding to the non parametric component take values on a Riemannian manifold and the potential application of this model in an environmetric problem is explored. Unfortunately, as shown in Section \ref{simu}, this approach may not work well in the presence of a small proportion of observations that deviate from the assumed model. One way to avoid this problem is to consider robust estimators that can resist the effect of a small number of atypical observations. The goal of this paper is to introduce resistant estimators for the regression parameter and the regression function under PLM (\ref{modelo}), when the predictor variable $t$ takes values on a Riemannian manifold.
This paper is organized as follows. Section \ref{estimadores} gives a brief summary of the classical estimators for this model and introduces the robust estimates. In Section \ref{asymp}, we study the consistency and the asymptotic distribution of the regression parameter under regular assumptions on the bandwidth sequence. Section \ref{simu} includes the results of a simulation study conducted in order to explore the performance of the new estimators under normality and contamination. Also, a robust cross validation procedure to select the smoothing parameter is considered. The advantages of the proposed method are also illustrated over a real data set, in Section \ref{real}. Proofs are given in the Appendix.
\section{The model and the estimators} \label{estimadores}
Assume that we have a sample of $n$ independent variables $(y_i,\bx_i^{\tras},t_i)$ in $\real^{p+1} \times M$ with the same distribution as $(y,\bx^{\tras},t)$, where $(M,\gamma)$ is a Riemannian manifold of dimension $d$. As in \cite{hrnp} we consider $({M},{\gamma})$ a $d-$dimensional compact oriented and connected Riemannian manifold without boundary. Note that in this case the injectivity radius of $(M,\gamma)$ ($inj_{\gamma} M$ ) is positive. Partly linear models assume that the relation between the response variable $y_i$ and the covariates $(\bx_i^{\tras},t_i)$ can be represented as
\begin{equation}
y_i=\bx^{\tras}_i \bbeta+g(t_i)+\varepsilon_i \quad\quad 1\leq i \leq n\;,
\label{semipara}
\end{equation}
where the errors $\varepsilon_{i}$ are independent and independent of $(\bx_i^{\tras},t_i)$. Furthermore, we will assume that $\varepsilon$ has symmetric distribution.
\subsection{Classical estimators}
Denote $\phi_0(\tau)=E(y|t=\tau)$ and $\bphi(t)=(\phi_1(t),\dots,\phi_p(t))$ where $\phi_j(\tau)=E(x_{ij}|t=\tau)$ for $1\leq j\leq p$, then we have that $g(t)=\phi_0(t)-\bphi(t)^{\tras}\bbeta$ and hence, $y-\phi_0(t)=(\bx-\bphi(t))^{\tras}\bbeta+\varepsilon$. The classical least square estimator of $\bbe$, $\wbeta_{ls}$ can be obtained minimizing
\begin{eqnarray*}
\widehat{\bbeta}_{ls}=\mbox{arg} \min_{\bbeta} \sum_{i=1}^n[(y_i-\wfi_{0,ls}(t_i))-(\bX_i-\wbfi_{ls}(t_i))^{\tras}\bbeta]^2,
\end{eqnarray*}
where $\wfi_{0,ls}$ and $\wbfi_{ls}$ are non parametric kernel estimators of $\phi_0$ and $\bphi$, respectively. More precisely, the non parametric estimators $\wfi_{0,ls}$ and $\wfi_{j,ls}$ of $\phi_0$ and $\phi_{j}$ can be defined as (see \cite{bp}),
\begin{equation}\label{estimadornop}
\wfi_{0,ls}(t)=\sum_{i=1}^n w_{n,h}(t,t_i) y_i \quad \mbox { and } \quad \wfi_{j,ls}(t)=\sum_{i=1}^n w_{n,h}(t,t_i) x_{ij}
\end{equation}
where $w_{n,h}(t,t_i)={{\theta^{-1}_t(t_i)}K(d_{\gamma}(t,t_i)/h_n)}/[{\sum_{k=1}^n {\theta^{-1}_t(t_k)}K(d_{\gamma}(t,t_k)/h_n)}]$
with $K:\real \to \real$ a non-negative function, $d_{\gamma}$ the distance induced by the metric ${\gamma}$ and $\theta_t(s)$ the volume density function on $(M,\gamma)$. The bandwidth $h_n$ is a sequence of real positive numbers such that $\lim_{n\to \infty}h_n=0$ and $h_n$ are smaller than $inj_{\gamma} M$.
For a rigorous definition of the volume density function and the injectivity radius see \cite{Besse} or \cite{hriv}.
The final least squares estimator of $g$ can be taken as $\wg_{ls}(t)=\wfi_{0,ls}(t)-\wbfi_{ls}(t)^{\tras}\wbeta_{ls}.$ The properties of these estimators have been studied in \cite{whr}, while for Euclidean data there is a vast literature on least squares estimators for PLM; see for example \cite{eng}, \cite{sp}, \cite{chen} and \cite{liang}.
\subsection{Robust estimates}
As in the Euclidean setting, the estimators introduced by \cite{bp} are a weighted average of the response variables. Hence, these estimates are very sensitive to large fluctuations of the variables so that, the final estimator of $\bbeta$ can be seriously affected by anomalous data. To overcome this problem, \cite{hrnp} considered two families of robust estimators for the regression function when the explanatory variables $t_i$ take values on a Riemannian manifold $(M, \gamma)$. The first family combines the ideas of robust smoothing in Euclidean spaces with the
kernel weights introduced in \cite{bp}. The second one generalizes to our setting the proposal given by \cite{bofr}, who considered robust non parametric estimates using nearest neighbor weights, when the predictors $t$ are on $\real^d$.
As in \cite{bobi}, we consider a class of resistant estimates based on a three-step robust procedure under the partly linear model when some of the predictors take values on a Riemannian manifold. This approach does not require any moment conditions on the errors, as it is based on a natural extension of the conditional expectation to a setting where no moment conditions are required, studied in \cite{bofr}. Denote by $\Psi:\real \to \real$ a strictly increasing, bounded and continuous function and by $F(y|t=\tau)$ and $F_j(x|t=\tau)$, the conditional distribution functions of $y$ and $x_j$ given $t=\tau$, respectively. Let $\phi_j(t)$ $0\leq j\leq p$ be now any conditional location functionals
related to a robust smoother. More precisely, for each $t \in{M}$ denote by $\phi_j(t)$ for $0\leq j\leq p$ the solution of
\begin{eqnarray}\label{nuevadefini}
\left.E\left(\Psi\left(\frac{y-\phi_0(\tau)}{\sigma_0(\tau)}\right)\right|t=\tau\right)=0\;\quad \mbox{and}\quad
\left.E\left(\Psi\left(\frac{x_j-\phi_j(\tau)}{\sigma_j(\tau)}\right)\right|t=\tau\right)=0\quad\mbox{for}\; 1\leq j\leq p
\end{eqnarray}
where $\sigma_0(\tau)$ and $\sigma_j(\tau)$ for $1\leq j\leq p$ are robust measures of conditional scale with respect to the conditional distribution of $y|t=\tau$ for $j=0$ and of $x_{j}|t=\tau$ for $1 \leq j\leq p$.
The three-step robust estimators are defined as follows:
\begin{enumerate}
\item[] \bf Step 1: \rm Estimate $\phi_j(t)$, $0\leq j \leq p$ through a robust smoothing. Denote by $\wfi_{j,{\mbox{\sc r}}}$ the obtained estimates and $\wbfi_{\mbox{\sc r}}(t)=(\wfi_{1,{\mbox{\sc r}}}(t),\dots,\wfi_{p,{\mbox{\sc r}}}(t))^{\tras}$.
\item[]\bf Step 2: \rm Estimate the regression parameter by applying a robust regression estimate to the residuals $y_i-\wfi_{0,{\mbox{\sc r}}}(t_i)$ and $\bx_i-\wbfi_{\mbox{\sc r}}(t_i)$. Denote by $\wbeta_{\mbox{\sc r}}$ the obtained estimator.
\item[]\bf Step 3: \rm Define the robust estimate of the regression function $g$ as $\wg_{\mbox{\sc r}}(t)=\wfi_{0,{\mbox{\sc r}}}(t)-\wbeta_{\mbox{\sc r}}^{\tras}\wbfi_{\mbox{\sc r}}(t)$.
\end{enumerate}
Note that in \bf Step 1\rm , the regression functions correspond to predictors taking values in a Riemannian manifold. Local $M-$type estimates $\wfi_{0,{\mbox{\sc r}}}$ and $\wfi_{j,{\mbox{\sc r}}}$ are defined in \cite{hrnp} as the solution of
\begin{equation}\label{estimador}
\sum_{i=1}^n
w_{n,h}(t,t_i)\Psi\left(\frac{y_i-\wfi_{0,{\mbox{\sc r}}}(t)}{\sigma_{0,n}(t)}\right)=0 \quad \mbox{ and }\quad \sum_{i=1}^n
w_{n,h}(t,t_i)\Psi\left(\frac{x_{ij}-\wfi_{j,{\mbox{\sc r}}}(t)}{\sigma_{j,n}(t)}\right)=0
\end{equation}
respectively, where the score function $\Psi$ is strictly increasing, bounded and continuous and $\sigma_{0,n}(\tau)$ and $\sigma_{j,n}(\tau)$ for $1\leq j\leq p $ are local robust scale estimates. Possible choices for the score function $\Psi$ are the Huber or the bisquare $\Psi$-functions.
The local robust scale estimates $\sigma_{0,n}(\tau)$ and $\sigma_{j,n}(\tau)$ $1\leq j\leq p $ can be taken as the local median of the absolute
deviations from the local median (local $\mbox{\sf MAD}$), \sl i.e., \rm the $\mbox{\sf MAD}$ (see \cite{hu}) with respect to the distributions
\begin{equation}\label{ecuacion}
F_n(y|t=\tau)=\sum_{i=1}^n w_{n,h}(\tau,t_i) I_{(-\infty,y]}(y_i) \quad\mbox{ and } \quad F_{j,n}(x|t=\tau)=\sum_{i=1}^n w_{n,h}(\tau,t_i) I_{(-\infty,x]}(x_{ij}),
\end{equation}
respectively.
In Step 2, the robust estimation of the regression parameter can be performed by applying to the residuals any of the robust methods proposed for linear regression.
For example, we can consider M-estimates (\cite{hu}) or GM-estimators (\cite{mall}). On the other hand, high breakdown point estimates with high efficiency as MM-estimates may also be computed (\cite{yo} and \cite{yoza}).
Throughout the paper, we consider $\wbeta_{\mbox{\sc r}}$ the solution of
\begin{equation}
\sum_{i=1}^n \psi_1\left((\werre_i-\wetab_i^{\tras}\wbeta_{\mbox{\sc r}})/{s_n}\right) w_1\left(\|\wetab_i\|\right)\wetab_i = 0,
\label{gm}
\end{equation}
with $s_n$ a robust consistent estimate of $\sigma_{\varepsilon}$, $\werre_i=y_i-\wfi_{0,{\mbox{\sc r}}}(t_i)$, $\wetab_i=\bx_i-\wbfi_{\mbox{\sc r}}(t_i)$,
$\psi_1$ a score function and $w_1$ a weight function. The zero of this equation can be computed iteratively using reweighting, as described for the location setting in [\cite{mmy}, Chapter 2].
The estimator defined by \cite{bp} corresponds to the choice $\Psi(u)=u$ with the estimators of the conditional distribution based on kernel weights defined in (\ref{ecuacion}). Therefore, if we consider the least squares estimator of $\bbeta$ in Step 2, we obtain the classical estimators proposed in \cite{whr}. On the other hand, when $(M,\gamma)$ is $ \real^d$ endowed with the canonical metric, the estimation procedure reduces to the proposal introduced in \cite{bobi}. Details on the algorithm used to compute the robust non parametric estimators in Step 1 can be found in \cite{hrnp}.
\section{Asymptotic results}\label{asymp}
The theorems of this Section study the asymptotic behavior of the regression parameter estimator of the PLM under standard conditions.
Let $U$ be an open set of $M$, we denote by $C^k(U)$ the set of $k$ times continuously differentiable
functions from $U$ to $\real$. As in \cite{bp}, we assume that the image measure of $P$ by $t$ is absolutely continuous with respect to the Riemannian volume measure $\nu_\gamma$ and we denote by $f$ its density on $M$ with respect to $\nu_\gamma$.
Let $\sigma_0(\tau)$ and $\sigma_j(\tau)$ for $1\leq j\leq p$ be the $\mbox{\sf MAD}$ of the conditional distribution of $y_1|t=\tau$ for $j=0$ and $x_{1j}|t=\tau$ for $1 \leq j\leq p$, respectively.
\subsection{Consistency}\label{consist}
To derive strong consistency result of the estimate $\wbeta_{\mbox{\sc r}}$ of $\bbeta$ defined in \bf Step 2 \rm, we will consider the following set of assumptions.
\begin{enumerate}
\item[$H1.\label{H1}$]
$\Psi: \real \to \real$ is an odd, strictly increasing, bounded and continuously differentiable function, such that $ u\Psi'(u)\leq \Psi(u)$ for $u>0$.
\item[$H2.$] $F(y|t=\tau)$ and $F_j(x|t=\tau)$ are symmetric around $\phi_0(\tau)$ and $\phi_j(\tau)$ and they are continuous functions of $y$ and $x$ for each $\tau$.
\item[$H3.$] ${M}_0$ is a compact set on ${M}$ such that:
\begin{enumerate}
\item[i)] The density function $f$ of $t$, is a bounded function such that $\inf_{\tau\in {M}_0}f(\tau)=A>0$.
\item[ii)] $\dst\inf_{\stackrel{\tau\in {M}_0}{s\in {M}_0}} \theta_{\tau}(s)=B>0$.
\end{enumerate}
\item[$H4.$] The following equicontinuity condition holds
$$\forall \varepsilon >0,\quad \exists \delta>0: |z-z'|<\delta \Rightarrow\sup_{s\in {M}_0}|G_s(z)-G_s(z')|<\varepsilon\;$$
when the function $G_s(z)$ equals $F(z|t=s)$ or $F_j(z|t=s)$ for $1\leq j\leq p$.
\item[$H5.$] For any open set $U_0$ of $M$ such that $M_0\subset
U_0$,
\begin{enumerate}
\item[i)] $f$ is of class $C^2$ on $U_0$.
\item[ii)] $F(y|t=\tau)$ and $F_j(x|t=\tau)$ are uniformly Lipschitz in
$U_0$, that is, there exists a constant $C>0$ such that
$|G_{\tau}(z)-G_{s}(z)|\leq C\,d_g(\tau,s)$ for all $\tau,s\in U_0$ and $z\in\real$,
when the function $G_{s}(z)$ equals $F(z|t=s)$ or $F_j(z|t=s)$ for $1\leq j\leq p$.
\end{enumerate}
\item[$H6.$] $K: \real \to \real$ is a bounded
non negative Lipschitz function of order one, with compact
support $[0,1]$ satisfying $\int_{\realito^d} \bu
K(\|\bu\|)d\bu=\bf{0}$ and $0<\int_{\realito^d}
\|\bu\|^2K(\|\bu\|)d\bu<\infty$.
\item[$H7.$] The sequence $h_n$ is such that $h_n\to 0$ and ${nh_n^d}/{\log n}\to \infty $ as $n\to \infty$.
\item[$H8.$]
The estimator $\sigma_{j,n}(\tau)$ of $\sigma_{j}(\tau)$ satisfy $\sigma_{j,n}(\tau)\convpp \sigma_j(\tau)$ as $n\to\infty$ for all $\tau\in{M}_0$ and $0\leq j\leq p.$
\end{enumerate}
\noi \textbf{Remark \ref{consist}.1.} Assumption $H1$ is a standard condition in a robustness framework. Boundedness
of the score function allows to derive the weak continuity of the robust conditional
functionals defined in (\ref{nuevadefini}) as shown in Theorem 2.2 of \cite{bofr}. Differentiability of the score function is needed in order to obtain uniform consistency results over compact sets. Assumption $H2$ and the oddness of the score function allow us to identify $\phi_j$ for $0\leq j\leq p$. More precisely, these assumptions guarantee Fisher--Consistency, i.e., the definitions introduced in (\ref{nuevadefini}) coincide with the respective conditional expectations when they exist.
The fact that $\theta_s(s)=1$ for all $s\in M$ guarantees that $H3$ holds for a small compact neighborhood of $s$. $H4$ and $H5$ are needed in order to derive strong uniform consistency results (see \cite{bofr}).
Assumptions $H6$ and $H7$ restrict the class of kernel functions to be chosen and establish
conditions on the rate of convergence of the smoothing parameters, which are
standard in non parametric regression. It is easy to see that Assumption $H8$ is satisfied, when we consider $\sigma_{j,n}(\tau)$ as the local median of the absolute deviations from the local median.
\noi \textbf{Remark \ref{consist}.2.} In order to obtain the consistency of the estimators, we will need that the regression parameter can be written as a functional of a distribution. More precisely, denote by $P$ the distribution of $(r_i,\etab_i)$ where $\etab_i=\bx_i- \bphi(t_i)$ and $r_i=y_i-\phi_0(t_i)$ and let $\bbeta(H)$ be a regression functional for the model $u = v\tras\bbeta +\varepsilon$ where $(u,v)\sim H$ and $v$ and $\varepsilon$ are independent.
Therefore, note that if $\werre_i=y_i-\wfi_{0,{\mbox{\sc r}}}(t_i)$, $\wetab_i=\bx_i-\wbfi_{\mbox{\sc r}}(t_i)$ and $\widehat{P_n}(A)=\frac 1n\sum_{i=1}^n I_{A}(\werre_i,\wetab_i)$, the robust estimator defined in (\ref{gm}) can be written as $\wbeta_{\mbox{\sc r}}=\bbeta(\widehat{P_n})$.
\vskip0.1in
\noi \textbf{Theorem \ref{consist}.1.} \textsl{Let $P$ and $\bbeta(H)$ be as defined in Remark \ref{consist}.2 and assume that $\bbeta(H)$ is continuous in $P$ and that it also provides Fisher-consistent estimates. Under assumptions $H1$ to $H8$, we
have that
\begin{enumerate}
\item[a)] $ |\wbeta_{\mbox{\sc r}}-\bbeta|\convpp 0$.
\item[b)] $\sup_{\tau\in M_0}|\wg_{\mbox{\sc r}}(\tau)-g(\tau)|\convpp 0$.
\end{enumerate}}
\subsection{Asymptotic distribution}\label{distrib}
In this Section, we assume that in \bf Step 2 \rm of the estimation procedure, the robust estimator $\wbeta_{\mbox{\sc r}}$ satisfies (\ref{gm}).
More precisely, let $\psi_1$ be a score function and $w_1$ be a weight function, we will derive the asymptotic distribution of the regression parameter estimates $\wbeta_{\mbox{\sc r}}$ defined as a solution of
$$
\sum_{i=1}^n \psi_1\left((\werre_i-\wetab_i^{\tras}\wbeta_{\mbox{\sc r}})/{s_n}\right) w_1\left(\|\wetab_i\|\right)\wetab_i = 0,$$
with $s_n$ a robust consistent estimate of $\sigma_{\varepsilon}$, $\werre_i=y_i-\wfi_{0,{\mbox{\sc r}}}(t_i)$, $\wetab_i=\bx_i-\wbfi_{\mbox{\sc r}}(t_i)$. Denote by $\etab_i=\bx_i- \bphi(t_i)$ and $r_i=y_i-\phi_0(t_i)$. Note that $r_i-\etab_i^{\tras}\bbeta=\varepsilon_i$.
To derive the asymptotic distribution of the regression parameter estimates, we will need the following set of assumptions.
\begin{enumerate}
\item[$A1.$] $\psi_1$ is an odd, bounded and twice continuously differentiable function with bounded derivatives $\psi^\prime_1$ and $\psi^ {\prime\prime}_1$, such that the functions $u\psi^\prime_1(u)$ and $u\psi^{\prime\prime}_1(u)$ are bounded.
\item[$A2.$] $E(w_1(||\etab_1||)\; \etab_1|t_1)=0$, $E(w_1(||\etab_1||)\; ||\etab_1||^2)<\infty$ and $A=E(\psi^\prime_1(\varepsilon/\sigma_{\varepsilon})w_1(||\etab_1||)\; \etab_1{\etab_1}\tras)$ is non singular.
\item[$A3.$] The function $w_1(u)$ is bounded, Lipschitz of order 1. Moreover, $\varphi(u)=w_1(u)u$ is also a bounded and continuously differentiable function with bounded derivative $\varphi^\prime(u)$ such that $u\varphi^\prime(u)$ is bounded.
\item[$A4.$] The functions $\phi_j(t)$ for $0\leq j \leq p$ are continuous with $\phi_j^\prime$ continuous in $M$.
\item[$A5.$] $\wphi_j(t)$ the estimates of $\phi_j(t)$ for $0\leq j \leq p$ have first continuous derivatives in $M$ and
{\begin{eqnarray}
n^{1/4}\sup_{t\in M}|\wphi_j(t)-\phi_j(t)|\convprob 0, \mbox{ for } 0 \leq j\leq p, \label{hipoorden}\\
\sup_{t\in M}|\nabla\wphi_j(t)-\nabla\phi_j(t)|\convprob 0, \mbox{ for } 0 \leq j\leq p. \label{hipoderi}
\end{eqnarray}}
where $\nabla \xi$ corresponds to the gradient of $\xi$. % with $\xi \in {{\cal F}( M)}$ and ${{\cal F}( M)}$ the class of functions $\{\xi\in {\cal C}^1(M): \|\xi\|_{\infty}\leq 1 \;\; \|\nabla \xi\|_{\infty}\leq 1\}$.
\item[$A6.$]
The estimator $s_n$ of $\sigma_{\varepsilon}$ satisfies $s_n\convprob \sigma_{\varepsilon}$ as $n\to\infty$.
\end{enumerate}
\vspace{0.1cm}
\noindent
\noi \textbf{Theorem \ref{distrib}.1.} Under the assumptions $A1$ to $A6$ we have that $$\sqrt{n}(\wbeta_{\mbox{\sc r}}-\bbeta)\convdist N(0,\sigma^2_{\varepsilon}A^{-1}\Sigma A^{-1}),$$ where $A$ is defined in $A2$ and $\Sigma=E(\psi^2_1(\varepsilon/\sigma_{\varepsilon}))E(w^2_1(||\etab_1||)\; \etab_1\etab_1\tras)$.
\rm
\vspace{0.2cm}
\noi \textbf{Remark \ref{distrib}.1.} To prove the previous result, we will need an inequality for the covering numbers of the class ${{\cal F}( M)}=\{\xi\in {\cal C}^1(M): \|\xi\|_{\infty}\leq 1 \;\; \|\nabla \xi\|_{\infty}\leq 1\}$. In the Appendix, we include some results related to the covering number on a Riemannian manifold.
\section{Simulation study}\label{simu}
In this section, we report the results of a simulation study designed to evaluate the performance of the robust procedure introduced in Section \ref{estimadores}. The main objective of this study is to compare the behavior of the classical and robust estimators for normal and contaminated samples. We consider the cylinder endowed with the metric induced by the canonical metric of $\real^3$. Due to the computational complexity of the robust procedure, we performed 500 replications of independent samples of size $n=200$. In the smoothing procedure, the kernel was taken as the quadratic kernel $K(t)=( {15}/{16}) (1-t^2)^2 I(|t|<1)$. The local $M-$estimate was computed with the bisquare score function, with constant 4.685, which gives a 95\% efficiency. As initial estimate in the iterative procedure to compute the local $M-$estimate, we have considered the local median. For the regression parameter, we have considered a GM-estimator (\ref{gm}) with score function on the residuals $\psi_1(r)=\psi_{\mbox{\scriptsize\sc h}, c}(r)=\max(-c,\min(r,c))$, i.e., the Huber function with tuning constant $c$. In the simulation study, we considered two different tuning constants $c=1.6$ and $c=1.7$ and weight function $w_1$
\begin{equation}
w_1(\eta)=W\left[\left(({\eta-\mu_{\eta}})/{\sigma_{\eta}}\right)^2\right]
\label{funcionpeso}
\end{equation}
where $W(t)=\psi_{\mbox{\scriptsize\sc h}, \chi_{1,0.975}}(t)/t$ while $\mu_{\eta}=\dst\median_{1\le i\le n}(\wetab_i)$ and $\sigma_{\eta}=\dst\mad_{1\le i\le n}(\wetab_i)/0.6745$ with $\wetab_i=\bx_i-\wbfi_{\mbox{\sc r}}(t_i) $.
In the next section, we describe the robust cross validation procedure used in order to obtain the robust estimates. To compute the classical estimators, the bandwidth was selected using the classical cross validation described in \cite{whr}. The distance $d_{\gamma}$ and the volume density function for the cylinder were computed in \cite{hrnp} and \cite{hriv}. We consider the following model:
The variables $(y_i,x_i,t_i)$ for $1\leq i\leq n$ are generated as
$$
y_i=2\;x_i+ (t_{1i}+t_{2i}-t_{3i})^2+\varepsilon_i \quad \mbox{ and } \quad x_i=\sin(2t_{3i})+\eta_i
$$
where $t_i=(t_{1i},t_{2i},t_{3i})=(\cos(\theta_i),\sin(\theta_i),s_i)$ with the variables $\theta_i$ following a uniform distribution in $(0,2\pi)$ and the variables $s_i$ uniform in $(0,1)$, i.e., $t_i$ have support in the cylinder with radius 1 and height between $(0,1)$.
The non contaminated case, denoted $C_0$ corresponds to independent errors $\varepsilon_i$ and $\eta_i$ normally distributed with mean $0$ and standard deviation $1$ and $0.05$, respectively. Besides, the so-called contaminations $C_1$ and $C_2$, correspond to selecting a distribution in a neighborhood of the central normal distribution and are defined as $\varepsilon\sim 0.9N(0, 1) + 0.1N(0,25)$ and $\varepsilon\sim 0.9N(0, 1) + 0.1N(5,0.25)$, respectively. Also we consider the contamination denoted $C_3$, where we introduce artificially $10$ observations of the variables $x$ equal to $5$ but we did not change the response variables and the covariates $t$.
The contamination $C_1$, which corresponds to
inflating the errors, will affect the variance of the regression estimates, while $C_2$ is an asymmetric contamination. The contamination $C_3$ allows us to study the behavior of the estimators under the presence of high leverage points.
\subsection{Bandwidth Selection}\label{ventana}
To select the smoothing parameter there exist two commonly used approaches: $L^2$ cross-validation and plug-in
methods. However, these procedures may not be robust. Their sensitivity to anomalous data
was discussed by several authors, see for example \cite{lmw}, \cite{ws}, \cite{bfm}, \cite{cr} and \cite{len}.
Under a non parametric regression model with carriers in an Euclidean space for spline-based estimators, \cite{cr} introduced a robust cross-validation criterion to select the bandwidth parameter. Robust cross-validation selectors for kernel M-smoothers were considered in \cite{lmw}, \cite{ws} and \cite{len}, under a fully non parametric regression model. In the Euclidean setting, for partly linear model, a robust plug-in procedure was studied in \cite{br} while for dependent observations, a robust cross-validation criterion was considered in \cite{bobi2}.
When the variables belong to a Riemannian manifold, a robust cross validation procedure was discussed in \cite{hrnp} under a fully non parametric regression model, while a classical cross-validation procedure under a partly linear model was considered in \cite{whr}.
We describe a robust cross-validation method to select the bandwidth in the case of partly linear models that robustifies the proposal given in \cite{whr} and generalizes the procedure given in \cite{br}.
The robust cross-validation method constructs an asymptotically optimal data-driven bandwidth, and thus adaptive
data-driven estimators, by minimizing
$$RCV(h) =\sum_{i=1}^n\mu_n^2(\widehat{\varepsilon}_{i}(h))+\sigma_n^2(\widehat{\varepsilon}_{i}(h)),$$
with $\widehat{\varepsilon}_{i}(h)=(y_i-\wfi_{0,-i,h}(t_i))-(\bX_i-\wbfi_{-i,h}(t_i))^{\tras}\widetilde{\bbeta}$; $\wfi_{0,-i,h}(t)$ and $\wbfi_{-i,h}(t)=(\wfi_{1,-i,h}(t),\dots,\wfi_{p,-i,h}(t))$ denote the robust non parametric estimators computed with bandwidth $h$ using all the data except the $i-$th observation, and $\widetilde{\bbeta}$ estimates the regression parameter by applying a robust regression estimate to the residuals $ y_i-\wfi_{0,-i,h}(t_i)$ and $\bX_i-\wbfi_{-i,h}(t_i)$. Besides, $\mu_n$ and $\sigma_n^2$ denote robust estimators of location and scale, respectively.
In the simulation study and in the real data example, we have considered $\mu_n$ as the median and $\sigma_n$ as the Huber $\tau$-scale estimator.
Also, the search for the bandwidth parameter was performed over the following values of bandwidths $0.1, 0.25, 1, 1.5, 2.5, 4, 5, 6.5$.
The asymptotic properties of data-driven estimators require further careful investigation and are beyond the scope of this paper.
\subsection{Simulation results}
Table \ref{simu}.1 shows the mean, standard deviation and mean square error for the regression estimates of $\beta$ and the mean of the mean square error of the regression function $g$ over the 500 replications for the considered model. We denote with ${ls}$, ${\mbox{\sc r},1.6}$ and ${\mbox{\sc r},1.7}$ the classical and robust estimators with tuning constants $c=1.6$ and $c=1.7$, respectively. Figure \ref{simu}.1 shows the boxplot of the regression parameter estimators. Since the results are very similar for the robust estimators using different tuning constants, in the figure, we only report the results for the robust estimators for $c=1.6$.
\begin{center}
\begin{tabular}{ccccc}
\hline
& mean($\wbeta_{ls}$) & sd($\wbeta_{ls}$)& $\MSE(\wbeta_{ls})$& $\MSE(\wg_{ls})$\\
$C_0$ & 2.0732& 0.1445& 0.0262 &0.2396 \\
$C_1$ & 1.8789& 1.7592& 3.1095& 20.4485 \\
$C_2$ & 1.8722 & 1.7975 & 3.2475 &45.9719 \\
$C_3$ &0.2806 & 0.0804 & 2.9627 & 1.4988 \\\hline
& mean($\wbeta_{\mbox{\sc r},1.6}$) & sd($\wbeta_{\mbox{\sc r},1.6}$)& $\MSE(\wbeta_{\mbox{\sc r},1.6})$& $\MSE(\wg_{\mbox{\sc r},1.6})$\\
$C_0$ &2.0646& 0.1524& 0.0274& 0.2431 \\
$C_1$ & 2.0198& 0.2303& 0.0534& 0.4897 \\
$C_2$ & 2.0109 &0.2540& 0.0646& 1.3580 \\
$C_3$ & 1.9563 & 0.2318 & 0.0557 & 0.3635 \\\hline
& mean($\wbeta_{\mbox{\sc r},1.7}$) & sd($\wbeta_{\mbox{\sc r},1.7}$)& $\MSE(\wbeta_{\mbox{\sc r},1.7})$& $\MSE(\wg_{\mbox{\sc r},1.7})$\\
$C_0$ &2.0458 &0.1569& 0.0267& 0.2717\\
$C_1$ &2.0151 &0.2375& 0.0566& 0.3794\\
$C_2$ &1.9997& 0.3082& 0.0950 &2.2173\\
$C_3$ &1.9468& 0.2272& 0.0544 &0.3652 \\\hline
\end{tabular}
\end{center}
\vspace{-0.1cm}
\footnotesize Table \ref{simu} Performance of regression parameter and the regression functions under the different contaminations.\normalsize
\newpage
\begin{center}
\hspace{-1cm}a)\hspace{8cm} b)
\vspace{-0.4cm}
\hspace{-1cm} \includegraphics[scale=0.4]{betacl.eps}\hspace{-0.4cm} \includegraphics[scale=0.4]{betarob.eps}\\
\end{center}
\vspace{-0.8cm}
\footnotesize Figure \ref{simu}. Boxplot of a) $\wbeta_{ls}$ the classical estimators and b) $\wbeta_{\mbox{\sc r},1.6}$ the robust estimators with tuning constant $c=1.6$ under the different contaminations.\normalsize
\vspace{0.5cm}
The simulation results confirm the inadequate behavior of the classical estimators under the considered contaminations. The robust estimators of the regression function introduced in this paper show only a small lack of efficiency under normality. Under $C_1$, $C_2$ and $C_3$, the results obtained with the classical estimators are not reliable, giving larger mean square errors than those obtained with the robust procedure.
In Table 4.1, we can observe that the contamination $C_2$ not only affects the classical estimators of the regression function, but also the robust ones for both tuning constants. In particular, under this contamination, the mean square errors of the classical regression estimator and of the classical regression function estimator are around 125 and 190 times larger than in the non contaminated setting. However, for the robust estimators with $c=1.6$, the mean square errors for the regression estimator and the regression function estimator increased 2.5 and 5.5 times with respect to the non contaminated case, respectively. Under the contamination $C_3$, we can observe how the high leverage points affect the bias of the classical regression estimators. These extreme behaviors of the classical estimators show their inadequacy when one suspects that the sample can contain outliers.
The results for the robust estimators using different tuning constants are quite similar.
\section{Real Example}\label{real}
The solar insolation is the amount of electromagnetic energy or solar radiation incident on the surface of the earth. This variable measures the duration of sunlight in seconds. In the automatic stations, the World Meteorological Organization defines insolation as the sum of time intervals in which the irradiance exceeds the threshold of 120 watts per square meter. The irradiance is direct radiation normal or perpendicular to the sun on Earth's surface. The values of the insolation in a particular location depend of the weather conditions and the sun's position on the horizon. For example, the presence of clouds increases the absorption, reflection and dispersion of the solar radiation. Desert areas, given the lack of clouds, have the highest values of insolation on the planet. More details about insolation can be seen in \cite{isola}.
As commented above, the insolation is related to the weather conditions. In particular, to illustrate the proposed estimators, we will analyze the relation between the insolation, the humidity, the direction and the speed of the wind.
We consider a data set available at http://meteo.navarra.es/. This data set consists of the daily average of relative humidity, speed and direction of the wind and the insolation. The wind's direction was measured with the point zero in the north direction and the wind's speed was measured in meters per second. The recorded data were measured daily in the automatic meteorologic station of Pamplona-Larrabide GN, in Navarra, Spain during the year 2004. In our study, we selected a random sample from this dataset.
In Figure 5.1, we can see that the humidity and the insolation follow a linear relation except for the points contained in the ellipse on the left of the plot. Therefore, we consider a partly linear model to explain the insolation, as a linear function of the humidity and a non parametric function of the speed and direction of the wind. Note that, the variables corresponding to the wind to be modeled non parametrically, belong to a cylinder. In the smoothing procedure, we choose the quadratic kernel $K(t)=( {15}/{16}) (1-t^2)^2 I(|t|<1)$. The robust estimators of the parameter and the regression function were computed with the same scores and the weight functions that we considered in the simulation study.
For the robust estimators, the bandwidth was selected using the robust cross validation procedure described in Section 4.1. For the classical estimators, a least square cross validation described in \cite{whr} was considered.
\vspace{-0.6cm}
\begin{center}
\hspace{-1cm} \includegraphics[scale=0.6]{humedad.eps}
%[height=9cm,width=11cm]
\end{center}
\vspace{-0.6cm}
\footnotesize Figure \ref{real} Scatter plot between the insolation and humidity. The dots in the ellipse correspond to the potential outliers.\normalsize
\vspace{0.5cm}
In a first step, we apply the classical and robust methods to obtain an estimator of the regression parameter using all the data. The obtained results are $\wbeta_{ls}=-1032.869$ and $\wbeta_{\mbox{\sc r}}=-1246.856$. Also, based on the asymptotic results obtained in Theorem 3.2.1, we calculate confidence intervals with level 0.95 estimating the unknown quantities. The result of the classical confidence interval is $CCI_{0.05}(\beta)= (-1229.9451, -835.7935)$ while that based on the robust estimation is $RCI_{0.05}(\beta)=( -1453.658, -1040.053)$. We observe a shift between the classical and robust intervals that may be due to the effect of the observations with low values of humidity. For that reason, we compute the classical estimator using all the data except the potential outliers obtaining $\wbeta_{ls}=-1294.620$ with related confidence interval $CCI_{0.05}(\beta)=(-1502.983, -1086.257)$ giving values closer to those of the robust procedure. It is clear that, if we estimate the regression parameter with the classical approach when the dataset has outliers, the conclusions can be misleading. For example, with the classical estimator computed with all the data, the hypothesis that $\beta=-1000$ is not rejected, while the conclusions with the classical estimator without the outliers or the robust estimators with all the data are reversed, rejecting the null hypothesis.
\section*{Acknowledgments}
This research was partially supported by Grants 20020100300057 from the Universidad de Buenos Aires, \sc pip \rm 1122008010216 from \textsc{conicet} and \sc pict \rm -00821 from \textsc{anpcyt}, Argentina.
We wish to thank the two anonymous referees for valuable comments which
led to an improved version of the original paper.
\thispagestyle{empty} %\cleardoublepage
\appendix
\section{Appendix}\label{proofs}
\setcounter{equation}{0}
\def\theequation{A.\arabic{equation}}
\subsection{Proof of Theorem \ref{consist}.1.}
%\noi\sf Proof of Theorem \ref{consist}.1. \rm
a)
By Remark \ref{consist}.2, it suffices to prove that $\Pi(\widehat{P_n},P)\convpp 0$ where $\Pi $ stands for the Prohorov distance.
Thus, we will show that for any bounded and continuous function $f:\real^{p+1}\to\real$ we have that $|E_{\widehat{P_n}}f-E_{P}f|\convpp 0$.
Given $\varepsilon>0$, we have the bound
\begin{eqnarray*}
|E_{\widehat{P_n}}f-E_{P}f|&\leq& \frac 1n \sum_{i=1}^n|f(r_i+(\phi_0(t_i)-\wfi_0(t_i)),\etab_i+(\bphi(t_i)-\wbfi(t_i)))-f(r_i,\etab_i)|I_C(r_i,\etab_i,t_i)\\
&+&\frac 1n \sum_{i=1}^nI_{C^c}(r_i,\etab_i,t_i)\|f\|_{\infty}
\end{eqnarray*}
where $C_1\subset \real^{p+1}$ and $M_0\subset M$ are compact sets such that $P(C)> 1-\varepsilon/(4\|f\|_{\infty})$ with $C=C_1\times M_0$.
Using Theorem 3.3 of \cite{hrnp}, we have that
\begin{eqnarray}\label{graciela}\sup_{t\in M_0}|\wfi_{j,{\mbox{\sc r}}}(t)-\phi_{j}(t)|\convpp 0
\end{eqnarray}
for $0\leq j\leq p$. From this fact and the Strong Law of Large Numbers, we have that there exists a set $\aleph\subset \Omega$ such that $P(\aleph)=0$ and for any $\omega\not\in\aleph$ we have that (\ref{graciela}) holds and $$\frac 1n \sum_{i=1}^nI_{C^c}(r_i,\etab_i,t_i)\to P(C^{c}).$$ Let $\bar{C_1}$ be the closure of a neighborhood of radius 1 of $C_1$. The uniform continuity of $f$ on $\bar{C_1}$ implies that there exists $\delta $ such that $\max_{1\leq j\leq p+1}|u_j-v_j|<\delta$, $u,v\in \bar{ C_1}$ entails $|f(u)-f(v)|\leq \frac{\varepsilon}2$. Thus, we have that for $\omega\not\in \aleph$ and $n$ large enough $\max_{0\leq j\leq p}\sup_{t\in M_0}|\wfi_{j,{\mbox{\sc r}}}(t)-\phi_j(t)|<\delta$ so that, for $1\leq i\leq n$, we obtain that
$$
|f(r_i+(\phi_0(t_i)-\wfi_0(t_i)),\etab_i+(\bphi(t_i)-\wbfi(t_i)))-f(r_i,\etab_i)|\leq \frac{\varepsilon}2.
$$
concluding the proof.
b) Follows immediately from (\ref{graciela}) and a). \square
\subsection{Entropy number}
\vskip0.1in
The main objective of this Section is to obtain an upper-bound to the entropy number of the class of functions ${{\cal F}( M)}=\{\xi\in {\cal C}^1(M): \|\xi\|_{\infty}\leq 1 \;\; \|\nabla \xi\|_{\infty}\leq 1\}$. The covering number $N(\delta,{\cal F}, \|\cdot\|)$ is the minimal number of balls, $\{\xi: \|\xi-\eta\|<\delta\}$ of radius $\delta$ needed to cover the set $\cal F$. The entropy number is the logarithm of the covering number. This upper-bound will be used to obtain the asymptotic distribution of the regression parameter. Several authors have studied bounds to the covering numbers for different sets, see for example \cite{van}, \cite{vv} and \cite{vw}. In particular, \cite{vw} obtained an upper-bound for the covering number of ${\cal F}(M)$ when $M$ is a bounded, convex subset of $\real^d$. For the convenience of the reader, we have included the following remark (see \cite{ghl}).
\noi \textbf{Remark \ref{proofs}.1.}
Let $N(\delta)$ be the minimal number of balls with radius $\delta$ needed to cover $(M,\gamma)$. A $\delta$-filling in $M$ is a maximal family of pairwise disjoint open balls of radius $\delta$ contained in $M$. We denote by $D(\delta)$ the maximum number of such balls. \rm It is easy to see that $N(2\delta)\leq D(\delta)$.
Let $diam_{(M,\gamma)}$ be the diameter of $(M,\gamma)$ and consider $\kappa\in \real$ such that $Ricc_{(M,\gamma)}\geq (d-1)\kappa$ where $Ricc_{(M\gamma)}$ is the Ricci curvature and $d$ the dimension of $M$. For example, if $\gamma$ is an Einstein metric with scalar curvature $2(d-1)\kappa$ then the inequality is attained. Note that if $\kappa>0$, by Myers's Theorem \cite{ghl}, $(M,\gamma)$ is a compact manifold with $diam_{(M,\gamma)}\leq\pi/\sqrt{\kappa}$. Since $M$ is compact there exists $\kappa$ with this property. Denote by $V^{\kappa}(r)$ the volume of a ball of radius $r$ in a complete, simply connected Riemannian manifold with constant curvature $\kappa$. By the Theorem of Bishop (see \cite{ghl}) we know that $\frac{Vol(B(x,r))}{V^{\kappa}(r)}$ is a non increasing function where $B(x,r)=\{z\in M: d_{\gamma}(x,z)\leq r\}$ is the geodesic ball centered in $x$ with radius $r$. Note that, $M$ is the closure of ${B(x,diam_{(M,\gamma)})}$ for any $x\in M$. If $\{B(a_1,\frac{\delta}{2}),\dots,B(a_D,\frac{\delta}{2})\}$ with $D=D(\frac{\delta}{2})$ is a $\frac{\delta}{2}-$filling then,
$$ N(\frac{\delta}{2})\leq \frac{Vol(M)}{\inf_{1\leq i\leq D}Vol(B(a_i,\frac{\delta}{2}))}\leq \frac{V^{\kappa}(diam_{(M,\gamma)})}{V^{\kappa}(\frac{\delta}{2})}.
$$
Therefore $N(\delta)\leq C(diam_{(M,\gamma)},\kappa)\delta^{-d}$.
\noi \textbf{Lemma \ref{proofs}.1.} \textsl{Let ${{\cal F}( M)}=\{\xi\in {\cal C}^1(M): \|\xi\|_{\infty}\leq 1 \;\; \|\nabla \xi\|_{\infty}\leq 1\}$, then the covering number for the supremum norm of ${{\cal F}(M)}$ that we denote by $N(\delta,{{\cal F}(M)},\|\cdot\|_{\infty})$ satisfies that $\log N(\delta,{{\cal F}(M)},\|\cdot\|_{\infty})<A\delta^{-d}$.}
\noi\sf Proof of Lemma \ref{proofs}.1. \rm Let ${\cal A}=\{B(a_1,\delta),\dots,B(a_N,\delta)\}$ be a covering of $M$ by open balls of radius $\delta$. By the remark above, we may assume that $N \leq C(diam_{(M,\gamma)},\kappa) \delta^{-d}$. Also, we can choose the covering ${\cal A}$ such that $B(a_i,\delta)\cap B(a_{i+1},\delta)\neq \emptyset$ for $1\leq i \leq N-1 $ and $a_i\neq a_j$ for $1\leq i,j\leq N$. Let $\xi\in {{\cal F}(M)}$, we define the function $\widetilde{\xi}=\sum_{i=1}^N\delta\left[\frac{\xi(a_i)}{\delta}\right]I_{D_i}$ where $D_1=B(a_1,\delta)$, $D_i=B(a_i,\delta)\backslash \cup_{j=1}^{i-1}B(a_{j},\delta)$ and $[a]$ denotes the integer part of $a$.
Let $x\in M$ and $1\leq k\leq N$ such that $x\in D_k$, then we have that $|\widetilde{\xi}(x)-\xi(x)|\leq|\widetilde{\xi}(x)-\xi(a_k)|+|\xi(a_k)-\xi(x)|$. Since $\widetilde{\xi}(a_k)=\widetilde{\xi}(x)$ and $\xi(a_k)=\widetilde{\xi}(a_k)+\delta(\frac{\xi(a_k)}{\delta}-[\frac{\xi(a_k)}{\delta}])=\widetilde{\xi}(a_k)+\delta B$ with $0\leq B<1$ and the fact that $\|\nabla\xi\|\leq 1$, we have that $|\widetilde{\xi}(x)-\xi(x)|\leq 2\delta$.
For the first value $\widetilde{\xi}(a_1)$ of a generic function $\xi$, we have $4[\frac 1{\delta}]+1$ possibilities since $|\widetilde{\xi}(a_1)|\leq 1$. Using that,
$$
|\widetilde{\xi}(a_k)-\widetilde{\xi}(a_{k-1})|\leq |\widetilde{\xi}(a_k)-\xi(a_{k})|+|{\xi}(a_k)-{\xi}(a_{k-1})|+|{\xi}(a_{k-1})-\widetilde{\xi}(a_{k-1})|\leq 6\delta.
$$
We get that, for each value of $\widetilde{\xi}(a_{k-1})$ we only have $13$ possibilities to choose $\widetilde{\xi}(a_k)$. Then, it is easy to verify that
$$
N(2\delta,{{\cal F}(M)},\|\cdot\|_{\infty})\leq (4[\frac 1{\delta}]+1) 13^N.
$$
concluding the proof. \square
{\noi \textbf{Remark \ref{proofs}.2.} Since $N(\delta,{{\cal F}(M)}, L^2(Q))\leq N(\delta,{{\cal F}(M)},\|\cdot\|_{\infty})$,
Lemma \ref{proofs}.1 entails that the covering number of ${{\cal F}(M)}$ satisfies, $\log N(\delta,{{\cal F}(M)}, L^2(Q))<A\delta^{-d}$.}
\subsection{ Proof of Theorem \ref{distrib}.1.}
%\noi\sf Proof of Theorem \ref{distrib}.1. \rm
Using a Taylor expansion of order one around $\wbeta_{\mbox{\sc r}}$, we get that $S_n=A_n(\wbeta_{\mbox{\sc r}}-\bbeta)$ with
\begin{eqnarray*}
S_n&=&\frac{1}{n} \sum_{i=1}^n \psi_1\left((\werre_i-\wetab_i^{\tras}\bbeta)/{s_n}\right) w_1\left(\|\wetab_i\|\right)\wetab_i \\
A_n&=&\frac{1}{n} \sum_{i=1}^n \psi^\prime_1\left((\werre_i-\wetab_i^{\tras}\wtbeta)/{s_n}\right) w_1\left(\|\wetab_i\|\right)\wetab_i \wetab_i\tras.
\end{eqnarray*}
where $\wtbeta$ is an intermediate point between $\bbe$ and $\wbeta_{\mbox{\sc r}}$. Analogous arguments to those used in Lemma 2 in \cite{bobi} allow us to show that $A_n\convprob A$ where $A$ is defined in $A2$.
Since $\frac{\sqrt{n}}{n} \sum_{i=1}^n \psi_1\left(\varepsilon_i/{\sigma_{\varepsilon}}\right) w_1\left(\|\etab_i\|\right)\etab_i$ is asymptotically normally distributed with covariance $\bSi$, it will be enough to show that
\begin{eqnarray}
&\sqrt{n}&[S_n-\frac{1}{n} \sum_{i=1}^n \psi_1\left(\varepsilon_i/{s_n}\right) w_1\left(\|\etab_i\|\right)\etab_i]\convprob 0,\label{prob1}\\
&\sqrt{n}&[\frac{1}{n} \sum_{i=1}^n \psi_1\left(\varepsilon_i/{s_n}\right) w_1\left(\|\etab_i\|\right)\etab_i-\frac{1}{n} \sum_{i=1}^n \psi_1\left(\varepsilon_i/{\sigma_{\varepsilon}}\right) w_1\left(\|\etab_i\|\right)\etab_i]\convprob 0.\label{prob2}
\end{eqnarray}
We first prove (\ref{prob1}). Using a Taylor expansion of order two, we obtain the following decomposition.
\begin{eqnarray*}
\sqrt{n}[S_n-\frac{1}{n} \sum_{i=1}^n \psi_1\left(\varepsilon_i/{s_n}\right) w_1\left(\|\etab_i\|\right)\etab_i]&=&\sum_{i=1}^5S_{ni}
\end{eqnarray*}
with
\begin{eqnarray*}
S_{n1}&=&\frac{\sqrt{n}}{n} \sum_{i=1}^n \psi^\prime_1\left(\varepsilon_i/{s_n}\right) [\wbgama\tras(t_i)\bbeta-\wgama_0(t_i)] w_1\left(\|\etab_i\|\right)\etab_i\\
S_{n2}&=&\frac{s_n\sqrt{n}}{n} \sum_{i=1}^n \psi_1\left(\varepsilon_i/{s_n}\right) [w_1\left(\|\wetab_i\|\right)\wetab_i-w_1\left(\|\etab_i\|\right)\etab_i]\\
S_{n3}&=&\frac{s_n\sqrt{n}}{n} \sum_{i=1}^n [\psi_1\left(\werre_i-\wetab_i^{\tras}\bbeta/{s_n}\right)-\psi_1\left(\varepsilon_i/{s_n}\right)] w_1\left(\|\wetab_i\|\right)[\wetab_i-\etab_i]\\
S_{n4}&=&\frac{\sqrt{n}}{2n} \sum_{i=1}^n \psi^{\prime\prime}_1\left(\varsigma_i/{s_n}\right) [\wbgama\tras(t_i)\bbeta-\wgama_0(t_i)]^2 w_1\left(\|\wetab_i\|\right)\etab_i\\
S_{n5}&=&\frac{\sqrt{n}}{n} \sum_{i=1}^n \psi_1\left(\varepsilon_i/{s_n}\right)[\wbgama\tras(t_i)\bbeta-\wgama_0(t_i)] [w_1\left(\|\wetab_i\|\right)-w_1\left(\|\etab_i\|\right)]\etab_i,\\
\end{eqnarray*}
where $\wgama_j(t)=\wfi_j(t)-\phi_j(t)$ for $0\leq j\leq p$ and $\wbgama(t)=(\wgama_1,\dots,\wgama_p)^{\tras}$.
By $A3$, $A5$ and $A6$ it is easy to see that $\|S_{ni}\|\convprob 0$ for $i=3,4,5.$
Let
\begin{eqnarray*}
{\cal J}^{(j)}_{1n}(\sigma,\xi)\!\!\!\!&=&\!\!\!\!\frac{\sqrt{n}}{n} \sum_{i=1}^n f^{(j)}_{1,\sigma,\xi}(r_i,\etab_i,t_i)\\
&=&\frac{\sqrt{n}}{n} \sum_{i=1}^n \psi^\prime_1\left(\frac{r_i-\etab_i\tras\bbe}{\sigma}\right) \xi(t_i) w_1\left(\|\etab_i\|\right)(\etab_i)_j\\
{\cal J}^{(j)}_{2n}(\sigma,\bxi)\!\!\!\!&=&\!\!\!\!\frac{\sqrt{n}}{n} \sum_{i=1}^n f^{(j)}_{2,\sigma,\bxi}(r_i,\etab_i,t_i)\!\! \\
&=&\!\!\frac{\sigma\sqrt{n}}{n} \sum_{i=1}^n \psi_1\left(\frac{r_i-\etab_i\tras\bbe}{\sigma}\right)\!\! [w_1\left(\|\etab_i+\bxi\|\right)(\etab_i+\bxi(t_i))_j-w_1\left(\|\etab_i\|\right)(\etab_i)_j]
\end{eqnarray*}
Therefore, it remains to show that ${\cal J}^{(j)}_{1n}(s_n,\wgama_s)\convprob 0$ and ${\cal J}^{(j)}_{2n}(s_n,\wbgama)\convprob 0$ for $0\leq j,s\leq p.$ From now on, we will omit the superscript $j$ for the sake of simplicity.
Let ${{\cal F}(M)}=\{\xi\in {\cal C}^1(M): \|\xi\|_{\infty}\leq 1 \; \|\nabla\xi\|_{\infty}\leq 1\}$ and consider the classes of functions
\begin{eqnarray*}
{\cal F}_1&=&\{f_{1,\sigma,\xi}(r,\etab,t)\quad \sigma\in(\sigma_{\varepsilon}/2,2\sigma_{\varepsilon})\quad \xi\in{{\cal F}(M)}\}\\
{\cal F}_2&=&\{f_{2,\sigma,{\bxi}}(r,\etab,t)\quad \sigma\in(\sigma_{\varepsilon}/2,2\sigma_{\varepsilon})\quad \bxi=(\xi_1,\dots,\xi_p), \; \xi_s\in{{\cal F}(M)}\}
\end{eqnarray*}
Note that, the independence of $\varepsilon_i$ and $(\bx_i,t_i)$, $A2$ and the fact that the errors $\varepsilon$ have symmetric distribution imply that $E(f(r_i,\etab_i,t_i))=0$ for any $f\in {\cal F}_1\cup {\cal F}_2$. As in \cite{bobi}, it is easy to see that the covering number of the classes ${\cal F}_1$ and ${\cal F}_2$ satisfy
$$
N(C_1\epsilon,{\cal F}_1, L^2(Q))\leq N(\epsilon,{{\cal F}(M)},L^2(Q)) \; N(\varepsilon,(\sigma_{\varepsilon}/2,2\sigma_{\varepsilon}), |\cdot |)
$$
$$ N(C_2\epsilon,{\cal F}_2, L^2(Q))\leq N^p(\epsilon,{{\cal F}(M)},L^2(Q)) \; N(\varepsilon,(\sigma_{\varepsilon}/2,2\sigma_{\varepsilon}), |\cdot |)$$
where $Q$ is any probability measure. From Remark \ref{proofs}.2 we get that the covering number of ${{\cal F}(M)}$ satisfies that $\log N(\epsilon,{{\cal F}(M)}, L^2(Q))<A\varepsilon^{-d}$. Therefore, the classes ${\cal F}_1$ and ${\cal F}_2$ have finite uniform-entropy. For $0<\delta<1, $ consider the subclasses ${\cal F}_{1,\delta}$ and ${\cal F}_{2,\delta}$ of ${\cal F}_1$ and ${\cal F}_2$ respectively, defined by,
\begin{eqnarray*}
{\cal F}_{1,\delta}&=&\{f\in{\cal F}_1\quad \xi\in{{\cal F}(M)},\; \|\xi\|_{\infty}<\delta\}\\
{\cal F}_{2,\delta}&=&\{f\in{\cal F}_2\quad \bxi=(\xi_1,\dots,\xi_p), \; \xi_s\in{{\cal F}(M)},\;\|\xi_s\|_{\infty}<\delta\}
\end{eqnarray*}
For any $\epsilon>0$, let $0<\delta<1$. From $A5$ and $A6$, we obtain that for $n$ large enough $P(s_n\in(\sigma_{\varepsilon}/2,2\sigma_{\varepsilon}))>1-\delta/2$ and $P(\wgama_s\in{{\cal F}(M)} \mbox{ and } \|\wgama_s\|_{\infty}<\delta)>1-\delta/2$ for $0\leq s \leq p$.
Then, the maximal inequality for covering numbers entails that for $0\leq s\leq p$
\begin{eqnarray*}
P(|{\cal J}_{1n}(s_n,\wgama_s)|>\epsilon)&\leq&P(|{\cal J}_{1n}(s_n,\wgama_s)|>\epsilon; \; s_n\in(\sigma_{\varepsilon}/2,2\sigma_{\varepsilon});\;\wgama_s\in{{\cal F}(M)} \mbox{ and } \|\wgama_s\|_{\infty}<\delta )+\delta\\
&\leq& P\left(\sup_{f\in {\cal F}_{1,\delta}}\left|\frac{\sqrt{n}}{n} \sum_{i=1}^n f(r_i,\etab_i,t_i)\right |>\epsilon\right)+\delta\\
&\leq& \frac 1{\epsilon}E\left(\sup_{f\in {\cal F}_{1,\delta}}\left|\frac{\sqrt{n}}{n} \sum_{i=1}^n f(r_i,\etab_i,t_i)\right |\right)+\delta\\
&\leq&\frac 1{\epsilon}{\cal G}(\delta,{\cal F}_1)+\delta
\end{eqnarray*}
where ${\cal G}(\delta,{\cal F})=\sup_{Q}\int_0^{\delta} \sqrt{1+\log N(\varepsilon \|F\|_{Q,2},{\cal F}, L^2(Q))}d\epsilon$. Using that ${\cal F}_1$ satisfies the uniform--entropy condition we get that $\lim_{\delta\to 0}{\cal G}(\delta,{\cal F}_1)= 0$, therefore $S_{n1}\convprob 0$. Similar arguments applied to ${\cal J}_{2n}(s_n,\wbgama)$ and the class ${\cal F}_2$ allow us to obtain that $S_{n2}\convprob 0$.
The proof of (\ref{prob2}) follows using analogous arguments to those considered in (\ref{prob1}).\square
\begin{thebibliography}{}
%\bibitem{aq} Aneiros-P\'erez, G. and Quintela del R\1o, G. (2002). Plug-in bandwidth choice in partial linear regression models with autoregressive errors. \sl J. Statist. Planning and Inference. \rm \bf 57\rm, 23-48.
%\bibitem{av} Aneiros-P\'erez, G. and Vieu, F. (2006). Semi-Functional partial linear regression. \sl Statistics \& Probability Letters. \bf 76\rm, 1102-1110.
\bibitem[\protect\citeauthoryear{Besse}{Besse}{1978}]{Besse} Besse, A. (1978). Manifolds all of whose Geodesics are Closed. {\it Springer-Verlag}.
\bibitem[\protect\citeauthoryear{Bianco and Boente}{Bianco and Boente}{2004}]{bobi} Bianco, A. and Boente, G. (2004). Robust estimators in semiparametric partly linear regression models. \sl J. Statist. Plann. Inference. \bf 122, \rm 229--252.
%\bibitem{bobi2} Bianco, A. Boente, G. (2007). Robust estimators under semi-parametric partly linear autoregression: asymptotic behaviour and bandwidth selection. \sl J. Time Ser. Anal. \bf 28, \rm 274--306.
%
%\bibitem{isola} Bird, R. and Hulstrom, R. (1980). Direct insolation models. Solar Energy Research Institute.
%
%\bibitem{bofr} Boente, G. and Fraiman, R. (1989). Robust nonparametric regression estimation. \textsl{Journal of
%Multivariate Analysis}, {\bf 29}, \rm 180--198.
%
%
%\bibitem{bfm} Boente, G.; Fraiman, R. and Meloche, J. (1997). Robust plug-in bandwidth estimators in nonparametric regression. \sl J. Statist. Plann. Inference. \bf 57\rm, 109--142.
%
%\bibitem{br} Boente, G. and Rodriguez, D. (2008). Robust bandwidth selection in semiparametric partly linear regression models: Monte Carlo study and influential analysis. \sl Computational Statistics \& Data Analysis. \bf 52\rm, 2808--2828.
%
%%\bibitem{BP} Bhattacharya, R. and Patrangenaru, V. (2002). Nonparametric estimation of location and dispersion on Riemannian manifolds. \sl Journal of Statistical Planning and Inference. \bf 108, \rm 23-35.
%
%\bibitem{cr} Cantoni, E. and Ronchetti, E. (2001). Resistant selection of the smoothing parameter for smoothing splines. \sl Statist. Comput. \bf 11\rm, 141--146.
%
%\bibitem{chen} Chen, H. (1988). Convergence rates for parametric components in a partly linear model. \sl Ann. Statist. \bf 16, \rm 136--146. \rm
%
%\bibitem{eng} Engle, R.; Granger, C.; Rice, J. and Weiss, A. (1986). Nonparametric estimates of the relation between weather and electricity sales. \sl J. Amer. Statist. Assoc. \bf 81, \rm 310--320.
%
%%\bibitem{fv} Ferraty, F. and Vieu, F. (2004). Nonparametric models for functional data, with application in regression, time-series prediction and curve discrimination. \sl Journal of Nonparametric Statistics. \bf 16, \rm 111-125.
%
%\bibitem{whr} Gonzalez--Manteiga, W.; Henry, G. and Rodriguez, D. (2012). Partly linear models on Riemannian manifolds. \sl Journal of Applied Statistics. \rm
% \bf 39, \rm 8, 1797--1809.
%
%%\bibitem{prada1} Garc\'\i a-Jurado, I.; Gonzalez-Manteiga, W.; Prada-Sanchez, J.M., Febrero-Bande, M. and Cao, R. (1995). Predicting using Box-
%%Jenkins, nonparametric and bootstrap techniques. \sl Technometrics. \bf 37, \rm 303-310.
%
%%\bibitem{HL} Hendriks, H. and Landsman, Z. (2007). Asymptotic data analysis on manifolds. \sl Ann. Statist. \bf 35, 1, \rm 109-131.
%
%\bibitem{ghl} Gallot, S.; Hulin, D. and Lafontaine, J. (1986). Riemannian Geometry, $3^{th}$ edition. {\it Springer.}
%
%\bibitem{hrnp} Henry, G. and Rodriguez, D. (2009). Robust nonparametric regression on Riemannian manifolds. \sl Journal of Nonparametric Statistics. \bf 21, 5, \rm 611--628.
%
%\bibitem{hriv} Henry, G. and Rodriguez, D. (2009). Kernel density estimation on Riemannian manifolds: asymptotic results. \sl Journal Math. Imaging Vis. \bf 43, \rm 235--639.
%\bibitem{hu} Huber, P. (1981). Robust Statistics, \it Wiley, New York. \rm
%
%\bibitem{len} Leung, D. (2005). Cross-validation in nonparametric regression with outliers. \sl Ann. Statist. \bf 33\rm, 2291–-2310.
%
%\bibitem{lmw} Leung, D.; Marriott, F. and Wu, E. (1993). Bandwidth selection in robust smoothing. \sl Journal of Nonparametric Statistics. \bf 4\rm, 333-–339.
%
%\bibitem{liang} Liang, H. (2000). Asymptotic of nonparametric part in partly linear models with measurement error in the nonparametric part. \sl J. Statist. Plann. Inference. \bf 86, \rm 51--62.
%
%\bibitem{mall} Mallows, C. (1975). On some topics in robustness. Technical Memorandum, AT\&T Bell Laboratories, Murray Hill.
%
%\bibitem{mmy} Maronna, R., Martin, D. and Yohai, V. (2006), \textsl{Robust statistics: theory and methods}, New York: Wiley.
%
%%\bibitem{Mardia} Mardia, K. (1972). Statistics of Directional Data. \it Academic Press, London. \rm
%
%\bibitem{bp} Pelletier, B. (2006). Nonparametric regression estimation on closed Riemannian manifolds. \sl Journal of Nonparametric Statistics. \bf 18, \rm 57--67.
%%\bibitem{pennec} Pennec, X. (2006). Intrinsic Statistics on Riemannian Manifolds: Basic Tools for Geometric Measurements. \sl Journal Math. Imaging Vis. \bf 25, \rm 127-154.
%
%%\bibitem{prada2} Prada-Sanchez, J.M., Febrero-Bande, M., Cotos-Ya\~nez, T.; Gonzalez-Manteiga, W.; Bermudez-Cela, J. and Lucas-Dominguez, T. (2000). Prediction of SO$_2$ pollution incidents near a power station using partially linear models and an historical matrix of predictor-response vectors. \sl Environmetrics. \bf 11, \rm 209-225.
%
%%\bibitem{prada3} Prada-Sanchez, J.M. and Febrero-Bande, M. (1997). Parametric, non-parametric and mixed approaches to prediction of sparsely
%%distributed pollution incidents: a case study. \sl Journal of Chemometrics. \bf 11, \rm 13-32.
%
%\bibitem{sp} Speckman, P. (1988). Kernel smoothing in partial linear models. \sl J. Roy. Statist. Soc. \rm Ser. B. \bf 50\rm, 413--436.
%
%\bibitem{van} van der Geer, Sara. (2000). Empirical processes in M-Estimation. \it Cambridge University Press.\rm
%
%\bibitem{vv} Van der Vaart, A. (1998). \textsl{Asymptotic Statistics}. Cambridge Series in Statistical and Probabilistic Mathematics. Cambridge University Press.
%
%\bibitem{vw} Van der Vaart, A. and Wellner, J. (1996). \textsl{Weak convergence and empirical processes with applications to statistics}. New York: Springer.
%
%\bibitem{yo} Yohai, V. (1987). High breakdown point and high efficiency robust estimates for regression. \sl Ann. Statist. \bf 15\rm,
%642--656.
%
%\bibitem{yoza} Yohai, V. and Zamar, R. (1988). High breakdown estimates of regression by means of the minimization of an
%efficient scale. \sl J. Amer. Statist. Assoc. \bf 83\rm, 406--413.
%
%\bibitem{ws} Wang, F. and Scott, D. (1994). The L1 method for robust nonparametric regression. \sl J. Amer. Statist. Assoc. \bf 89\rm, 65--76.
%
%\normalsize
\end{thebibliography}
\end{document}