

This patch creates a new namespace for the network. This namespace
should be filled with the network resources.  The first resource to
isolate is the network devices.  The namespace consists of a view of the
network devices. The view is empty when the namespace is unshared and
is populated by configuration from outside of the namespace.

Replace-Subject: [Network namespace] Network devices view list 
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com> 
--
 include/linux/nsproxy.h |    1 
 include/linux/sched.h   |    1 
 net/Makefile            |    3 

Index: 2.6-mm/include/linux/net_ns.h
===================================================================

Signed-off-by: Dave Hansen <haveblue@us.ibm.com>
---

 lxc-dave/include/linux/net_ns.h  |   87 ++++++++++
 lxc-dave/include/linux/nsproxy.h |    1 
 lxc-dave/include/linux/sched.h   |    1 
 lxc-dave/kernel/nsproxy.c        |   16 +
 lxc-dave/net/Kconfig             |    8 
 lxc-dave/net/Makefile            |    3 
 lxc-dave/net/net_ns.c            |  319 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 435 insertions(+)

diff -puN /dev/null include/linux/net_ns.h
--- /dev/null	2005-03-30 22:36:15.000000000 -0800
+++ lxc-dave/include/linux/net_ns.h	2006-05-31 12:48:41.000000000 -0700
@@ -0,0 +1,87 @@
+#ifndef _LINUX_NET_NS_H
+#define _LINUX_NET_NS_H
+
+#include <linux/kref.h>
+#include <linux/netdevice.h>
+#include <linux/sched.h>
+#include <linux/nsproxy.h>
+
+struct net_ns_dev {			/* one device listed in a namespace view */
+	struct list_head list;		/* link in net_ns_dev_list.list */
+	struct net_device* dev;		/* device, pinned with dev_hold() */
+};
+
+struct net_ns_dev_list {		/* devices visible from one namespace */
+	struct list_head list;		/* head of the net_ns_dev entries */
+	rwlock_t lock;			/* taken around every walk/update of list */
+};
+
+struct net_namespace {
+	struct kref kref;			/* released through free_net_ns() */
+	struct net_ns_dev_list dev_list;	/* device view of this namespace */
+};
+
+extern int net_ns_unregister_dev(struct net_device* dev,
+				 struct net_ns_dev_list *devlist);
+
+extern int net_ns_register_dev(struct net_device* dev,
+			       struct net_ns_dev_list *devlist);
+
+extern struct net_device *net_ns_find_dev_by_name(const char* devname,
+						  struct net_ns_dev_list *devlist);
+extern int net_ns_remove_dev(const char* devname,
+			     struct net_ns_dev_list *devlist);
+
+extern int net_ns_add_dev(const char* devname,
+			  struct net_ns_dev_list *devlist);
+
+extern struct net_namespace init_net_ns;
+
+static inline void get_net_ns(struct net_namespace *ns)
+{
+	kref_get(&ns->kref);	/* take one more reference on @ns */
+}
+
+#ifdef CONFIG_NET_NS
+
+extern int unshare_network(unsigned long unshare_flags,
+			   struct net_namespace **new_net);
+extern int copy_network(int flags, struct task_struct *tsk);
+extern void free_net_ns(struct kref *kref);
+
+static inline void put_net_ns(struct net_namespace *ns)
+{
+	kref_put(&ns->kref, free_net_ns);	/* free_net_ns() is the release hook */
+}
+
+static inline void exit_network(struct task_struct *p)
+{
+	struct net_namespace *ns = p->nsproxy->net_ns;
+	/* Drop the reference on @p's network namespace, if it has one. */
+	if (ns)
+		put_net_ns(ns);
+}
+#else
+/* !CONFIG_NET_NS stub: only asking for a new namespace is an error. */
+static inline int unshare_network(unsigned long unshare_flags,
+				  struct net_namespace **new_net)
+{
+	return (unshare_flags & CLONE_NEWNET) ? -EINVAL : 0;
+}
+static inline int copy_network(int flags, struct task_struct *tsk)
+{
+	return 0;	/* !CONFIG_NET_NS: flags (even CLONE_NEWNET) are ignored */
+}
+static inline void put_net_ns(struct net_namespace *ns)
+{	/* !CONFIG_NET_NS: no-op */
+}
+static inline void exit_network(struct task_struct *p)
+{	/* !CONFIG_NET_NS: no-op */
+}
+#endif
+static inline struct net_namespace *net_ns(void)
+{
+	return current->nsproxy->net_ns;	/* current's ns; no reference taken */
+}
+
+#endif
diff -puN include/linux/nsproxy.h~A1-netns-network_devices_view_list include/linux/nsproxy.h
--- lxc/include/linux/nsproxy.h~A1-netns-network_devices_view_list	2006-05-31 12:48:41.000000000 -0700
+++ lxc-dave/include/linux/nsproxy.h	2006-05-31 12:48:41.000000000 -0700
@@ -23,6 +23,7 @@ struct nsproxy {
 	atomic_t count;
 	spinlock_t nslock;
 	struct uts_namespace *uts_ns;
+	struct net_namespace *net_ns;
 	struct namespace *namespace;
 };
 extern struct nsproxy init_nsproxy;
diff -puN include/linux/sched.h~A1-netns-network_devices_view_list include/linux/sched.h
--- lxc/include/linux/sched.h~A1-netns-network_devices_view_list	2006-05-31 12:48:41.000000000 -0700
+++ lxc-dave/include/linux/sched.h	2006-05-31 12:48:41.000000000 -0700
@@ -25,6 +25,7 @@
 #define CLONE_CHILD_SETTID	0x01000000	/* set the TID in the child */
 #define CLONE_STOPPED		0x02000000	/* Start in stopped state */
 #define CLONE_NEWUTS		0x04000000	/* New utsname group? */
+#define CLONE_NEWNET		0x08000000	/* New network namespace */
 
 /*
  * Scheduling policies
diff -puN kernel/nsproxy.c~A1-netns-network_devices_view_list kernel/nsproxy.c
--- lxc/kernel/nsproxy.c~A1-netns-network_devices_view_list	2006-05-31 12:48:41.000000000 -0700
+++ lxc-dave/kernel/nsproxy.c	2006-05-31 12:48:41.000000000 -0700
@@ -14,6 +14,7 @@
 #include <linux/nsproxy.h>
 #include <linux/namespace.h>
 #include <linux/utsname.h>
+#include <linux/net_ns.h>
 
 static inline void get_nsproxy(struct nsproxy *ns)
 {
@@ -59,6 +60,8 @@ struct nsproxy *dup_namespaces(struct ns
 			get_namespace(ns->namespace);
 		if (ns->uts_ns)
 			get_uts_ns(ns->uts_ns);
+		if (ns->net_ns)
+			get_net_ns(ns->net_ns);
 	}
 
 	return ns;
@@ -106,6 +109,17 @@ int copy_namespaces(int flags, struct ta
 		goto out;
 	}
 
+	err = copy_network(flags, tsk);
+	if (err) {
+		if (new_ns->namespace)
+			put_namespace(new_ns->namespace);
+		if (new_ns->uts_ns)
+			put_uts_ns(new_ns->uts_ns);
+		tsk->nsproxy = old_ns;
+		put_nsproxy(new_ns);
+		goto out;
+	}
+
 out:
 	put_nsproxy(old_ns);
 	return err;
@@ -117,5 +131,7 @@ void free_nsproxy(struct nsproxy *ns)
 			put_namespace(ns->namespace);
 		if (ns->uts_ns)
 			put_uts_ns(ns->uts_ns);
+		if (ns->net_ns)
+			put_net_ns(ns->net_ns);
 		kfree(ns);
 }
diff -puN net/Kconfig~A1-netns-network_devices_view_list net/Kconfig
--- lxc/net/Kconfig~A1-netns-network_devices_view_list	2006-05-31 12:48:41.000000000 -0700
+++ lxc-dave/net/Kconfig	2006-05-31 12:48:41.000000000 -0700
@@ -60,6 +60,14 @@ config INET
 
 	  Short answer: say Y.
 
+config NET_NS
+	bool "Network namespaces"
+	default n
+	---help---
+	  Support for network namespaces.  This allows containers, i.e.
+	  vservers, to use network namespaces to provide isolated
+	  network for different servers.  If unsure, say N.
+
 if INET
 source "net/ipv4/Kconfig"
 source "net/ipv6/Kconfig"
diff -puN net/Makefile~A1-netns-network_devices_view_list net/Makefile
--- lxc/net/Makefile~A1-netns-network_devices_view_list	2006-05-31 12:48:41.000000000 -0700
+++ lxc-dave/net/Makefile	2006-05-31 12:48:41.000000000 -0700
@@ -50,3 +50,6 @@ obj-$(CONFIG_TIPC)		+= tipc/
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_SYSCTL)		+= sysctl_net.o
 endif
+ifeq ($(CONFIG_NET),y)
+obj-$(CONFIG_NET_NS)	+= net_ns.o
+endif
diff -puN /dev/null net/net_ns.c
--- /dev/null	2005-03-30 22:36:15.000000000 -0800
+++ lxc-dave/net/net_ns.c	2006-05-31 12:48:41.000000000 -0700
@@ -0,0 +1,319 @@
+/*
+ *  net_ns.c - adds support for network namespace
+ *
+ *  Copyright (C) 2006 IBM
+ *
+ *  Author: Daniel Lezcano <dlezcano@fr.ibm.com>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License as
+ *     published by the Free Software Foundation, version 2 of the
+ *     License.
+ */
+
+#include <linux/nsproxy.h>
+#include <linux/net_ns.h>
+#include <linux/module.h>
+
+/* Initial network namespace: statically allocated, device list starts empty. */
+struct net_namespace init_net_ns = {
+	/* NOTE(review): count starts at 2, not 1 -- confirm who owns the extra ref. */
+	.kref.refcount	= ATOMIC_INIT(2),
+	.dev_list = {
+		.list	= LIST_HEAD_INIT(init_net_ns.dev_list.list),
+		.lock	= RW_LOCK_UNLOCKED,
+	},
+};
+
+#ifdef CONFIG_NET_NS
+
+/*
+ * Remove a device from the namespace network devices list
+ * when it is unregistered from a namespace.  The reference the list
+ * entry held on the device is dropped.
+ * @dev : network device
+ * @devlist: network namespace devices
+ * Return 0 on success, -ENODEV if the device is not in the list
+ */
+int net_ns_unregister_dev(struct net_device *dev,
+			  struct net_ns_dev_list *devlist)
+{
+	struct net_ns_dev *entry;
+	int err = -ENODEV;
+
+	write_lock(&devlist->lock);
+
+	list_for_each_entry(entry, &devlist->list, list) {
+		if (entry->dev != dev)
+			continue;
+
+		/* Found it: unlink, drop the list's reference, stop walking. */
+		list_del(&entry->list);
+		dev_put(dev);
+		kfree(entry);
+		err = 0;
+		break;
+	}
+
+	write_unlock(&devlist->lock);
+	return err;
+}
+
+EXPORT_SYMBOL(net_ns_unregister_dev);
+
+/*
+ * Append a device to the namespace network devices list when it is
+ * registered from a namespace.  The list entry takes its own
+ * reference on the device.
+ * @dev : network device
+ * @devlist: network namespace devices
+ * Return 0 on success, -ENOMEM if the list entry cannot be allocated
+ */
+int net_ns_register_dev(struct net_device *dev,
+			struct net_ns_dev_list *devlist)
+{
+	struct net_ns_dev *entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (!entry)
+		return -ENOMEM;
+
+	write_lock(&devlist->lock);
+	dev_hold(dev);
+	entry->dev = dev;
+	list_add_tail(&entry->list, &devlist->list);
+	write_unlock(&devlist->lock);
+
+	return 0;
+}
+
+EXPORT_SYMBOL(net_ns_register_dev);
+
+/*
+ * Add a device, looked up by name, to the namespace network devices list.
+ * The list entry is allocated before dev_base_lock is taken because a
+ * GFP_KERNEL allocation may sleep, which is not allowed under the lock.
+ * @devname : network device name
+ * @devlist: network namespace devices
+ * Return 0 on success, -ENODEV if no such device, -ENOMEM if allocation fails
+ */
+int net_ns_add_dev(const char *devname,
+		   struct net_ns_dev_list *devlist)
+{
+	struct net_ns_dev *db;
+	struct net_device *dev;
+
+	db = kmalloc(sizeof(*db), GFP_KERNEL);
+	if (!db)
+		return -ENOMEM;
+
+	read_lock(&dev_base_lock);
+
+	for (dev = dev_base; dev; dev = dev->next)
+		if (!strncmp(dev->name, devname, IFNAMSIZ))
+			break;
+
+	if (dev) {
+		/* Pin the device while dev_base_lock is still held. */
+		dev_hold(dev);
+		db->dev = dev;
+		write_lock(&devlist->lock);
+		list_add_tail(&db->list, &devlist->list);
+		write_unlock(&devlist->lock);
+	}
+
+	read_unlock(&dev_base_lock);
+
+	if (dev)
+		return 0;
+
+	/* No device with that name: the preallocated entry is unused. */
+	kfree(db);
+	return -ENODEV;
+}
+
+EXPORT_SYMBOL(net_ns_add_dev);
+
+/*
+ * Remove a device, looked up by name, from the namespace network
+ * devices list and drop the reference its list entry held.
+ * Only the first entry whose device name matches is removed.
+ * @devname : network device name
+ * @devlist: network namespace devices
+ * Return 0 on success, -ENODEV if no listed device has that name
+ */
+int net_ns_remove_dev(const char *devname,
+		      struct net_ns_dev_list *devlist)
+{
+	struct net_ns_dev *entry;
+	int err = -ENODEV;
+
+	write_lock(&devlist->lock);
+
+	list_for_each_entry(entry, &devlist->list, list) {
+		struct net_device *dev = entry->dev;
+
+		if (strncmp(dev->name, devname, IFNAMSIZ) != 0)
+			continue;
+
+		/* Match: unlink the entry and release everything it owned. */
+		list_del(&entry->list);
+		dev_put(dev);
+		kfree(entry);
+
+		err = 0;
+		break;
+	}
+
+	write_unlock(&devlist->lock);
+
+	return err;
+}
+
+EXPORT_SYMBOL(net_ns_remove_dev);
+
+/*
+ * Find a namespace network device by name.
+ * The list is walked under the read lock and the first entry whose
+ * device name matches wins.
+ * @devname : network device name
+ * @devlist: network namespace devices
+ * Return the device with an extra reference held (the caller is
+ * expected to dev_put() it), or NULL if no listed device matches
+ */
+struct net_device *net_ns_find_dev_by_name(const char *devname,
+					   struct net_ns_dev_list *devlist)
+{
+	struct net_device *found = NULL;
+	struct net_ns_dev *entry;
+
+	read_lock(&devlist->lock);
+
+	list_for_each_entry(entry, &devlist->list, list) {
+		if (strncmp(entry->dev->name, devname, IFNAMSIZ) != 0)
+			continue;
+
+		/* Take the caller's reference while still under the lock. */
+		found = entry->dev;
+		dev_hold(found);
+		break;
+	}
+
+	read_unlock(&devlist->lock);
+
+	return found;
+}
+
+EXPORT_SYMBOL(net_ns_find_dev_by_name);
+
+/*
+ * Allocate a new network namespace with refcount 1 and an empty
+ * device list.  Nothing is copied from the original namespace.
+ * @old_ns: namespace being cloned (currently unused)
+ * Return NULL on error (failure to kmalloc), new ns otherwise
+ */
+struct net_namespace *clone_net_ns(struct net_namespace *old_ns)
+{
+	struct net_namespace *new_ns;
+
+	new_ns = kmalloc(sizeof(*new_ns), GFP_KERNEL);
+	if (!new_ns)
+		return NULL;
+
+	kref_init(&new_ns->kref);
+	INIT_LIST_HEAD(&new_ns->dev_list.list);
+	/* rwlock_init() rather than assigning the static initializer. */
+	rwlock_init(&new_ns->dev_list.lock);
+
+	return new_ns;
+}
+
+/*
+ * Unshare the current process' network namespace: when CLONE_NEWNET is
+ * requested, hand a freshly cloned namespace back through @new_net.
+ * called only in sys_unshare()
+ */
+int unshare_network(unsigned long unshare_flags,
+		    struct net_namespace **new_net)
+{
+	if (!(unshare_flags & CLONE_NEWNET))
+		return 0;
+
+	/* Creating a new namespace needs CAP_SYS_ADMIN. */
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	*new_net = clone_net_ns(current->nsproxy->net_ns);
+	return *new_net ? 0 : -ENOMEM;
+}
+
+/*
+ * Copy task tsk's network namespace, or clone it if flags specifies
+ * CLONE_NEWNET so parent and child no longer see each other's changes.
+ * Return 0, -EPERM (CLONE_NEWNET without CAP_SYS_ADMIN) or -ENOMEM.
+ */
+int copy_network(int flags, struct task_struct *tsk)
+{
+	struct net_namespace *old_ns = tsk->nsproxy->net_ns;
+	struct net_namespace *new_ns;
+	int err = 0;
+
+	if (!old_ns)
+		return 0;
+	/* This reference is kept when no new namespace is requested. */
+	get_net_ns(old_ns);
+
+	if (!(flags & CLONE_NEWNET))
+		return 0;
+
+	if (!capable(CAP_SYS_ADMIN)) {
+		err = -EPERM;
+		goto out;
+	}
+
+	new_ns = clone_net_ns(old_ns);
+	if (!new_ns) {
+		err = -ENOMEM;
+		goto out;
+	}
+	tsk->nsproxy->net_ns = new_ns;
+	/* Success and both error paths drop the reference taken on old_ns. */
+out:
+	put_net_ns(old_ns);
+	return err;
+}
+
+/*
+ * Empty the network namespace device list, dropping the device
+ * reference held by each entry and freeing the entry itself.
+ * The whole walk runs under the list's write lock.
+ * @devlist: network namespace devices
+ * Always returns 0.
+ */
+static int free_net_ns_dev(struct net_ns_dev_list *devlist)
+{
+	struct net_ns_dev *entry, *tmp;
+
+	write_lock(&devlist->lock);
+
+	/* _safe variant: each entry is unlinked and freed while iterating. */
+	list_for_each_entry_safe(entry, tmp, &devlist->list, list) {
+		list_del(&entry->list);
+		dev_put(entry->dev);
+		kfree(entry);
+	}
+
+	write_unlock(&devlist->lock);
+
+	return 0;
+}
+
+/* kref release callback: empty the device list, then free the namespace. */
+void free_net_ns(struct kref *kref)
+{
+	struct net_namespace *ns;
+	ns = container_of(kref, struct net_namespace, kref);
+	free_net_ns_dev(&ns->dev_list);
+	kfree(ns);
+}
+
+#endif
_
