Skip to main content

5 posts tagged with "docker"

View All Tags

docker-compose spec

· One min read

docker-compose 的spec

顶层元素(top-level elements):
version
services
networks
volumes
configs
secrets

service : build deploy

Each service MAY also include a Build section, which defines how to create the Docker image for the service. Compose implementations MAY support building docker images using this service definition. If not implemented the Build section SHOULD be ignored and the Compose file MUST still be considered valid.

Build support is an OPTIONAL aspect of the Compose specification, and is described in detail here

Each Service defines runtime constraints and requirements to run its containers. The deploy section groups these constraints and allows the platform to adjust the deployment strategy to best match containers' needs with available resources.

相关文档

namespace与docker

· One min read

CLONE_NEWUTS (since Linux 2.6.19) If CLONE_NEWUTS is set, then create the process in a new UTS namespace, whose identifiers are initialized by duplicating the identifiers from the UTS namespace of the calling process. If this flag is not set, then (as with fork(2)) the process is created in the same UTS namespace as the calling process.

docker stop

· One min read

docker stop 并不是直接 kill -9:它先向容器主进程发送 SIGTERM(可通过 STOPSIGNAL 或 --stop-signal 修改),等待宽限期(默认 10 秒)后进程仍未退出才发送 SIGKILL(即 kill -9)。具体实现得看代码 // todo

docker与iptable和网桥

· 8 min read

如何创建网桥

创建网桥,可以通过bridge-utils包的brctl来创建一个网桥

$sudo brctl addbr br0

然后通过brctl show可以看到列出的网桥

$brctl  show
bridge name bridge id STP enabled interfaces
br0 8000.000000000000 no

通过strace查看系统调用

$sudo strace  brctl addbr br1

输出

ubuntu@VM-0-3-ubuntu:~/libnlbuild/bin$ sudo strace  brctl addbr br1
...
socket(AF_UNIX, SOCK_STREAM, 0) = 3
ioctl(3, SIOCBRADDBR, "br1") = 0
+++ exited with 0 +++

看到调用

ioctl(3, SIOCBRADDBR, "br1") 

3 是 socket() 返回的文件描述符。0、1、2 分别是标准输入、标准输出和标准错误的文件描述符,所以下一个打开的文件描述符就是 3。

我写的一个创建网桥的小例子

//  bradd.c
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/un.h>
#include <linux/sockios.h>
/*
 * Create a bridge named "hello" by issuing the SIOCBRADDBR ioctl on a
 * local socket, i.e. the same call that `strace brctl addbr` shows.
 *
 * Returns 0 on success, 1 if the socket cannot be created or the ioctl
 * fails (the failure reason is reported through perror()).
 * Per the accompanying instructions the binary has to be run with sudo.
 */
int main(void)
{
    /*
     * Assign first, compare afterwards: `fd = socket(...) < 0` would store
     * the result of the comparison (0 or 1) instead of the descriptor,
     * because `<` binds tighter than `=`.
     */
    int br_socket_fd = socket(AF_LOCAL, SOCK_STREAM, 0);
    if (br_socket_fd < 0) {
        perror("socket error");
        return 1;
    }

    int status = 0;
    /* SIOCBRADDBR is provided by <linux/sockios.h>. */
    if (ioctl(br_socket_fd, SIOCBRADDBR, "hello") < 0) {
        perror("ioctl error");
        status = 1;
    }

    close(br_socket_fd);
    return status;
}
$gcc bradd.c -o bradd
## 需要使用sudo添加网桥
$sudo ./bradd

然后用brctl show 输出,创建了一个叫hello的网桥:

$ brctl show
bridge name bridge id STP enabled interfaces
docker0 8000.024273119fd1 no vethe6cf6a0
hello 8000.000000000000 no

然后我们发现 docker0 和 hello 两个网桥的区别在于 interfaces 一列:docker0 上挂了一个 veth 设备,而 hello 没有。我们如何添加 veth 呢?

  • 在brctl 中可以使用brctl addif
/*
 * Excerpt (parts elided with "...") of the helper behind `brctl addif`.
 *
 * bridge: name of the bridge, copied into ifr.ifr_name.
 * dev:    name of the interface to attach; it is translated to an
 *         interface index with if_nametoindex().
 *
 * The request is handed to the kernel as a SIOCBRADDIF ioctl on
 * br_socket_fd. The return value and the handling of `err` are in the
 * elided part of the excerpt.
 */
int br_add_interface(const char *bridge, const char *dev)
{
struct ifreq ifr;
...
/* Resolve the device name to its numeric interface index. */
int ifindex = if_nametoindex(dev);
...
/* ifr_name selects the bridge, ifr_ifindex the device to add to it. */
strncpy(ifr.ifr_name, bridge, IFNAMSIZ);
ifr.ifr_ifindex = ifindex;
err = ioctl(br_socket_fd, SIOCBRADDIF, &ifr);
...
}

最后调用linux 的net/bridge/br_if.c:

// dev is the device being added; br is the bridge it is added to

/* called with RTNL */
/*
 * Excerpt (parts elided with "...") of br_add_if() from
 * net/bridge/br_if.c.
 *
 * Returns 0 at the end of the visible success path. On failure the
 * visible code either returns PTR_ERR(p) or jumps to one of the labels
 * err1..err7, which live in the elided tail of the function.
 */
int br_add_if(struct net_bridge *br, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct net_bridge_port *p;
int err = 0;
unsigned br_hr, dev_hr;
bool changed_addr;

...
/* p is the port object for (br, dev); an error pointer ends the call. */
p = new_nbp(br, dev);
if (IS_ERR(p))
return PTR_ERR(p);

call_netdevice_notifiers(NETDEV_JOIN, dev);

err = dev_set_allmulti(dev, 1);
if (err) {
kfree(p); /* kobject not yet init'd, manually free */
goto err1;
}

err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
SYSFS_BRIDGE_PORT_ATTR);
if (err)
goto err2;

err = br_sysfs_addif(p);
if (err)
goto err2;

err = br_netpoll_enable(p);
if (err)
goto err3;

/* br_handle_frame is registered as dev's rx handler, with p as its data. */
err = netdev_rx_handler_register(dev, br_handle_frame, p);
if (err)
goto err4;

dev->priv_flags |= IFF_BRIDGE_PORT;

/* Link dev under the bridge's own net_device (br->dev). */
err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
if (err)
goto err5;

err = nbp_switchdev_mark_set(p);
if (err)
goto err6;

dev_disable_lro(dev);

/* The new port is put on the bridge's port list. */
list_add_rcu(&p->list, &br->port_list);

nbp_update_port_count(br);

netdev_update_features(br->dev);

/*
 * Headroom: if the bridge's needed_headroom is smaller than the value
 * reported for dev, update_headroom() is called on the bridge with the
 * device's value; otherwise the bridge's value is set on dev.
 */
br_hr = br->dev->needed_headroom;
dev_hr = netdev_get_fwd_headroom(dev);
if (br_hr < dev_hr)
update_headroom(br, dev_hr);
else
netdev_set_rx_headroom(dev, br_hr);

/* A failed insert of dev's own address is only logged, not treated as fatal. */
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");

if (br->dev->addr_assign_type != NET_ADDR_SET) {
/* Ask for permission to use this MAC address now, even if we
* don't end up choosing it below.
*/
err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
if (err)
goto err7;
}

err = nbp_vlan_init(p, extack);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
goto err7;
}

/*
 * br->lock is held while the bridge id is recalculated and, if both the
 * device and the bridge are up, while the port is enabled for STP.
 */
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);

if (netif_running(dev) && netif_oper_up(dev) &&
(br->dev->flags & IFF_UP))
br_stp_enable_port(p);
spin_unlock_bh(&br->lock);

br_ifinfo_notify(RTM_NEWLINK, NULL, p);

/* Notifiers are told about an address change only if the recalculation reported one. */
if (changed_addr)
call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

br_mtu_auto_adjust(br);
br_set_gso_limits(br);

kobject_uevent(&p->kobj, KOBJ_ADD);

return 0;
...
}

添加虚拟设备:

# strace  ip link add vethaaa type veth peer name vethbbb
execve("/sbin/ip", ["ip", "link", "add", "vethaaa", "type", "veth", "peer", "name", "vethbbb"], 0x7ffed8af30f0 /* 23 vars */)
...
socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
setsockopt(3, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0
bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, nl_pid=26226, nl_groups=00000000}, [12]) = 0
sendto(3, {{len=32, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=52, type=NLMSG_ERROR, flags=0, seq=0, pid=26226}, {error=-ENODEV, msg={{len=32, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}}}, iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52
access("/proc/net", R_OK) = 0
access("/proc/net/unix", R_OK) = 0
socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 4
ioctl(4, SIOCGIFINDEX, {ifr_name="vethaaa"}) = -1 ENODEV (No such device)
close(4) = 0
brk(NULL) = 0x560e12455000
brk(0x560e12476000) = 0x560e12476000
openat(AT_FDCWD, "/usr/lib/ip/link_veth.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=92, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1576836139, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, [{{nla_len=12, nla_type=IFLA_IFNAME}, "vethaaa"}, {{nla_len=48, nla_type=IFLA_LINKINFO}, [{{nla_len=8, nla_type=IFLA_INFO_KIND}, "veth"...}, {{nla_len=36, nla_type=IFLA_INFO_DATA}, "\x20\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x0c\x00\x03\x00\x76\x65\x74\x68\x62\x62\x62\x00"}]}]}, iov_len=92}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 92
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 36
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=36, type=NLMSG_ERROR, flags=NLM_F_CAPPED, seq=1576836139, pid=26226}, {error=0, msg={len=92, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1576836139, pid=0}}}, iov_len=36}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 36

socket(AF_NETLINK, SOCK_RAW|SOCK_CLOEXEC, NETLINK_ROUTE) = 3
setsockopt(3, SOL_SOCKET, SO_SNDBUF, [32768], 4) = 0
setsockopt(3, SOL_SOCKET, SO_RCVBUF, [1048576], 4) = 0
setsockopt(3, SOL_NETLINK, NETLINK_EXT_ACK, [1], 4) = 0
bind(3, {sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, 12) = 0
getsockname(3, {sa_family=AF_NETLINK, nl_pid=18263, nl_groups=00000000}, [12]) = 0
sendto(3, {{len=32, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}, 32, 0, NULL, 0) = 32
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=52, type=NLMSG_ERROR, flags=0, seq=0, pid=18263}, {error=-EPERM, msg={{len=32, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK, seq=0, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}}}}, iov_len=16384}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 52
access("/proc/net", R_OK) = 0
access("/proc/net/unix", R_OK) = 0
socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0) = 4
ioctl(4, SIOCGIFINDEX, {ifr_name="p1"}) = -1 ENODEV (No such device)
close(4) = 0
brk(NULL) = 0x5595d01bb000
brk(0x5595d01dc000) = 0x5595d01dc000
openat(AT_FDCWD, "/usr/lib/ip/link_veth.so", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
sendmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=84, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1576748752, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, [{{nla_len=7, nla_type=IFLA_IFNAME}, "p1"}, {{nla_len=44, nla_type=IFLA_LINKINFO}, [{{nla_len=8, nla_type=IFLA_INFO_KIND}, "veth"...}, {{nla_len=32, nla_type=IFLA_INFO_DATA}, "\x1c\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x07\x00\x03\x00\x70\x32\x00\x00"}]}]}, iov_len=84}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 84
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base=NULL, iov_len=0}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_TRUNC}, MSG_PEEK|MSG_TRUNC) = 104
recvmsg(3, {msg_name={sa_family=AF_NETLINK, nl_pid=0, nl_groups=00000000}, msg_namelen=12, msg_iov=[{iov_base={{len=104, type=NLMSG_ERROR, flags=0, seq=1576748752, pid=18263}, {error=-EPERM, msg={{len=84, type=RTM_NEWLINK, flags=NLM_F_REQUEST|NLM_F_ACK|NLM_F_EXCL|NLM_F_CREATE, seq=1576748752, pid=0}, {ifi_family=AF_UNSPEC, ifi_type=ARPHRD_NETROM, ifi_index=0, ifi_flags=0, ifi_change=0}, [{{nla_len=7, nla_type=IFLA_IFNAME}, "p1"}, {{nla_len=44, nla_type=IFLA_LINKINFO}, [{{nla_len=8, nla_type=IFLA_INFO_KIND}, "veth"...}, {{nla_len=32, nla_type=IFLA_INFO_DATA}, "\x1c\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x07\x00\x03\x00\x70\x32\x00\x00"}]}]}}}, iov_len=104}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, 0) = 104
write(2, "RTNETLINK answers: Operation not"..., 43RTNETLINK answers: Operation not permitted
) = 43
exit_group(2) = ?
+++ exited with 2 +++

linux 相关的netlink veth内容:

// drivers/net/veth.c
/*
 * rtnl_link_ops table of the veth driver: it binds the link kind
 * (DRV_NAME) to the driver's setup/validate/newlink/dellink callbacks
 * and its netlink attribute policy.
 * NOTE(review): .newlink = veth_newlink is presumably the callback that
 * ends up serving the RTM_NEWLINK request seen in the strace output for
 * `ip link add ... type veth` -- confirm against the rtnetlink core.
 */
static struct rtnl_link_ops veth_link_ops = {
.kind = DRV_NAME,
.priv_size = sizeof(struct veth_priv),
.setup = veth_setup,
.validate = veth_validate,
.newlink = veth_newlink,
.dellink = veth_dellink,
.policy = veth_policy,
.maxtype = VETH_INFO_MAX,
.get_link_net = veth_get_link_net,
};
// net/netlink/af_netlink.c
/*
 * proto_ops table for the PF_NETLINK socket family: each socket
 * operation is mapped to its netlink_* handler (bind, connect, sendmsg,
 * recvmsg, setsockopt, ...).
 * NOTE(review): the sock_no_* entries (socketpair, accept, listen,
 * shutdown, mmap, sendpage) are presumably the generic "operation not
 * supported" stubs -- confirm in net/core/sock.c.
 */
static const struct proto_ops netlink_ops = {
.family = PF_NETLINK,
.owner = THIS_MODULE,
.release = netlink_release,
.bind = netlink_bind,
.connect = netlink_connect,
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
.poll = datagram_poll,
.ioctl = netlink_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = netlink_setsockopt,
.getsockopt = netlink_getsockopt,
.sendmsg = netlink_sendmsg,
.recvmsg = netlink_recvmsg,
.mmap = sock_no_mmap,
.sendpage = sock_no_sendpage,
};

添加veth 设备

首先是添加socket

(gdb) bt
#0 socket () at ../sysdeps/unix/syscall-template.S:78
#1 0x00005555555b60c7 in rtnl_open_byproto (rth=0x5555557d8020 <rth>, subscriptions=0, protocol=<optimized out>) at libnetlink.c:194
#2 0x000055555555f956 in main (argc=9, argv=0x7fffffffe548) at ip.c:308
Breakpoint 6, __libc_sendmsg (fd=3, msg=msg@entry=0x7fffffffdd70, flags=flags@entry=0) at ../sysdeps/unix/sysv/linux/sendmsg.c:28
28 ../sysdeps/unix/sysv/linux/sendmsg.c: No such file or directory.
(gdb) bt
#0 __libc_sendmsg (fd=3, msg=msg@entry=0x7fffffffdd70, flags=flags@entry=0) at ../sysdeps/unix/sysv/linux/sendmsg.c:28
#1 0x00005555555b5c8f in __rtnl_talk_iov (rtnl=0x5555557d8020 <rth>, iov=iov@entry=0x7fffffffddf0, iovlen=iovlen@entry=1, answer=answer@entry=0x0, show_rtnl_err=show_rtnl_err@entry=true,
errfn=0x0) at libnetlink.c:887
#2 0x00005555555b7225 in __rtnl_talk (errfn=0x0, show_rtnl_err=true, answer=<optimized out>, n=0x7fffffffde40, rtnl=<optimized out>) at libnetlink.c:1000
#3 rtnl_talk (rtnl=<optimized out>, n=n@entry=0x7fffffffde40, answer=answer@entry=0x0) at libnetlink.c:1006
#4 0x000055555557bc6e in iplink_modify (cmd=cmd@entry=16, flags=flags@entry=1536, argc=3, argc@entry=6, argv=<optimized out>, argv@entry=0x7fffffffe560) at iplink.c:1084
#5 0x000055555557c0c6 in do_iplink (argc=7, argv=0x7fffffffe558) at iplink.c:1641
#6 0x000055555555ff0c in do_cmd (argv0=0x7fffffffe7d8 "link", argc=8, argv=0x7fffffffe550) at ip.c:113
#7 0x000055555555f9a0 in main (argc=9, argv=0x7fffffffe548) at ip.c:317

比如命令ip link add veth_0 type veth peer name veth_0_peer 初始化的时候req.n 的长度是32

 p req.n.nlmsg_len 
$1 = 32

经过ret = iplink_parse(argc, argv, &req, &type); 后变成44,

(gdb) p ((char *)n)[32]@64
$50 = "\v\000\003\000veth_0\000\000\064\000\022\000\b\000\001\000veth(\000\002\000$\000\001", '\000' <repeats 17 times>, "\020\000\003\000veth_0_peer"

iptables是什么?

# type iptables
iptables is hashed (/sbin/iptables)

iptables命令为什么可以处理那些问题呢?

iptable原理

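iptables 是一个用户态工具,它通过 socket 与内核通信来修改 netfilter 子系统的规则和相关 hook:传统的 iptables(legacy)在 raw socket 上使用 getsockopt/setsockopt,而 iptables-nft 则通过 netlink 与 nf_tables 通信。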

源码 相关阅读

相关阅读