diff --git a/.gitignore b/.gitignore index 2270e137..1e29a47b 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ clab-mini-lab .mini-lab.clab.yml .mini-lab.cumulus.yml +.mini-lab.sonic.yml ansible-common metal-hammer* requirements.yaml diff --git a/Makefile b/Makefile index dda0fff1..784ac0bf 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ .EXPORT_ALL_VARIABLES: # Commands -YQ=docker run --rm -i -v $(shell pwd):/workdir mikefarah/yq:3 /bin/sh -c +YQ=docker run --rm -i -v $(shell pwd):/workdir mikefarah/yq:4 KINDCONFIG := $(or $(KINDCONFIG),control-plane/kind.yaml) KUBECONFIG := $(shell pwd)/.kubeconfig @@ -19,7 +19,8 @@ MINI_LAB_VM_IMAGE := $(or $(MINI_LAB_VM_IMAGE),ghcr.io/metal-stack/mini-lab-vms: MACHINE_OS=ubuntu-22.04 -SONIC_REMOTE_IMG := https://sonic-build.azurewebsites.net/api/sonic/artifacts?branchName=master&platform=vs&buildId=125016&target=target%2Fsonic-vs.img.gz +# Check: https://sonic-build.azurewebsites.net/ui/sonic/pipelines +SONIC_REMOTE_IMG := https://sonic-build.azurewebsites.net/api/sonic/artifacts?branchName=202211&platform=vs&target=target%2Fsonic-vs.img.gz # Machine flavors ifeq ($(MINI_LAB_FLAVOR),default) @@ -94,8 +95,8 @@ env: .PHONY: _ips _ips: - $(eval ipL1 = $(shell ${YQ} "yq r clab-mini-lab/ansible-inventory.yml 'all.children.cvx.hosts.leaf01.ansible_host'")) - $(eval ipL2 = $(shell ${YQ} "yq r clab-mini-lab/ansible-inventory.yml 'all.children.cvx.hosts.leaf02.ansible_host'")) + $(eval ipL1 = $(shell ${YQ} --unwrapScalar=true '.nodes.leaf01."mgmt-ipv4-address"' clab-mini-lab/topology-data.json)) + $(eval ipL2 = $(shell ${YQ} --unwrapScalar=true '.nodes.leaf02."mgmt-ipv4-address"' clab-mini-lab/topology-data.json)) $(eval staticR = "100.255.254.0/24 nexthop via $(ipL1) dev docker0 nexthop via $(ipL2) dev docker0") .PHONY: route diff --git a/deploy_partition.yaml b/deploy_partition.yaml index 99eb52ff..c885de9d 100644 --- a/deploy_partition.yaml +++ b/deploy_partition.yaml @@ -1,12 +1,30 @@ --- -- name: deploy leaves - hosts: leaves +- name: Configure SONiC switches + hosts: leaves:&sonic + gather_facts: no + pre_tasks: + - name: Wait for system to become reachable + ansible.builtin.wait_for_connection: + delay: 10 + timeout: 50 + roles: + - name: ansible-common + tags: always + - name: metal-roles/partition/roles/sonic + tags: sonic + - name: sonic + tags: sonic + +- name: Configure Cumulus switches + hosts: leaves:&cumulus roles: - name: metal-roles/partition/roles/leaf tags: leaf + - name: internet + tags: internet -- name: deploy docker - hosts: leaves +- name: Deploy docker on Cumulus switches + hosts: leaves:&cumulus pre_tasks: # the following task is not required as long as we do not install something from the cumulus repositories, for which all the keys are expired now # the one from here has also expired on 9th Apr 2024: https://docs.nvidia.com/networking-ethernet-software/knowledge-base/Installing-and-Upgrading/Upgrading/Update-Expired-GPG-Keys/#package-upgrade-from-cumulus-linux-37x-to-3716 @@ -23,7 +41,7 @@ - name: metal-roles/partition/roles/docker-on-cumulus tags: docker-on-cumulus -- name: deploy dhcp server and pixiecore +- name: Deploy dhcp server and pixiecore on leaf01 hosts: leaf01 vars: setup_yaml: @@ -37,7 +55,7 @@ - name: metal-roles/partition/roles/pixiecore tags: pixiecore -- name: deploy metal-core +- name: Deploy metal-core hosts: leaves vars: setup_yaml: @@ -46,12 +64,10 @@ roles: - name: ansible-common tags: always - - name: internet - tags: internet - name: metal-roles/partition/roles/metal-core tags: metal-core -- name: wait for switches +- name: Wait for switches hosts: localhost connection: local gather_facts: no diff --git a/files/inet/daemons b/files/inet/daemons new file mode 100644 index 00000000..67b18311 --- /dev/null +++ b/files/inet/daemons @@ -0,0 +1,41 @@ +bgpd=yes +ospfd=no +ospf6d=no +ripd=no +ripngd=no +isisd=no +pimd=no +pim6d=no +ldpd=no +nhrpd=no +eigrpd=no +babeld=no +sharpd=no +pbrd=no +bfdd=no +fabricd=no +vrrpd=no +pathd=no + +vtysh_enable=yes +zebra_options=" -A 127.0.0.1 -s 90000000" +mgmtd_options=" -A 127.0.0.1" +bgpd_options=" -A 127.0.0.1" +ospfd_options=" -A 127.0.0.1" +ospf6d_options=" -A ::1" +ripd_options=" -A 127.0.0.1" +ripngd_options=" -A ::1" +isisd_options=" -A 127.0.0.1" +pimd_options=" -A 127.0.0.1" +pim6d_options=" -A ::1" +ldpd_options=" -A 127.0.0.1" +nhrpd_options=" -A 127.0.0.1" +eigrpd_options=" -A 127.0.0.1" +babeld_options=" -A 127.0.0.1" +sharpd_options=" -A 127.0.0.1" +pbrd_options=" -A 127.0.0.1" +staticd_options="-A 127.0.0.1" +bfdd_options=" -A 127.0.0.1" +fabricd_options="-A 127.0.0.1" +vrrpd_options=" -A 127.0.0.1" +pathd_options=" -A 127.0.0.1" diff --git a/files/inet/frr.conf b/files/inet/frr.conf new file mode 100644 index 00000000..0e32b873 --- /dev/null +++ b/files/inet/frr.conf @@ -0,0 +1,57 @@ +frr defaults datacenter +hostname inet +! +log syslog informational +! +vrf vrfInternet + vni 104009 + ip route 0.0.0.0/0 172.17.0.1 +exit-vrf +! +interface eth2 + ipv6 nd ra-interval 6 + no ipv6 nd suppress-ra +! +interface eth3 + ipv6 nd ra-interval 6 + no ipv6 nd suppress-ra +! +interface lo + ip address 10.0.0.21/32 +! +router bgp 4200000021 + bgp router-id 10.0.0.21 + bgp bestpath as-path multipath-relax + neighbor FABRIC peer-group + neighbor FABRIC remote-as external + neighbor FABRIC timers 1 3 + neighbor eth1 interface peer-group FABRIC + neighbor eth2 interface peer-group FABRIC + ! + address-family ipv4 unicast + redistribute connected route-map LOOPBACKS + exit-address-family + ! + address-family l2vpn evpn + advertise-all-vni + neighbor FABRIC activate + neighbor FABRIC allowas-in 2 + exit-address-family +! +router bgp 4200000021 vrf vrfInternet + bgp router-id 10.0.0.21 + bgp bestpath as-path multipath-relax + ! + address-family ipv4 unicast + redistribute static + exit-address-family + ! + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +! +route-map LOOPBACKS permit 10 + match interface lo +! +line vty +! \ No newline at end of file diff --git a/files/inet/network.sh b/files/inet/network.sh new file mode 100644 index 00000000..5b0313c2 --- /dev/null +++ b/files/inet/network.sh @@ -0,0 +1,29 @@ +#!/bin/sh +set -o errexit -o xtrace + +ip link add vrfInternet type vrf table 1000 +ip link set dev vrfInternet up +ip link set dev eth0 master vrfInternet + +ip link add name bridge type bridge stp_state 0 +ip link set dev bridge type bridge vlan_filtering 1 +ip link set dev bridge mtu 9000 +ip link set dev bridge up + +ip link add link bridge up name vlanInternet type vlan id 1000 +ip link set dev vlanInternet mtu 9000 +ip link set dev vlanInternet master vrfInternet +bridge vlan del vid 1 dev bridge self +bridge vlan add vid 1000 dev bridge self +ip link set dev vlanInternet up + +ip link add vniInternet type vxlan id 104009 dstport 4789 local 10.0.0.21 nolearning +ip link set dev vlanInternet mtu 9000 +ip link set dev vniInternet master bridge +bridge vlan del vid 1 dev vniInternet +bridge vlan del vid 1 untagged pvid dev vniInternet +bridge vlan add vid 1000 dev vniInternet +bridge vlan add vid 1000 untagged pvid dev vniInternet +ip link set up dev vniInternet + +iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE diff --git a/files/inet/vtysh.conf b/files/inet/vtysh.conf new file mode 100644 index 00000000..e0ab9cb6 --- /dev/null +++ b/files/inet/vtysh.conf @@ -0,0 +1 @@ +service integrated-vtysh-config diff --git a/images/sonic/Dockerfile b/images/sonic/Dockerfile new file mode 100644 index 00000000..76afa0d6 --- /dev/null +++ b/images/sonic/Dockerfile @@ -0,0 +1,20 @@ +FROM ubuntu:22.04 + +ENV LIBGUESTFS_BACKEND=direct + +RUN apt-get update && \ + apt-get --no-install-recommends install --yes \ + linux-image-5.15.0-102-generic \ + net-tools \ + ovmf \ + python3 \ + python3-guestfs \ + qemu-system-x86 \ + telnet \ + tini + +ENTRYPOINT ["/usr/bin/tini", "--"] + +COPY config_db.json mirror_tap_to_eth.sh sonic_entrypoint.py / + +CMD ["/usr/bin/python3", "-u", "/sonic_entrypoint.py"] diff --git a/images/sonic/config_db.json b/images/sonic/config_db.json new file mode 100644 index 00000000..3c4269c5 --- /dev/null +++ b/images/sonic/config_db.json @@ -0,0 +1,72 @@ +{ + "AUTO_TECHSUPPORT": { + "GLOBAL": { + "state": "disabled" + } + }, + "DEVICE_METADATA": { + "localhost": { + "docker_routing_config_mode": "split-unified", + "hostname": "{{ hostname }}", + "hwsku": "Force10-S6000", + "mac": "{{ mac }}", + "platform": "x86_64-kvm_x86_64-r0", + "type": "LeafRouter" + } + }, + "FEATURE": { + "mgmt-framework": { + "state": "disabled" + }, + "pmon": { + "state": "disabled" + }, + "snmp": { + "state": "disabled" + }, + "telemetry": { + "state": "disabled" + } + }, + "MGMT_INTERFACE": null, + "MGMT_PORT": { + "eth0": { + "alias": "eth0", + "admin_status": "up" + } + }, + "PORT": { + "Ethernet0": { + "lanes": "25,26,27,28", + "alias": "fortyGigE0/0", + "index": "0", + "speed": "40000", + "admin_status": "up", + "mtu": "9100" + }, + "Ethernet4": { + "lanes": "29,30,31,32", + "alias": "fortyGigE0/4", + "index": "1", + "speed": "40000", + "admin_status": "up", + "mtu": "9100" + }, + "Ethernet8": { + "lanes": "33,34,35,36", + "alias": "fortyGigE0/8", + "index": "2", + "speed": "40000", + "admin_status": "up", + "mtu": "9100" + }, + "Ethernet12": { + "lanes": "37,38,39,40", + "alias": "fortyGigE0/12", + "index": "3", + "speed": "40000", + "admin_status": "up", + "mtu": "9100" + } + } +} diff --git a/scripts/mirror_tap_to_eth.sh b/images/sonic/mirror_tap_to_eth.sh similarity index 100% rename from scripts/mirror_tap_to_eth.sh rename to images/sonic/mirror_tap_to_eth.sh diff --git a/images/sonic/sonic_entrypoint.py b/images/sonic/sonic_entrypoint.py new file mode 100755 index 00000000..6b2a3803 --- /dev/null +++ b/images/sonic/sonic_entrypoint.py @@ -0,0 +1,211 @@ +#!/usr/bin/python3 +import fcntl +import json +import logging +import os +import signal +import socket +import struct +import subprocess +import sys +import time + +import guestfs +from guestfs import GuestFS + +BASE_IMG = '/sonic-vs.img' + + +class Qemu: + def __init__(self, name: str, memory: str, interfaces: int): + self._name = name + self._memory = memory + self._interfaces = interfaces + self._p = None + self._disk = '/overlay.img' + + def prepare_overlay(self, base: str) -> None: + cmd = [ + 'qemu-img', + 'create', + '-f', 'qcow2', + '-F', 'qcow2', + '-b', base, + self._disk, + ] + subprocess.run(cmd, check=True) + + def guestfs(self) -> GuestFS: + g = guestfs.GuestFS(python_return_dict=True) + g.add_drive_opts(filename=self._disk, format="qcow2", readonly=False) + g.launch() + g.mount('/dev/sda3', '/') + return g + + def start(self) -> None: + cmd = [ + 'qemu-system-x86_64', + '-cpu', 'host', + '-display', 'none', + '-enable-kvm', + '-machine', 'q35', + '-name', self._name, + '-m', self._memory, + '-drive', f'if=virtio,format=qcow2,file={self._disk}', + '-serial', 'telnet:127.0.0.1:5000,server,nowait', + ] + + for i in range(self._interfaces): + with open(f'/sys/class/net/eth{i}/address', 'r') as f: + mac = f.read().strip() + cmd.append('-device') + cmd.append(f'virtio-net,netdev=hn{i},mac={mac}') + cmd.append(f'-netdev') + cmd.append(f'tap,id=hn{i},ifname=tap{i},script=/mirror_tap_to_eth.sh,downscript=no') + + self._p = subprocess.Popen(cmd) + + def wait(self) -> None: + self._p.wait() + + +def initial_configuration(g: GuestFS) -> None: + image = g.glob_expand('/image-*')[0] + + g.rm(image + 'platform/firsttime') + + systemd_system = image + 'rw/etc/systemd/system/' + sonic_target_wants = systemd_system + 'sonic.target.wants/' + g.mkdir_p(sonic_target_wants) + + # Workaround: Speed up lldp startup by remove hardcoded wait of 90 seconds + g.ln_s(linkname=systemd_system + 'aaastatsd.timer', target='/dev/null') # Radius + g.ln_s(linkname=systemd_system + 'featured.timer', target='/dev/null') # Feature handling not necessary + g.ln_s(linkname=systemd_system + 'hostcfgd.timer', target='/dev/null') # After boot Host configuration + # Started by featured + g.ln_s(linkname=sonic_target_wants + 'lldp.service', target='/lib/systemd/system/lldp.service') + + # Workaround: Only useful for BackEndToRRouter + g.ln_s(linkname=systemd_system + 'backend-acl.service', target='/dev/null') + + # Workaround: We don't need LACP + g.ln_s(linkname=systemd_system + 'teamd.service', target='/dev/null') + + # Workaround: Python module sonic_platform not present on vs images + g.ln_s(linkname=systemd_system + 'system-health.service', target='/dev/null') + g.ln_s(linkname=systemd_system + 'watchdog-control.service', target='/dev/null') + + etc_sonic = image + 'rw/etc/sonic/' + g.mkdir_p(etc_sonic) + sonic_version = image.removeprefix('/image-').removesuffix('/') + sonic_environment = f''' + SONIC_VERSION=${sonic_version} + PLATFORM=x86_64-kvm_x86_64-r0 + HWSKU=Force10-S6000 + DEVICE_TYPE=LeafRouter + ASIC_TYPE=vs + '''.encode('utf-8') + g.write(path=etc_sonic + 'sonic-environment', content=sonic_environment) + + with open('/config_db.json') as f: + config_db = json.load(f) + + config_db['DEVICE_METADATA']['localhost']['hostname'] = socket.gethostname() + config_db['DEVICE_METADATA']['localhost']['mac'] = get_mac_address('eth0') + cidr = get_ip_address('eth0') + '/16' + config_db['MGMT_INTERFACE'] = { + f'eth0|{cidr}': { + 'gwaddr': get_default_gateway() + } + } + + config_db_json = json.dumps(config_db, indent=4, sort_keys=True) + g.write(path=etc_sonic + 'config_db.json', content=config_db_json.encode('utf-8')) + + if os.path.exists('/authorized_keys'): + g.mkdir_p(image + 'rw/root/.ssh') + g.chmod(mode=0x0600, path=image + 'rw/root/.ssh') + g.copy_in(localpath='/authorized_keys', remotedir=image + 'rw/root/.ssh/') + g.chown(owner=0, group=0, path=image + 'rw/root/.ssh/authorized_keys') + + +def main(): + signal.signal(signal.SIGINT, handle_exit) + signal.signal(signal.SIGTERM, handle_exit) + + logging.basicConfig(level=logging.INFO, stream=sys.stdout) + logger = logging.getLogger() + + name = os.getenv('CLAB_LABEL_CLAB_NODE_NAME', default='switch') + memory = os.getenv('VM_MEMORY', default='2048') + interfaces = int(os.getenv('CLAB_INTFS', 0)) + 1 + + vm = Qemu(name, memory, interfaces) + + logger.info('Prepare disk') + vm.prepare_overlay(BASE_IMG) + + logger.info('Deploy initial config') + g = vm.guestfs() + initial_configuration(g) + g.shutdown() + g.close() + + logger.info(f'Waiting for {interfaces} interfaces to be connected') + wait_until_all_interfaces_are_connected(interfaces) + + logger.info('Start QEMU') + vm.start() + + logger.info('Wait until QEMU is terminated') + vm.wait() + + +def handle_exit(signal, frame): + sys.exit(0) + + +def wait_until_all_interfaces_are_connected(interfaces: int) -> None: + while True: + i = 0 + for iface in os.listdir('/sys/class/net/'): + if iface.startswith('eth'): + i += 1 + if i == interfaces: + break + time.sleep(1) + + +def get_ip_address(iface: str) -> str: + # Source: https://bit.ly/3dROGBN + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + return socket.inet_ntoa(fcntl.ioctl( + s.fileno(), + 0x8915, # SIOCGIFADDR + struct.pack('256s', iface.encode('utf_8')) + )[20:24]) + + +def get_mac_address(iface: str) -> str: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + mac = fcntl.ioctl( + s.fileno(), + 0x8927, # SIOCGIFHWADDR + struct.pack('256s', iface.encode('utf-8')) + )[18:24] + return ':'.join('%02x' % b for b in mac) + + +def get_default_gateway() -> str: + # Source: https://splunktool.com/python-get-default-gateway-for-a-local-interfaceip-address-in-linux + with open("/proc/net/route") as fh: + for line in fh: + fields = line.strip().split() + if fields[1] != '00000000' or not int(fields[3], 16) & 2: + # If not default route or not RTF_GATEWAY, skip it + continue + return socket.inet_ntoa(struct.pack(" None: - cmd = [ - 'qemu-img', - 'create', - '-f', 'qcow2', - '-b', base, - self._disk, - ] - subprocess.run(cmd, check=True) - - def start(self) -> None: - cmd = [ - 'qemu-system-x86_64', - '-cpu', 'host', - '-display', 'none', - '-enable-kvm', - '-machine', 'q35', - '-name', self._name, - '-m', self._memory, - '-drive', f'if=virtio,format=qcow2,file={self._disk}', - '-serial', 'telnet:127.0.0.1:5000,server,nowait', - ] - - for i in range(self._interfaces): - with open(f'/sys/class/net/eth{i}/address', 'r') as f: - mac = f.read().strip() - cmd.append('-device') - cmd.append(f'virtio-net,netdev=hn{i},mac={mac}') - cmd.append(f'-netdev') - cmd.append(f'tap,id=hn{i},ifname=tap{i},script=/mini-lab/mirror_tap_to_eth.sh,downscript=no') - - self._p = subprocess.Popen(cmd) - - def wait(self) -> None: - self._p.wait() - - -class Telnet: - def __init__(self): - self._tn: telnetlib.Telnet | None = None - - def connect(self, host: str, port: int, max_retries=60) -> bool: - for i in range(1, max_retries + 1): - try: - self._tn = telnetlib.Telnet(host, port) - return True - except: - time.sleep(1) - if i == max_retries: - return False - - def close(self): - self._tn.close() - - def wait_until(self, match: str): - self._tn.read_until(match.encode('ascii')) - - def write_and_wait(self, data: str, match: str = '$ ') -> str: - self._tn.write(data.encode('ascii') + b'\n') - return self._tn.read_until(match.encode('ascii')).decode('utf-8') - - def write_test(self, data: str) -> str: - self._tn.write(data.encode('ascii') + b'\n') - time.sleep(5) - return self._tn.read_some().decode('utf-8') - - -def main(): - signal.signal(signal.SIGINT, handle_exit) - signal.signal(signal.SIGTERM, handle_exit) - - logging.basicConfig(level=logging.INFO, stream=sys.stdout) - logger = logging.getLogger() - - name = os.getenv('CLAB_LABEL_CLAB_NODE_NAME', default='switch') - memory = os.getenv('VM_MEMORY', default='2048') - interfaces = int(os.getenv('CLAB_INTFS', 0)) + 1 - - logger.info(f'Waiting for {interfaces} interfaces to be connected') - wait_until_all_interfaces_are_connected(interfaces) - - vm = Qemu(name, memory, interfaces) - - logger.info('Prepare disk') - vm.prepare_overlay(BASE_IMG) - - logger.info('Start QEMU') - vm.start() - - logger.info('Try to connect via telnet...') - tn = Telnet() - if not tn.connect('127.0.0.1', 5000): - logger.error('Cannot connect to telnet server') - sys.exit(1) - - logger.info('Connected via telnet and waiting for login prompt') - tn.wait_until('login: ') - - logger.info('Try to login') - tn.write_and_wait(USER, 'Password: ') - tn.write_and_wait(PASSWORD) - - logger.info('Authorize ssh key') - authorize_ssh_key(tn) - - logger.info('Wait until config-setup is done') - if not wait_until_config_setup_is_done(tn): - logger.error('config-setup still not done') - sys.exit(1) - - net = get_ip_address('eth0') + '/16' - logger.info(f'Configure {net} on eth0') - tn.write_and_wait(f'sudo config interface ip add eth0 {net}') - tn.write_and_wait('sudo config save --yes') - - tn.close() - - logger.info('Wait until QEMU terminated') - vm.wait() - - -def handle_exit(signal, frame): - sys.exit(0) - - -def wait_until_all_interfaces_are_connected(interfaces: int) -> None: - while True: - i = 0 - for iface in os.listdir('/sys/class/net/'): - if iface.startswith('eth'): - i += 1 - if i == interfaces: - break - time.sleep(1) - - -def get_ip_address(iface: str) -> str: - # Source: https://bit.ly/3dROGBN - s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) - return socket.inet_ntoa(fcntl.ioctl( - s.fileno(), - 0x8915, # SIOCGIFADDR - struct.pack('256s', iface.encode('utf_8')) - )[20:24]) - - -def wait_until_config_setup_is_done(tn: Telnet, max_retries: int = 60) -> bool: - for i in range(1, max_retries + 1): - # updategraph is started after the config-setup - result = tn.write_and_wait('systemctl is-active updategraph') - if not 'inactive' in result: - return True - time.sleep(1) - if i == max_retries: - return False - - -def authorize_ssh_key(tn: Telnet) -> None: - with open('/id_rsa.pub') as f: - key = f.read().strip() - - tn.write_and_wait(f'echo "{key}" > authorized_keys') - tn.write_and_wait('sudo mkdir /root/.ssh') - tn.write_and_wait('sudo chmod 0600 /root/.ssh') - tn.write_and_wait('sudo cp authorized_keys /root/.ssh/') - - -if __name__ == '__main__': - main()