from __future__ import unicode_literals import nnpy import struct from p4utils.utils.topology import Topology from p4utils.utils.sswitch_API import SimpleSwitchAPI from scapy.all import * from scapy.data import ETHER_TYPES import sys import re import logging import argparse import subprocess import ipaddress logging.basicConfig() log = logging.getLogger("main") cpu_fields = { 0: 'UNSET', 1: 'ICMP6_NS', 2: 'ICMP6_GENERAL', 3: 'DEBUG', 8: 'NAT64_TCP_SESSION' } table_id_fields = { 0: 'UNSET_TABLE', 1: 'TABLE_NAT64', 2: 'TABLE_ICMP6', 3: 'TABLE_V6_NETWORKS', 4: 'TABLE_NAT46', 5: 'TABLE_V4_NETWORKS', 6: 'TABLE_ARP', 7: 'TABLE_ARP_EGRESS', 8: 'TABLE_ICMP', 9: 'TABLE_NAT64_TCP', 10: 'TABLE_NAT64_UDP', 11: 'TABLE_NAT64_ICMP6', 12: 'TABLE_NAT64_SESSION' } table_proto = { 'ICMP6_ECHO_REQUEST' : 128, 'ICMP6_ECHO_REPLY' : 129, 'ICMP6_NS' : 135, 'ICMP6_NA' : 136, 'ICMP_ECHO_REPLY' : 0, 'ICMP_ECHO_REQUEST' : 8 } table_arp = { 'ARP_REQUEST': 1, 'ARP_REPLY': 2 } class CpuHeader(Packet): name = 'CpuPacket' fields_desc = [ ShortEnumField('task', 1, cpu_fields ), ShortField('ingress_port', 0), XShortEnumField("type", 0x9000, ETHER_TYPES), ShortEnumField('table_id', 1, table_id_fields ) ] class L2Controller(object): def __init__(self, sw_name): # Command line mapping self.modes = ['base', 'router', "range_router", "session_router" ] # Reverse maps the cpu header self.task = dict(reversed(item) for item in cpu_fields.items()) self.info={} # https://en.wikipedia.org/wiki/Solicited-node_multicast_address self.info['ndp_multicast'] = ipaddress.ip_network("ff02::1:ff00:0/104") self.info['mac_base'] = "00:00:0a:00:00:0{}" self.info['mac_addr'] = "00:00:0a:00:00:42" self.info['mac_broadcast'] = "ff:ff:ff:ff:ff:ff" self.info['ipv6_link_local'] = ipaddress.ip_address("fe80::200:aff:fe00:42") self.info['v6_mask'] = 64 self.info['v6_nat64_mask'] = 96 self.info['v6_base'] = ipaddress.ip_network("2001:db8::/40") self.info['v6_base_hostnet'] = ipaddress.ip_network("2001:db8::/48") self.info['v6_gen'] = self.info['v6_base_hostnet'].subnets(new_prefix=self.info['v6_mask']) # possible new range for NAT64 prefixes self.info['v6_nat64_base'] = ipaddress.ip_network("2001:db8:1::/48") # We reserve /64 (easier for reading), but only use /96 self.info['v6_nat64_gen'] = self.info['v6_nat64_base'].subnets(new_prefix=self.info['v6_mask']) self.info['v4_mask'] = 24 self.info['v4_base'] = ipaddress.ip_network("10.0.0.0/8") self.info['v4_gen'] = self.info['v4_base'].subnets(new_prefix=self.info['v4_mask']) self.info['v4_nat64_base'] = ipaddress.ip_network("10.1.0.0/16") self.info['v4_nat64_map'] = self.info['v4_nat64_base'].subnets(new_prefix=self.info['v4_mask']) self.info['switch_suffix'] = 0x42 self.info['nat64_prefix'] = ipaddress.ip_network("64:ff9b::/96") # /96 after the /40 pool we use above self.info['nat64_prefix_dynamic'] = ipaddress.ip_network("2001:db8:100::/96") self.info['nat64_tcp_session'] = {} self.v6_routes = {} self.v6_routes[None] = [] self.v6_routes['base'] = [] self.ports = [] for port in range(1,3): net = self.info['v6_gen'].next() self.v6_routes['base'].append({ "net": net, "port": port, "mac": self.info['mac_base'].format(port) }) self.ports.append(port) self.v6_routes['router'] = self.v6_routes['base'] # only 1 route to avoid table duplicate/conflict self.v6_routes['range_router'] = self.v6_routes['base'][0:1] self.v6_routes['session_router'] = self.v6_routes['range_router'] self.v4_routes = {} self.v4_routes[None] = [] self.v4_routes['base'] = [] for port in range(3,5): net = self.info['v4_gen'].next() self.v4_routes['base'].append({ "net": net, "port": port, "mac": self.info['mac_base'].format(port) }) self.ports.append(port) self.v4_routes['router'] = self.v4_routes['base'] self.v4_routes['range_router'] = self.v4_routes['base'] self.v4_routes['session_router'] = self.v4_routes['base'] self.v6_addresses = {} self.v6_addresses[None] = [] for mode in self.modes: self.v6_addresses[mode] = [ net['net'][self.info['switch_suffix']] for net in self.v6_routes[mode] ] self.v4_addresses = {} self.v4_addresses[None] = [] for mode in self.modes: self.v4_addresses[mode] = [ net['net'][self.info['switch_suffix']] for net in self.v4_routes[mode] ] self.nat64_map = {} self.nat64_session_net = {} # init default for mode in self.modes: self.nat64_map[mode] = [] self.nat64_session_net[mode] = [] # specific settings - mapping 256 IPv6 IPs max statically (based on /24) for mode in ["range_router"]: for v6_net in self.v6_routes[mode]: # This is a /64 v6_dst_base = self.info['v6_nat64_gen'].next() # This is a /96 -> the first /96 inside the /64 v6_dst = v6_dst_base.subnets(new_prefix=self.info['v6_nat64_mask']).next() for v4_net in self.v4_routes[mode]: v4_dst = self.info['v4_nat64_map'].next() self.nat64_map[mode].append({ "v6_src": v6_net['net'], # "v6_dst": self.info['nat64_prefix'] # static -- not supported ATM "v6_dst": v6_dst, "v4_src": v4_net['net'], "v4_dst": v4_dst }) # allow session translation # TODO: maybe support multiple networks for mode in ["session_router"]: self.nat64_session_net[mode] = [ { 'v4_net': self.info['v4_nat64_map'].next(), 'v6_net': self.info['nat64_prefix_dynamic'], 'v4_idx': 1 } ] self.init_boilerplate(sw_name) self.init_other_port_multicast_groups() def gen_ndp_multicast_addr(self, addr): """ append the 24 bit of the address to the multicast address""" last_24 = int(addr) & 0xffffff addr = self.info['ndp_multicast'][last_24] return addr def init_other_port_multicast_groups(self): """ map multicast group x to send to all ports but x - basically broadcasting without sending back to ourselves """ # create multicast nodes for rid in self.ports: ports = [ x for x in self.ports if not x == rid ] n_handle = self.controller.mc_node_create(rid, ports) log.debug("Creating MC node rid={} ports={} handle={}".format(rid, ports, n_handle)) g_handle = self.controller.mc_mgrp_create(rid) log.debug("Created MC group mgrp={} handle={} && associating afterwards".format(rid, g_handle)) self.controller.mc_node_associate(g_handle, n_handle) def init_boilerplate(self, sw_name): self.topo = Topology(db="topology.db") self.sw_name = sw_name self.thrift_port = self.topo.get_thrift_port(sw_name) self.cpu_port = self.topo.get_cpu_port_index(self.sw_name) self.controller = SimpleSwitchAPI(self.thrift_port) self.intf = str(self.topo.get_cpu_port_intf(self.sw_name).replace("eth0", "eth1")) self.controller.reset_state() if self.cpu_port: self.controller.mirroring_add(100, self.cpu_port) def config(self): self.fill_tables() self.config_hosts() def listen_to_icmp6_multicast(self): """Only needed for debugging""" net = self.info['ndp_multicast'] self.controller.table_add("v6_networks", "controller_debug", [str(net)]) def init_ndp_in_switch(self, addr): icmp6_addr = self.gen_ndp_multicast_addr(addr) icmp6_net = "{}/128".format(icmp6_addr) icmp6_type = table_proto['ICMP6_NS'] mac_addr = self.info['mac_addr'] self.controller.table_add("icmp6", "icmp6_neighbor_solicitation", [str(icmp6_net), str(icmp6_type)], [str(addr), str(mac_addr)]) def init_icmp6_echo_in_switch(self, addr): icmp6_addr = addr icmp6_type = table_proto['ICMP6_ECHO_REQUEST'] icmp6_net = "{}/128".format(icmp6_addr) self.controller.table_add("icmp6", "icmp6_echo_reply", [str(icmp6_net), str(icmp6_type)], []) def init_icmp_echo_in_switch(self, addr): icmp_addr = addr icmp_type = table_proto['ICMP_ECHO_REQUEST'] icmp_net = "{}/32".format(icmp_addr) self.controller.table_add("icmp", "icmp_echo_reply", [str(icmp_net), str(icmp_type)], []) def ipv4_router(self, net): return net[self.info['switch_suffix']] def fill_tables(self): self.controller.table_clear("v6_networks") for v6route in self.v6_routes[self.mode]: #self.controller.table_add("v6_networks", "set_egress_port", [str(v6route['net'])], [str(v6route['port'])]) self.controller.table_add("v6_networks", "set_egress_port_and_mac", [str(v6route['net'])], [str(v6route['port']), str(v6route['mac']), ]) self.controller.table_clear("v4_networks") self.controller.table_clear("v4_arp") self.controller.table_clear("v4_arp_egress") for v4route in self.v4_routes[self.mode]: # self.controller.table_add("v4_networks", "set_egress_port", [str(v4route['net'])], [str(v4route['port'])]) self.controller.table_add("v4_networks", "set_egress_port_and_mac", [str(v4route['net'])], [str(v4route['port']), str(v4route['mac']), ]) # ARP support self.controller.table_add("v4_arp_egress", "set_egress_port", [str(v4route['net'])], [str(v4route['port'])]) router = "{}/32".format(self.ipv4_router(v4route['net'])) self.controller.table_add("v4_arp", "arp_reply", [str(self.info['mac_broadcast']), str(table_arp['ARP_REQUEST']), router], [str(self.info['mac_addr'])] ) if self.args.multicast_to_controller: self.listen_to_icmp6_multicast() # icmp6 echo request + NDP self.controller.table_clear("icmp6") for v6addr in self.v6_addresses[self.mode]: self.init_ndp_in_switch(v6addr) self.init_icmp6_echo_in_switch(v6addr) # icmp echo request self.controller.table_clear("icmp") for addr in self.v4_addresses[self.mode]: self.init_icmp_echo_in_switch(addr) self.controller.table_clear("nat64") self.controller.table_clear("nat46") for nat64map in self.nat64_map[self.mode]: self.static_nat64_mapping(**nat64map) # NAT64 session based self.controller.table_clear("nat64_session") # These will be only populated dynamically self.controller.table_clear("nat64_tcp_session") self.controller.table_clear("nat46_tcp_session") for net in self.nat64_session_net[self.mode]: # Only for matching / if selecting self.controller.table_add("nat64_session", "NoAction", [ str(net['v6_net']) ] ) def static_nat64_mapping(self, v6_src, v6_dst, v4_src, v4_dst): """ Currently using destination only matching due to non priority LPM support in P4 This could be solved with ternary matches or smart double table usage """ log.info("NAT64 map: ({} -> {} => {}), ({} -> {} -> {} (only /24)))".format( v6_src, v6_dst, v4_dst, v4_src, v4_dst, v6_src)) self.controller.table_add("nat64", "nat64_static", [str(v6_dst)], [str(v6_src.network_address), str(v4_dst.network_address), str(v6_dst.network_address)] ) self.controller.table_add("nat46", "nat46_static", [str(v4_dst)], [str(v6_src.network_address), str(v4_dst.network_address), str(v6_dst.network_address)] ) def config_hosts(self): """ Assumptions: - all routes are networks (no /128 v6 or /32 v4 - hosts get the first ip address in the network """ for v6route in self.v6_routes[self.mode]: host = "h{}".format(v6route['port']) dev = "{}-eth0".format(host) net = v6route['net'] ipaddr = "{}/{}".format(net[1],net.prefixlen) router = str(net[self.info['switch_suffix']]) self.config_v6_host(host, str(net), str(ipaddr), dev, router) for v4route in self.v4_routes[self.mode]: host = "h{}".format(v4route['port']) dev = "{}-eth0".format(host) net = v4route['net'] ipaddr = "{}/{}".format(net[1],net.prefixlen) router = str(self.ipv4_router(net)) self.config_v4_host(host, str(net), str(ipaddr), dev, router) @staticmethod def config_v6_host(host, net, ipaddr, dev, router=None): log.debug("Config v6 host: {} {}->{} on {}".format(host, net, ipaddr, dev)) subprocess.call(["mx", host, "ip", "addr", "flush", "dev", dev]) for v6dev in [ "lo", "default", "all", dev ]: subprocess.call(["mx", host, "sysctl", "net.ipv6.conf.{}.disable_ipv6=0".format(v6dev)]) # Set down & up to regain link local address subprocess.call(["mx", host, "ip", "link", "set", dev, "down"]) subprocess.call(["mx", host, "ip", "link", "set", dev, "up"]) subprocess.call(["mx", host, "ip", "addr", "add", ipaddr, "dev", dev]) if router: subprocess.call(["mx", host, "ip", "route", "add", "default", "via", router]) @staticmethod def config_v4_host(host, net, ipaddr, dev, router=None): log.debug("Config v4 host: {} {}->{} on {} via {}".format(host, net, ipaddr, dev, router)) subprocess.call(["mx", host, "ip", "addr", "flush", "dev", dev]) subprocess.call(["mx", host, "ip", "addr", "add", ipaddr, "dev", dev]) if router: subprocess.call(["mx", host, "ip", "route", "add", "default", "via", router]) def debug_print_pkg(self, pkg, msg="INCOMING"): log.debug("{}: {}".format(msg, pkg.__repr__())) def debug_format_pkg(self, pkg): packet = Ether(str(pkg)) if packet.type == 0x800: ip = pkg.getlayer(IP) elif packet.type == 0x86dd: ip = pkg.getlayer(IPv6) # tcp = pkg.getlayer(TCP) # raw = pkg.getlayer(Raw) # return "{}:{} => {}:{}: flags={} seq={} ack={} raw={}".format( # ip.src, tcp.sport, # ip.dst, tcp.dport, # tcp.flags, # tcp.seq, # tcp.ack, # raw) def handle_icmp6_echo_request(self, pkg): """ Sample from the wire: DEBUG:main:reassambled=>> """ log.info("Replying to ICMP packet") dst_mac = pkg[Ether].src src_mac = pkg[Ether].dst dst_addr = pkg[IPv6].src src_addr = pkg[IPv6].dst e = Ether(src=src_mac, dst=dst_mac) i = IPv6(src=src_addr, dst=dst_addr) i2 = ICMPv6EchoReply(id=pkg[ICMPv6EchoRequest].id, seq=pkg[ICMPv6EchoRequest].seq, data=pkg[ICMPv6EchoRequest].data) i2.cksum = None answer = e / i / i2 self.send_pkg(answer) def handle_icmp6_ns(self, pkg): """ Solicitated NA""" # Both ways should work dst_mac = pkg[Ether].src dst_mac = pkg[ICMPv6NDOptSrcLLAddr].lladdr src_mac = self.info['mac_address'] dst_addr = pkg[IPv6].src src_addr = pkg[ICMPv6ND_NS].tgt e = Ether(src=src_mac, dst=dst_mac) i = IPv6(src=src_addr, dst=dst_addr) # S=1 -> solicitated i2 = ICMPv6ND_NA(S=1, R=0, tgt=src_addr) # try5: cksum not chksum ! i2.cksum = None i3 = ICMPv6NDOptDstLLAddr(lladdr=src_mac) answer = e / i / i2 / i3 # try 4 # for l in [Ether, IPv6, ICMPv6ND_NA, ICMPv6NDOptDstLLAddr]: # try: # del answer[l].chksum # except AttributeError: # pass # Let scapy recalc checksum (try3) # answer = answer.__class__(str(answer)) self.send_pkg(answer) def nat64_tcp_session_entry(self, ipv6_src_addr, ipv6_dst_addr, tcp_src_port, tcp_dst_port): return "{}:{} - {}:{}".format(ipv6_src_addr, tcp_src_port, ipv6_dst_addr, tcp_dst_port) def nat64_create_tcp_session(self, pkg): """ session: hdr.ipv6.src_addr: exact; hdr.ipv6.dst_addr: exact; hdr.tcp.src_port: exact; hdr.tcp.dst_port: exact; """ id = self.nat64_tcp_session_entry(pkg[IPv6].src, pkg[TCP].sport, pkg[IPv6].dst, pkg[TCP].dport) # Has already been added? then it's a race condition and # it needs to go back to the switch # Not in the table? create an entry! if not id in self.info['nat64_tcp_session']: # FIXME: Change randomly later, supporting 1:N mappings # Keep the same for the moment tcp_src_port = pkg[TCP].sport tcp_dst_port = pkg[TCP].dport # FIXME: range, reuse, etc. idx = self.nat64_session_net[self.mode][0]['v4_idx'] self.nat64_session_net[self.mode][0]['v4_idx'] += 1 ipv4_src_addr = self.nat64_session_net[self.mode][0]['v4_net'][idx] ipv6_dst_addr = ipaddress.IPv6Address(pkg[IPv6].dst) ipv4_dst_addr = int(ipv6_dst_addr) - int(self.nat64_session_net[self.mode][0]['v6_net'][0]) log.debug("src v4={}, dst v6 = {}, dst v4 = {}".format(ipv4_src_addr, ipv6_dst_addr, ipv4_dst_addr)) self.controller.table_add("nat64_tcp_session", "nat64_tcp_session_translate", [ pkg[IPv6].src, pkg[TCP].sport, pkg[IPv6].dst, pkg[TCP].dport ], [ ipv4_src_addr, tcp_src_port, ipv4_dst_addr, tcp_dst_port ] ) self.send_pkg(pkg) def send_pkg(self, pkg): self.debug_print_pkg(pkg, "OUTGOING") sendp(pkg, iface=self.intf, verbose=False) def recv_msg_cpu(self, pkg): packet = Ether(str(pkg)) cpu_header = "" ether_orig = "" orig_packet = "" if packet.type == 0x4242: cpu_header = CpuHeader(packet.payload) # Not necessary anymore - cpu decoding works log.debug("cpu = {}".format(cpu_header.__repr__())) ether_orig = Ether(src=packet.src, dst=packet.dst, type=cpu_header.type) # Note to myself: this is actually broken for ARP if cpu_header.type == 0x0800: orig_packet = ether_orig / IP(cpu_header.load) elif cpu_header.type == 0x86dd: orig_packet = ether_orig / IPv6(cpu_header.load) else: print("Broken pkg: {}".format(pkg.__repr__())) return else: print("Broken / unhandled pkg: {}".format(pkg.__repr__())) return # Process parsed if ICMPv6ND_NS in orig_packet and orig_packet['IPv6'].src == '::': log.info("Neighbor solicitation for checking her own IP address") elif ICMPv6MLReport2 in orig_packet and orig_packet['IPv6'].dst == 'ff02::16': mc_group = orig_packet['ICMPv6MLDMultAddrRec'].dst log.info("Multicast registration for {} port {} -- should probably handle this".format(mc_group, cpu_header.ingress_port)) elif ICMPv6ND_RS in orig_packet and orig_packet['IPv6'].dst == 'ff02::2': src = orig_packet['IPv6'].src log.info("Router solicitation from {} -- should probably handle this?".format(src)) elif cpu_header.task == self.task['ICMP6_NS']: log.info("Doing neighbor solicitation for the switch in the controller") self.handle_icmp6_ns(orig_packet) elif cpu_header.task == self.task['ICMP6_GENERAL']: if ICMPv6EchoRequest in orig_packet: self.handle_icmp6_echo_request(orig_packet) elif cpu_header.task == self.task['NAT64_TCP_SESSION']: self.nat64_create_tcp_session(orig_packet) else: log.info("unhandled reassambled={} from table {}".format(orig_packet.__repr__(), table_id_fields[cpu_header.table_id])) def run_cpu_port_loop(self): sniff(iface=self.intf, prn=self.recv_msg_cpu) def commandline(self): parser = argparse.ArgumentParser(description='controller++') parser.add_argument('--mode', help='Select mode / settings to use', choices=self.modes) parser.add_argument('--debug', help='Enable debug logging', action='store_true') parser.add_argument('--verbose', help='Enable verbose logging', action='store_true') parser.add_argument('--multicast-to-controller', help='Send debug multicast to controller', action='store_true') self.args = parser.parse_args() self.mode = self.args.mode self.debug = self.args.debug if __name__ == "__main__": import sys import os sw_name = "s1" controller = L2Controller(sw_name) controller.commandline() if controller.args.debug: log.setLevel(logging.DEBUG) elif controller.args.verbose: log.setLevel(logging.INFO) else: log.setLevel(logging.WARNING) log.info("Booting...") log.debug("Debug enabled.") controller.config() controller.run_cpu_port_loop()