diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..3d99a2475 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "files.associations": { + "atomic": "cpp" + } +} \ No newline at end of file diff --git a/kernel/include/uapi/linux/netfilter/xt_connmark.h b/kernel/include/uapi/linux/netfilter/xt_connmark.h index 408a9654f..36cc956ea 100644 --- a/kernel/include/uapi/linux/netfilter/xt_connmark.h +++ b/kernel/include/uapi/linux/netfilter/xt_connmark.h @@ -1,32 +1,7 @@ -/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ -#ifndef _XT_CONNMARK_H -#define _XT_CONNMARK_H +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _XT_CONNMARK_H_target +#define _XT_CONNMARK_H_target -#include +#include -/* Copyright (C) 2002,2004 MARA Systems AB - * by Henrik Nordstrom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -enum { - XT_CONNMARK_SET = 0, - XT_CONNMARK_SAVE, - XT_CONNMARK_RESTORE -}; - -struct xt_connmark_tginfo1 { - __u32 ctmark, ctmask, nfmask; - __u8 mode; -}; - -struct xt_connmark_mtinfo1 { - __u32 mark, mask; - __u8 invert; -}; - -#endif /*_XT_CONNMARK_H*/ +#endif /*_XT_CONNMARK_H_target*/ diff --git a/kernel/include/uapi/linux/netfilter/xt_dscp.h b/kernel/include/uapi/linux/netfilter/xt_dscp.h index 7594e4df8..223d635e8 100644 --- a/kernel/include/uapi/linux/netfilter/xt_dscp.h +++ b/kernel/include/uapi/linux/netfilter/xt_dscp.h @@ -1,32 +1,27 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* x_tables module for matching the IPv4/IPv6 DSCP field +/* x_tables module for setting the IPv4/IPv6 DSCP field * * (C) 2002 Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh * This software is distributed under GNU GPL v2, 1991 * * See RFC2474 for a description of the DSCP field within the IP Header. * - * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp + * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp */ -#ifndef _XT_DSCP_H -#define _XT_DSCP_H - +#ifndef _XT_DSCP_TARGET_H +#define _XT_DSCP_TARGET_H +#include #include -#define XT_DSCP_MASK 0xfc /* 11111100 */ -#define XT_DSCP_SHIFT 2 -#define XT_DSCP_MAX 0x3f /* 00111111 */ - -/* match info */ -struct xt_dscp_info { +/* target info */ +struct xt_DSCP_info { __u8 dscp; - __u8 invert; }; -struct xt_tos_match_info { - __u8 tos_mask; +struct xt_tos_target_info { __u8 tos_value; - __u8 invert; + __u8 tos_mask; }; -#endif /* _XT_DSCP_H */ +#endif /* _XT_DSCP_TARGET_H */ diff --git a/kernel/include/uapi/linux/netfilter/xt_mark.h b/kernel/include/uapi/linux/netfilter/xt_mark.h index 9d0526ced..f1fe2b4be 100644 --- a/kernel/include/uapi/linux/netfilter/xt_mark.h +++ b/kernel/include/uapi/linux/netfilter/xt_mark.h @@ -1,16 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _XT_MARK_H -#define _XT_MARK_H +#ifndef _XT_MARK_H_target +#define _XT_MARK_H_target -#include +#include -struct xt_mark_tginfo2 { - __u32 mark, mask; -}; - -struct xt_mark_mtinfo1 { - __u32 mark, mask; - __u8 invert; -}; - -#endif /*_XT_MARK_H*/ +#endif /*_XT_MARK_H_target */ diff --git a/kernel/include/uapi/linux/netfilter/xt_rateest.h b/kernel/include/uapi/linux/netfilter/xt_rateest.h index 52a37bdc1..2b87a71e6 100644 --- a/kernel/include/uapi/linux/netfilter/xt_rateest.h +++ b/kernel/include/uapi/linux/netfilter/xt_rateest.h @@ -1,39 +1,17 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _XT_RATEEST_MATCH_H -#define _XT_RATEEST_MATCH_H +#ifndef _XT_RATEEST_TARGET_H +#define _XT_RATEEST_TARGET_H #include #include -enum xt_rateest_match_flags { - XT_RATEEST_MATCH_INVERT = 1<<0, - XT_RATEEST_MATCH_ABS = 1<<1, - XT_RATEEST_MATCH_REL = 1<<2, - XT_RATEEST_MATCH_DELTA = 1<<3, - XT_RATEEST_MATCH_BPS = 1<<4, - XT_RATEEST_MATCH_PPS = 1<<5, -}; - -enum xt_rateest_match_mode { - XT_RATEEST_MATCH_NONE, - XT_RATEEST_MATCH_EQ, - XT_RATEEST_MATCH_LT, - XT_RATEEST_MATCH_GT, -}; - -struct xt_rateest_match_info { - char name1[IFNAMSIZ]; - char name2[IFNAMSIZ]; - __u16 flags; - __u16 mode; - __u32 bps1; - __u32 pps1; - __u32 bps2; - __u32 pps2; +struct xt_rateest_target_info { + char name[IFNAMSIZ]; + __s8 interval; + __u8 ewma_log; /* Used internally by the kernel */ - struct xt_rateest *est1 __attribute__((aligned(8))); - struct xt_rateest *est2 __attribute__((aligned(8))); + struct xt_rateest *est __attribute__((aligned(8))); }; -#endif /* _XT_RATEEST_MATCH_H */ +#endif /* _XT_RATEEST_TARGET_H */ diff --git a/kernel/include/uapi/linux/netfilter/xt_tcpmss.h b/kernel/include/uapi/linux/netfilter/xt_tcpmss.h index 2268f58b4..65ea6c9da 100644 --- a/kernel/include/uapi/linux/netfilter/xt_tcpmss.h +++ b/kernel/include/uapi/linux/netfilter/xt_tcpmss.h @@ -1,12 +1,13 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _XT_TCPMSS_MATCH_H -#define _XT_TCPMSS_MATCH_H +#ifndef _XT_TCPMSS_H +#define _XT_TCPMSS_H #include -struct xt_tcpmss_match_info { - __u16 mss_min, mss_max; - __u8 invert; +struct xt_tcpmss_info { + __u16 mss; }; -#endif /*_XT_TCPMSS_MATCH_H*/ +#define XT_TCPMSS_CLAMP_PMTU 0xffff + +#endif /* _XT_TCPMSS_H */ diff --git a/kernel/include/uapi/linux/netfilter_ipv4/ipt_ecn.h b/kernel/include/uapi/linux/netfilter_ipv4/ipt_ecn.h index 8121bec47..e3630fd04 100644 --- a/kernel/include/uapi/linux/netfilter_ipv4/ipt_ecn.h +++ b/kernel/include/uapi/linux/netfilter_ipv4/ipt_ecn.h @@ -1,16 +1,34 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _IPT_ECN_H -#define _IPT_ECN_H +/* Header file for iptables ipt_ECN target + * + * (C) 2002 by Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * + * ipt_ECN.h,v 1.3 2002/05/29 12:17:40 laforge Exp +*/ +#ifndef _IPT_ECN_TARGET_H +#define _IPT_ECN_TARGET_H -#include -#define ipt_ecn_info xt_ecn_info +#include +#include -enum { - IPT_ECN_IP_MASK = XT_ECN_IP_MASK, - IPT_ECN_OP_MATCH_IP = XT_ECN_OP_MATCH_IP, - IPT_ECN_OP_MATCH_ECE = XT_ECN_OP_MATCH_ECE, - IPT_ECN_OP_MATCH_CWR = XT_ECN_OP_MATCH_CWR, - IPT_ECN_OP_MATCH_MASK = XT_ECN_OP_MATCH_MASK, +#define IPT_ECN_IP_MASK (~XT_DSCP_MASK) + +#define IPT_ECN_OP_SET_IP 0x01 /* set ECN bits of IPv4 header */ +#define IPT_ECN_OP_SET_ECE 0x10 /* set ECE bit of TCP header */ +#define IPT_ECN_OP_SET_CWR 0x20 /* set CWR bit of TCP header */ + +#define IPT_ECN_OP_MASK 0xce + +struct ipt_ECN_info { + __u8 operation; /* bitset of operations */ + __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */ + union { + struct { + __u8 ece:1, cwr:1; /* TCP ECT bits */ + } tcp; + } proto; }; -#endif /* IPT_ECN_H */ +#endif /* _IPT_ECN_TARGET_H */ diff --git a/kernel/include/uapi/linux/netfilter_ipv4/ipt_ttl.h b/kernel/include/uapi/linux/netfilter_ipv4/ipt_ttl.h index ad0226a86..57d2fc67a 100644 --- a/kernel/include/uapi/linux/netfilter_ipv4/ipt_ttl.h +++ b/kernel/include/uapi/linux/netfilter_ipv4/ipt_ttl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* IP tables module for matching the value of the TTL - * (C) 2000 by Harald Welte */ +/* TTL modification module for IP tables + * (C) 2000 by Harald Welte */ #ifndef _IPT_TTL_H #define _IPT_TTL_H @@ -8,14 +8,14 @@ #include enum { - IPT_TTL_EQ = 0, /* equals */ - IPT_TTL_NE, /* not equals */ - IPT_TTL_LT, /* less than */ - IPT_TTL_GT, /* greater than */ + IPT_TTL_SET = 0, + IPT_TTL_INC, + IPT_TTL_DEC }; +#define IPT_TTL_MAXMODE IPT_TTL_DEC -struct ipt_ttl_info { +struct ipt_TTL_info { __u8 mode; __u8 ttl; }; diff --git a/kernel/include/uapi/linux/netfilter_ipv6/ip6t_hl.h b/kernel/include/uapi/linux/netfilter_ipv6/ip6t_hl.h index 6b62f9418..eaed56a28 100644 --- a/kernel/include/uapi/linux/netfilter_ipv6/ip6t_hl.h +++ b/kernel/include/uapi/linux/netfilter_ipv6/ip6t_hl.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* ip6tables module for matching the Hop Limit value +/* Hop Limit modification module for ip6tables * Maciej Soltysiak - * Based on HW's ttl module */ + * Based on HW's TTL module */ #ifndef _IP6T_HL_H #define _IP6T_HL_H @@ -9,14 +9,14 @@ #include enum { - IP6T_HL_EQ = 0, /* equals */ - IP6T_HL_NE, /* not equals */ - IP6T_HL_LT, /* less than */ - IP6T_HL_GT, /* greater than */ + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC }; +#define IP6T_HL_MAXMODE IP6T_HL_DEC -struct ip6t_hl_info { +struct ip6t_HL_info { __u8 mode; __u8 hop_limit; }; diff --git a/kernel/net/netfilter/xt_dscp.c b/kernel/net/netfilter/xt_dscp.c index 236ac8008..3f83d38c4 100644 --- a/kernel/net/netfilter/xt_dscp.c +++ b/kernel/net/netfilter/xt_dscp.c @@ -1,11 +1,14 @@ -/* IP tables module for matching the value of the IPv4/IPv6 DSCP field +/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8 * * (C) 2002 by Harald Welte + * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - */ + * + * See RFC2474 for a description of the DSCP field within the IP Header. +*/ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include @@ -14,102 +17,150 @@ #include #include -#include +#include MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: DSCP/TOS field match"); +MODULE_DESCRIPTION("Xtables: DSCP/TOS field modification"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_dscp"); -MODULE_ALIAS("ip6t_dscp"); -MODULE_ALIAS("ipt_tos"); -MODULE_ALIAS("ip6t_tos"); +MODULE_ALIAS("ipt_DSCP"); +MODULE_ALIAS("ip6t_DSCP"); +MODULE_ALIAS("ipt_TOS"); +MODULE_ALIAS("ip6t_TOS"); -static bool -dscp_mt(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +dscp_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_dscp_info *info = par->matchinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; - return (dscp == info->dscp) ^ !!info->invert; + if (dscp != dinfo->dscp) { + if (!skb_make_writable(skb, sizeof(struct iphdr))) + return NF_DROP; + + ipv4_change_dsfield(ip_hdr(skb), + (__force __u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + + } + return XT_CONTINUE; } -static bool -dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_dscp_info *info = par->matchinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; - return (dscp == info->dscp) ^ !!info->invert; + if (dscp != dinfo->dscp) { + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + return NF_DROP; + + ipv6_change_dsfield(ipv6_hdr(skb), + (__force __u8)(~XT_DSCP_MASK), + dinfo->dscp << XT_DSCP_SHIFT); + } + return XT_CONTINUE; } -static int dscp_mt_check(const struct xt_mtchk_param *par) +static int dscp_tg_check(const struct xt_tgchk_param *par) { - const struct xt_dscp_info *info = par->matchinfo; + const struct xt_DSCP_info *info = par->targinfo; if (info->dscp > XT_DSCP_MAX) { pr_info("dscp %x out of range\n", info->dscp); return -EDOM; } - return 0; } -static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +tos_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_tos_target_info *info = par->targinfo; + struct iphdr *iph = ip_hdr(skb); + u_int8_t orig, nv; + + orig = ipv4_get_dsfield(iph); + nv = (orig & ~info->tos_mask) ^ info->tos_value; + + if (orig != nv) { + if (!skb_make_writable(skb, sizeof(struct iphdr))) + return NF_DROP; + iph = ip_hdr(skb); + ipv4_change_dsfield(iph, 0, nv); + } + + return XT_CONTINUE; +} + +static unsigned int +tos_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct xt_tos_match_info *info = par->matchinfo; - - if (xt_family(par) == NFPROTO_IPV4) - return ((ip_hdr(skb)->tos & info->tos_mask) == - info->tos_value) ^ !!info->invert; - else - return ((ipv6_get_dsfield(ipv6_hdr(skb)) & info->tos_mask) == - info->tos_value) ^ !!info->invert; + const struct xt_tos_target_info *info = par->targinfo; + struct ipv6hdr *iph = ipv6_hdr(skb); + u_int8_t orig, nv; + + orig = ipv6_get_dsfield(iph); + nv = (orig & ~info->tos_mask) ^ info->tos_value; + + if (orig != nv) { + if (!skb_make_writable(skb, sizeof(struct iphdr))) + return NF_DROP; + iph = ipv6_hdr(skb); + ipv6_change_dsfield(iph, 0, nv); + } + + return XT_CONTINUE; } -static struct xt_match dscp_mt_reg[] __read_mostly = { +static struct xt_target dscp_tg_reg[] __read_mostly = { { - .name = "dscp", + .name = "DSCP", .family = NFPROTO_IPV4, - .checkentry = dscp_mt_check, - .match = dscp_mt, - .matchsize = sizeof(struct xt_dscp_info), + .checkentry = dscp_tg_check, + .target = dscp_tg, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", .me = THIS_MODULE, }, { - .name = "dscp", + .name = "DSCP", .family = NFPROTO_IPV6, - .checkentry = dscp_mt_check, - .match = dscp_mt6, - .matchsize = sizeof(struct xt_dscp_info), + .checkentry = dscp_tg_check, + .target = dscp_tg6, + .targetsize = sizeof(struct xt_DSCP_info), + .table = "mangle", .me = THIS_MODULE, }, { - .name = "tos", + .name = "TOS", .revision = 1, .family = NFPROTO_IPV4, - .match = tos_mt, - .matchsize = sizeof(struct xt_tos_match_info), + .table = "mangle", + .target = tos_tg, + .targetsize = sizeof(struct xt_tos_target_info), .me = THIS_MODULE, }, { - .name = "tos", + .name = "TOS", .revision = 1, .family = NFPROTO_IPV6, - .match = tos_mt, - .matchsize = sizeof(struct xt_tos_match_info), + .table = "mangle", + .target = tos_tg6, + .targetsize = sizeof(struct xt_tos_target_info), .me = THIS_MODULE, }, }; -static int __init dscp_mt_init(void) +static int __init dscp_tg_init(void) { - return xt_register_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg)); + return xt_register_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg)); } -static void __exit dscp_mt_exit(void) +static void __exit dscp_tg_exit(void) { - xt_unregister_matches(dscp_mt_reg, ARRAY_SIZE(dscp_mt_reg)); + xt_unregister_targets(dscp_tg_reg, ARRAY_SIZE(dscp_tg_reg)); } -module_init(dscp_mt_init); -module_exit(dscp_mt_exit); +module_init(dscp_tg_init); +module_exit(dscp_tg_exit); diff --git a/kernel/net/netfilter/xt_hl.c b/kernel/net/netfilter/xt_hl.c index 003951149..1535e87ed 100644 --- a/kernel/net/netfilter/xt_hl.c +++ b/kernel/net/netfilter/xt_hl.c @@ -1,96 +1,169 @@ /* - * IP tables module for matching the value of the TTL - * (C) 2000,2001 by Harald Welte + * TTL modification target for IP tables + * (C) 2000,2005 by Harald Welte * - * Hop Limit matching module - * (C) 2001-2002 Maciej Soltysiak + * Hop Limit modification target for ip6tables + * Maciej Soltysiak * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - -#include -#include +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include +#include +#include #include -#include -#include +#include +#include +MODULE_AUTHOR("Harald Welte "); MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match"); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_ttl"); -MODULE_ALIAS("ip6t_hl"); -static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +ttl_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct ipt_ttl_info *info = par->matchinfo; - const u8 ttl = ip_hdr(skb)->ttl; + struct iphdr *iph; + const struct ipt_TTL_info *info = par->targinfo; + int new_ttl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + iph = ip_hdr(skb); switch (info->mode) { - case IPT_TTL_EQ: - return ttl == info->ttl; - case IPT_TTL_NE: - return ttl != info->ttl; - case IPT_TTL_LT: - return ttl < info->ttl; - case IPT_TTL_GT: - return ttl > info->ttl; + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + csum_replace2(&iph->check, htons(iph->ttl << 8), + htons(new_ttl << 8)); + iph->ttl = new_ttl; } - return false; + return XT_CONTINUE; } -static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par) +static unsigned int +hl_tg6(struct sk_buff *skb, const struct xt_action_param *par) { - const struct ip6t_hl_info *info = par->matchinfo; - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = par->targinfo; + int new_hl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + ip6h = ipv6_hdr(skb); switch (info->mode) { - case IP6T_HL_EQ: - return ip6h->hop_limit == info->hop_limit; - case IP6T_HL_NE: - return ip6h->hop_limit != info->hop_limit; - case IP6T_HL_LT: - return ip6h->hop_limit < info->hop_limit; - case IP6T_HL_GT: - return ip6h->hop_limit > info->hop_limit; + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; } - return false; + ip6h->hop_limit = new_hl; + + return XT_CONTINUE; +} + +static int ttl_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_TTL_info *info = par->targinfo; + + if (info->mode > IPT_TTL_MAXMODE) { + pr_info("TTL: invalid or unknown mode %u\n", info->mode); + return -EINVAL; + } + if (info->mode != IPT_TTL_SET && info->ttl == 0) + return -EINVAL; + return 0; +} + +static int hl_tg6_check(const struct xt_tgchk_param *par) +{ + const struct ip6t_HL_info *info = par->targinfo; + + if (info->mode > IP6T_HL_MAXMODE) { + pr_info("invalid or unknown mode %u\n", info->mode); + return -EINVAL; + } + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { + pr_info("increment/decrement does not " + "make sense with value 0\n"); + return -EINVAL; + } + return 0; } -static struct xt_match hl_mt_reg[] __read_mostly = { +static struct xt_target hl_tg_reg[] __read_mostly = { { - .name = "ttl", + .name = "TTL", .revision = 0, .family = NFPROTO_IPV4, - .match = ttl_mt, - .matchsize = sizeof(struct ipt_ttl_info), + .target = ttl_tg, + .targetsize = sizeof(struct ipt_TTL_info), + .table = "mangle", + .checkentry = ttl_tg_check, .me = THIS_MODULE, }, { - .name = "hl", + .name = "HL", .revision = 0, .family = NFPROTO_IPV6, - .match = hl_mt6, - .matchsize = sizeof(struct ip6t_hl_info), + .target = hl_tg6, + .targetsize = sizeof(struct ip6t_HL_info), + .table = "mangle", + .checkentry = hl_tg6_check, .me = THIS_MODULE, }, }; -static int __init hl_mt_init(void) +static int __init hl_tg_init(void) { - return xt_register_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); + return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); } -static void __exit hl_mt_exit(void) +static void __exit hl_tg_exit(void) { - xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); + xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); } -module_init(hl_mt_init); -module_exit(hl_mt_exit); +module_init(hl_tg_init); +module_exit(hl_tg_exit); +MODULE_ALIAS("ipt_TTL"); +MODULE_ALIAS("ip6t_HL"); diff --git a/kernel/net/netfilter/xt_rateest.c b/kernel/net/netfilter/xt_rateest.c index 755d2f669..498b54fd0 100644 --- a/kernel/net/netfilter/xt_rateest.c +++ b/kernel/net/netfilter/xt_rateest.c @@ -8,149 +8,184 @@ #include #include #include +#include +#include +#include +#include +#include +#include #include -#include +#include #include +static DEFINE_MUTEX(xt_rateest_mutex); -static bool -xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par) +#define RATEEST_HSIZE 16 +static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly; +static unsigned int jhash_rnd __read_mostly; + +static unsigned int xt_rateest_hash(const char *name) { - const struct xt_rateest_match_info *info = par->matchinfo; - struct gnet_stats_rate_est64 sample = {0}; - u_int32_t bps1, bps2, pps1, pps2; - bool ret = true; - - gen_estimator_read(&info->est1->rate_est, &sample); - - if (info->flags & XT_RATEEST_MATCH_DELTA) { - bps1 = info->bps1 >= sample.bps ? info->bps1 - sample.bps : 0; - pps1 = info->pps1 >= sample.pps ? info->pps1 - sample.pps : 0; - } else { - bps1 = sample.bps; - pps1 = sample.pps; - } + return jhash(name, FIELD_SIZEOF(struct xt_rateest, name), jhash_rnd) & + (RATEEST_HSIZE - 1); +} + +static void xt_rateest_hash_insert(struct xt_rateest *est) +{ + unsigned int h; + + h = xt_rateest_hash(est->name); + hlist_add_head(&est->list, &rateest_hash[h]); +} - if (info->flags & XT_RATEEST_MATCH_ABS) { - bps2 = info->bps2; - pps2 = info->pps2; - } else { - gen_estimator_read(&info->est2->rate_est, &sample); - - if (info->flags & XT_RATEEST_MATCH_DELTA) { - bps2 = info->bps2 >= sample.bps ? info->bps2 - sample.bps : 0; - pps2 = info->pps2 >= sample.pps ? info->pps2 - sample.pps : 0; - } else { - bps2 = sample.bps; - pps2 = sample.pps; +struct xt_rateest *xt_rateest_lookup(const char *name) +{ + struct xt_rateest *est; + unsigned int h; + + h = xt_rateest_hash(name); + mutex_lock(&xt_rateest_mutex); + hlist_for_each_entry(est, &rateest_hash[h], list) { + if (strcmp(est->name, name) == 0) { + est->refcnt++; + mutex_unlock(&xt_rateest_mutex); + return est; } } + mutex_unlock(&xt_rateest_mutex); + return NULL; +} +EXPORT_SYMBOL_GPL(xt_rateest_lookup); - switch (info->mode) { - case XT_RATEEST_MATCH_LT: - if (info->flags & XT_RATEEST_MATCH_BPS) - ret &= bps1 < bps2; - if (info->flags & XT_RATEEST_MATCH_PPS) - ret &= pps1 < pps2; - break; - case XT_RATEEST_MATCH_GT: - if (info->flags & XT_RATEEST_MATCH_BPS) - ret &= bps1 > bps2; - if (info->flags & XT_RATEEST_MATCH_PPS) - ret &= pps1 > pps2; - break; - case XT_RATEEST_MATCH_EQ: - if (info->flags & XT_RATEEST_MATCH_BPS) - ret &= bps1 == bps2; - if (info->flags & XT_RATEEST_MATCH_PPS) - ret &= pps1 == pps2; - break; +void xt_rateest_put(struct xt_rateest *est) +{ + mutex_lock(&xt_rateest_mutex); + if (--est->refcnt == 0) { + hlist_del(&est->list); + gen_kill_estimator(&est->rate_est); + /* + * gen_estimator est_timer() might access est->lock or bstats, + * wait a RCU grace period before freeing 'est' + */ + kfree_rcu(est, rcu); } - - ret ^= info->flags & XT_RATEEST_MATCH_INVERT ? true : false; - return ret; + mutex_unlock(&xt_rateest_mutex); } +EXPORT_SYMBOL_GPL(xt_rateest_put); -static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) +static unsigned int +xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct xt_rateest_match_info *info = par->matchinfo; - struct xt_rateest *est1, *est2; - int ret = -EINVAL; + const struct xt_rateest_target_info *info = par->targinfo; + struct gnet_stats_basic_packed *stats = &info->est->bstats; - if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | - XT_RATEEST_MATCH_REL)) != 1) - goto err1; + spin_lock_bh(&info->est->lock); + stats->bytes += skb->len; + stats->packets++; + spin_unlock_bh(&info->est->lock); - if (!(info->flags & (XT_RATEEST_MATCH_BPS | XT_RATEEST_MATCH_PPS))) - goto err1; + return XT_CONTINUE; +} - switch (info->mode) { - case XT_RATEEST_MATCH_EQ: - case XT_RATEEST_MATCH_LT: - case XT_RATEEST_MATCH_GT: - break; - default: - goto err1; +static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) +{ + struct xt_rateest_target_info *info = par->targinfo; + struct xt_rateest *est; + struct { + struct nlattr opt; + struct gnet_estimator est; + } cfg; + int ret; + + net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); + + est = xt_rateest_lookup(info->name); + if (est) { + /* + * If estimator parameters are specified, they must match the + * existing estimator. + */ + if ((!info->interval && !info->ewma_log) || + (info->interval != est->params.interval || + info->ewma_log != est->params.ewma_log)) { + xt_rateest_put(est); + return -EINVAL; + } + info->est = est; + return 0; } - ret = -ENOENT; - est1 = xt_rateest_lookup(info->name1); - if (!est1) + ret = -ENOMEM; + est = kzalloc(sizeof(*est), GFP_KERNEL); + if (!est) goto err1; - est2 = NULL; - if (info->flags & XT_RATEEST_MATCH_REL) { - est2 = xt_rateest_lookup(info->name2); - if (!est2) - goto err2; - } + strlcpy(est->name, info->name, sizeof(est->name)); + spin_lock_init(&est->lock); + est->refcnt = 1; + est->params.interval = info->interval; + est->params.ewma_log = info->ewma_log; + + cfg.opt.nla_len = nla_attr_size(sizeof(cfg.est)); + cfg.opt.nla_type = TCA_STATS_RATE_EST; + cfg.est.interval = info->interval; + cfg.est.ewma_log = info->ewma_log; + + ret = gen_new_estimator(&est->bstats, NULL, &est->rate_est, + &est->lock, NULL, &cfg.opt); + if (ret < 0) + goto err2; - info->est1 = est1; - info->est2 = est2; + info->est = est; + xt_rateest_hash_insert(est); return 0; err2: - xt_rateest_put(est1); + kfree(est); err1: return ret; } -static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) +static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) { - struct xt_rateest_match_info *info = par->matchinfo; + struct xt_rateest_target_info *info = par->targinfo; - xt_rateest_put(info->est1); - if (info->est2) - xt_rateest_put(info->est2); + xt_rateest_put(info->est); } -static struct xt_match xt_rateest_mt_reg __read_mostly = { - .name = "rateest", +static struct xt_target xt_rateest_tg_reg __read_mostly = { + .name = "RATEEST", .revision = 0, .family = NFPROTO_UNSPEC, - .match = xt_rateest_mt, - .checkentry = xt_rateest_mt_checkentry, - .destroy = xt_rateest_mt_destroy, - .matchsize = sizeof(struct xt_rateest_match_info), - .usersize = offsetof(struct xt_rateest_match_info, est1), + .target = xt_rateest_tg, + .checkentry = xt_rateest_tg_checkentry, + .destroy = xt_rateest_tg_destroy, + .targetsize = sizeof(struct xt_rateest_target_info), + .usersize = offsetof(struct xt_rateest_target_info, est), .me = THIS_MODULE, }; -static int __init xt_rateest_mt_init(void) +static int __init xt_rateest_tg_init(void) { - return xt_register_match(&xt_rateest_mt_reg); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(rateest_hash); i++) + INIT_HLIST_HEAD(&rateest_hash[i]); + + return xt_register_target(&xt_rateest_tg_reg); } -static void __exit xt_rateest_mt_fini(void) +static void __exit xt_rateest_tg_fini(void) { - xt_unregister_match(&xt_rateest_mt_reg); + xt_unregister_target(&xt_rateest_tg_reg); } + MODULE_AUTHOR("Patrick McHardy "); MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("xtables rate estimator match"); -MODULE_ALIAS("ipt_rateest"); -MODULE_ALIAS("ip6t_rateest"); -module_init(xt_rateest_mt_init); -module_exit(xt_rateest_mt_fini); +MODULE_DESCRIPTION("Xtables: packet rate estimator"); +MODULE_ALIAS("ipt_RATEEST"); +MODULE_ALIAS("ip6t_RATEEST"); +module_init(xt_rateest_tg_init); +module_exit(xt_rateest_tg_fini); diff --git a/kernel/net/netfilter/xt_tcpmss.c b/kernel/net/netfilter/xt_tcpmss.c index c53d4d18e..9dae4d665 100644 --- a/kernel/net/netfilter/xt_tcpmss.c +++ b/kernel/net/netfilter/xt_tcpmss.c @@ -1,110 +1,353 @@ -/* Kernel module to match TCP MSS values. */ - -/* Copyright (C) 2000 Marc Boucher - * Portions (C) 2005 by Harald Welte +/* + * This is a module which is used for setting the MSS option in TCP packets. + * + * Copyright (C) 2000 Marc Boucher + * Copyright (C) 2007 Patrick McHardy * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include +#include +#include +#include +#include +#include +#include +#include #include -#include -#include - #include #include +#include +#include +#include MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); -MODULE_DESCRIPTION("Xtables: TCP MSS match"); -MODULE_ALIAS("ipt_tcpmss"); -MODULE_ALIAS("ip6t_tcpmss"); +MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment"); +MODULE_ALIAS("ipt_TCPMSS"); +MODULE_ALIAS("ip6t_TCPMSS"); + +static inline unsigned int +optlen(const u_int8_t *opt, unsigned int offset) +{ + /* Beware zero-length options: make finite progress */ + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) + return 1; + else + return opt[offset+1]; +} + +static u_int32_t tcpmss_reverse_mtu(struct net *net, + const struct sk_buff *skb, + unsigned int family) +{ + struct flowi fl; + const struct nf_afinfo *ai; + struct rtable *rt = NULL; + u_int32_t mtu = ~0U; + + if (family == PF_INET) { + struct flowi4 *fl4 = &fl.u.ip4; + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = ip_hdr(skb)->saddr; + } else { + struct flowi6 *fl6 = &fl.u.ip6; + + memset(fl6, 0, sizeof(*fl6)); + fl6->daddr = ipv6_hdr(skb)->saddr; + } + ai = nf_get_afinfo(family); + if (ai != NULL) + ai->route(net, (struct dst_entry **)&rt, &fl, false); + + if (rt != NULL) { + mtu = dst_mtu(&rt->dst); + dst_release(&rt->dst); + } + return mtu; +} -static bool -tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par) +static int +tcpmss_mangle_packet(struct sk_buff *skb, + const struct xt_action_param *par, + unsigned int family, + unsigned int tcphoff, + unsigned int minlen) { - const struct xt_tcpmss_match_info *info = par->matchinfo; - const struct tcphdr *th; - struct tcphdr _tcph; - /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ - const u_int8_t *op; - u8 _opt[15 * 4 - sizeof(_tcph)]; - unsigned int i, optlen; - - /* If we don't have the whole header, drop packet. */ - th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); - if (th == NULL) - goto dropit; - - /* Malformed. */ - if (th->doff*4 < sizeof(*th)) - goto dropit; - - optlen = th->doff*4 - sizeof(*th); - if (!optlen) - goto out; - - /* Truncated options. */ - op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt); - if (op == NULL) - goto dropit; - - for (i = 0; i < optlen; ) { - if (op[i] == TCPOPT_MSS - && (optlen - i) >= TCPOLEN_MSS - && op[i+1] == TCPOLEN_MSS) { - u_int16_t mssval; - - mssval = (op[i+2] << 8) | op[i+3]; - - return (mssval >= info->mss_min && - mssval <= info->mss_max) ^ info->invert; + const struct xt_tcpmss_info *info = par->targinfo; + struct tcphdr *tcph; + int len, tcp_hdrlen; + unsigned int i; + __be16 oldval; + u16 newmss; + u8 *opt; + + /* This is a fragment, no TCP header is available */ + if (par->fragoff != 0) + return 0; + + if (!skb_make_writable(skb, skb->len)) + return -1; + + len = skb->len - tcphoff; + if (len < (int)sizeof(struct tcphdr)) + return -1; + + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + tcp_hdrlen = tcph->doff * 4; + + if (len < tcp_hdrlen || tcp_hdrlen < sizeof(struct tcphdr)) + return -1; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU) { + struct net *net = xt_net(par); + unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); + + if (min_mtu <= minlen) { + net_err_ratelimited("unknown or invalid path-MTU (%u)\n", + min_mtu); + return -1; + } + newmss = min_mtu - minlen; + } else + newmss = info->mss; + + opt = (u_int8_t *)tcph; + for (i = sizeof(struct tcphdr); i <= tcp_hdrlen - TCPOLEN_MSS; i += optlen(opt, i)) { + if (opt[i] == TCPOPT_MSS && opt[i+1] == TCPOLEN_MSS) { + u_int16_t oldmss; + + oldmss = (opt[i+2] << 8) | opt[i+3]; + + /* Never increase MSS, even when setting it, as + * doing so results in problems for hosts that rely + * on MSS being set correctly. + */ + if (oldmss <= newmss) + return 0; + + opt[i+2] = (newmss & 0xff00) >> 8; + opt[i+3] = newmss & 0x00ff; + + inet_proto_csum_replace2(&tcph->check, skb, + htons(oldmss), htons(newmss), + false); + return 0; } - if (op[i] < 2) - i++; - else - i += op[i+1] ? : 1; } -out: - return info->invert; -dropit: - par->hotdrop = true; + /* There is data after the header so the option can't be added + * without moving it, and doing so may make the SYN packet + * itself too large. Accept the packet unmodified instead. + */ + if (len > tcp_hdrlen) + return 0; + + /* tcph->doff has 4 bits, do not wrap it to 0 */ + if (tcp_hdrlen >= 15 * 4) + return 0; + + /* + * MSS Option not found ?! add it.. + */ + if (skb_tailroom(skb) < TCPOLEN_MSS) { + if (pskb_expand_head(skb, 0, + TCPOLEN_MSS - skb_tailroom(skb), + GFP_ATOMIC)) + return -1; + tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff); + } + + skb_put(skb, TCPOLEN_MSS); + + /* + * IPv4: RFC 1122 states "If an MSS option is not received at + * connection setup, TCP MUST assume a default send MSS of 536". + * IPv6: RFC 2460 states IPv6 has a minimum MTU of 1280 and a minimum + * length IPv6 header of 60, ergo the default MSS value is 1220 + * Since no MSS was provided, we must use the default values + */ + if (xt_family(par) == NFPROTO_IPV4) + newmss = min(newmss, (u16)536); + else + newmss = min(newmss, (u16)1220); + + opt = (u_int8_t *)tcph + sizeof(struct tcphdr); + memmove(opt + TCPOLEN_MSS, opt, len - sizeof(struct tcphdr)); + + inet_proto_csum_replace2(&tcph->check, skb, + htons(len), htons(len + TCPOLEN_MSS), true); + opt[0] = TCPOPT_MSS; + opt[1] = TCPOLEN_MSS; + opt[2] = (newmss & 0xff00) >> 8; + opt[3] = newmss & 0x00ff; + + inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), false); + + oldval = ((__be16 *)tcph)[6]; + tcph->doff += TCPOLEN_MSS/4; + inet_proto_csum_replace2(&tcph->check, skb, + oldval, ((__be16 *)tcph)[6], false); + return TCPOLEN_MSS; +} + +static unsigned int +tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct iphdr *iph = ip_hdr(skb); + __be16 newlen; + int ret; + + ret = tcpmss_mangle_packet(skb, par, + PF_INET, + iph->ihl * 4, + sizeof(*iph) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + iph = ip_hdr(skb); + newlen = htons(ntohs(iph->tot_len) + ret); + csum_replace2(&iph->check, iph->tot_len, newlen); + iph->tot_len = newlen; + } + return XT_CONTINUE; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static unsigned int +tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + u8 nexthdr; + __be16 frag_off, oldlen, newlen; + int tcphoff; + int ret; + + nexthdr = ipv6h->nexthdr; + tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off); + if (tcphoff < 0) + return NF_DROP; + ret = tcpmss_mangle_packet(skb, par, + PF_INET6, + tcphoff, + sizeof(*ipv6h) + sizeof(struct tcphdr)); + if (ret < 0) + return NF_DROP; + if (ret > 0) { + ipv6h = ipv6_hdr(skb); + oldlen = ipv6h->payload_len; + newlen = htons(ntohs(oldlen) + ret); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(csum_sub(skb->csum, oldlen), + newlen); + ipv6h->payload_len = newlen; + } + return XT_CONTINUE; +} +#endif + +/* Must specify -p tcp --syn */ +static inline bool find_syn_match(const struct xt_entry_match *m) +{ + const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data; + + if (strcmp(m->u.kernel.match->name, "tcp") == 0 && + tcpinfo->flg_cmp & TCPHDR_SYN && + !(tcpinfo->invflags & XT_TCP_INV_FLAGS)) + return true; + return false; } -static struct xt_match tcpmss_mt_reg[] __read_mostly = { +static int tcpmss_tg4_check(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + const struct ipt_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info("path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + if (par->nft_compat) + return 0; + + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return 0; + pr_info("Only works on TCP SYN packets\n"); + return -EINVAL; +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static int tcpmss_tg6_check(const struct xt_tgchk_param *par) +{ + const struct xt_tcpmss_info *info = par->targinfo; + const struct ip6t_entry *e = par->entryinfo; + const struct xt_entry_match *ematch; + + if (info->mss == XT_TCPMSS_CLAMP_PMTU && + (par->hook_mask & ~((1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING))) != 0) { + pr_info("path-MTU clamping only supported in " + "FORWARD, OUTPUT and POSTROUTING hooks\n"); + return -EINVAL; + } + if (par->nft_compat) + return 0; + + xt_ematch_foreach(ematch, e) + if (find_syn_match(ematch)) + return 0; + pr_info("Only works on TCP SYN packets\n"); + return -EINVAL; +} +#endif + +static struct xt_target tcpmss_tg_reg[] __read_mostly = { { - .name = "tcpmss", .family = NFPROTO_IPV4, - .match = tcpmss_mt, - .matchsize = sizeof(struct xt_tcpmss_match_info), + .name = "TCPMSS", + .checkentry = tcpmss_tg4_check, + .target = tcpmss_tg4, + .targetsize = sizeof(struct xt_tcpmss_info), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) { - .name = "tcpmss", .family = NFPROTO_IPV6, - .match = tcpmss_mt, - .matchsize = sizeof(struct xt_tcpmss_match_info), + .name = "TCPMSS", + .checkentry = tcpmss_tg6_check, + .target = tcpmss_tg6, + .targetsize = sizeof(struct xt_tcpmss_info), .proto = IPPROTO_TCP, .me = THIS_MODULE, }, +#endif }; -static int __init tcpmss_mt_init(void) +static int __init tcpmss_tg_init(void) { - return xt_register_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg)); + return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg)); } -static void __exit tcpmss_mt_exit(void) +static void __exit tcpmss_tg_exit(void) { - xt_unregister_matches(tcpmss_mt_reg, ARRAY_SIZE(tcpmss_mt_reg)); + xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg)); } -module_init(tcpmss_mt_init); -module_exit(tcpmss_mt_exit); +module_init(tcpmss_tg_init); +module_exit(tcpmss_tg_exit); diff --git a/lib/headers/pkey.h b/lib/headers/pkey.h index 0cc7e8564..6e3e2d179 100644 --- a/lib/headers/pkey.h +++ b/lib/headers/pkey.h @@ -62,10 +62,11 @@ pkey_set_real(int pkru, int pkey) extern "C" { #endif int pkey_read(int idx); -int pkey_mprotect(void *ptr, size_t size, unsigned long orig_prot, unsigned long pkey) ; -int pkey_alloc(int, int); -int pkey_free(unsigned long pkey); - +/* +int pkey_mprotect(void *ptr, size_t size, int orig_prot, int pkey) ; +int pkey_alloc(unsigned int, unsigned int); +int pkey_free(int pkey); +*/ int evict_mprotect(struct mprot* m1, struct mprot* m2); int pkey_sync(void); #ifdef __cplusplus diff --git a/lib/heap/mpk_heap.cc b/lib/heap/mpk_heap.cc index cab46a931..31a6ab77c 100644 --- a/lib/heap/mpk_heap.cc +++ b/lib/heap/mpk_heap.cc @@ -8,7 +8,7 @@ #include #include "mpk_heap.h" -#define LOGGING 0 +#define LOGGING 1 #define __SOURCEFILE__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) #define rlog(format, ...) { \ if( LOGGING ) { \ @@ -52,7 +52,6 @@ int mpk_create(){ #endif mpk[mpk_id]->mpk_id = mpk_id; mpk[mpk_id]->start = NULL; // mpk_alloc will do the actual mmap - mpk[mpk_id]->total_size = 0; mpk[mpk_id]->free_list_head = NULL; mpk[mpk_id]->free_list_tail = NULL; pthread_mutex_init(&mpk[mpk_id]->mlock, NULL); @@ -131,9 +130,13 @@ void *mpk_mmap(int mpk_id, } mpk[mpk_id]->start = base; mpk[mpk_id]->total_size = len; + + free_list_init(mpk_id); + rlog("Memdom ID %d mmaped at %p\n", mpk_id, base); - rlog("[%s] mpk %d mmaped 0x%lx bytes at %p\n", __func__, mpk_id, len, base); + rlog("[%s] mpk %d mmaped 0x%lx bytes at %p\n", __func__, mpk_id, mpk[mpk_id]->total_size, mpk[mpk_id]->start); + rlog("[%s] mpk %d free list addr: %p, free list size: %d\n", __func__, mpk_id, mpk[mpk_id]->free_list_tail->addr, mpk[mpk_id]->free_list_tail->size); return base; } @@ -269,6 +272,7 @@ void *mpk_alloc(int mpk_id, unsigned long sz) { * check if the last element in free list is available, * allocate memory from it */ rlog("[%s] mpk %d search from tail for 0x%lx bytes\n", __func__, mpk_id, sz); + if ( free_list && sz <= free_list->size ) { memblock = (char*)free_list->addr; diff --git a/lib/heap/mpk_heap.h b/lib/heap/mpk_heap.h index 7d7743cc2..01dcab1ca 100644 --- a/lib/heap/mpk_heap.h +++ b/lib/heap/mpk_heap.h @@ -99,6 +99,9 @@ void mpk_free(void* data); /* Get the calling thread's defualt mpk id */ int mpk_private_id(void); +/* Initialize free list */ +void free_list_init(int mpk_id); + #ifdef __cplusplus } #endif diff --git a/lib/heap/pkey.cc b/lib/heap/pkey.cc index 395e3519b..1d76f0aa3 100644 --- a/lib/heap/pkey.cc +++ b/lib/heap/pkey.cc @@ -8,7 +8,7 @@ pkey_read(int idx) int eax = rdpkru(); return (eax >> (idx * 2)) & 3; } - +/* int pkey_mprotect(void *ptr, size_t size, unsigned long orig_prot, unsigned long pkey) @@ -17,17 +17,17 @@ return syscall(SYS_pkey_mprotect, ptr, size, orig_prot, pkey); } int -pkey_alloc(int flag, int permit) +pkey_alloc(unsigned int flag, unsigned int permit) { return syscall(SYS_pkey_alloc, flag, permit); } int -pkey_free(unsigned long pkey) +pkey_free(int pkey) { return syscall(SYS_pkey_free, pkey); } - +*/ int pkey_sync() { return syscall(__NR_pkey_sync, rdpkru()); diff --git a/lib/heap/pkey.h b/lib/heap/pkey.h index 71bd8e762..fd0341423 100644 --- a/lib/heap/pkey.h +++ b/lib/heap/pkey.h @@ -16,6 +16,7 @@ #define LOGGING 0 #define __SOURCEFILE__ (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) +/* #define rlog(format, ...) { \ if( LOGGING ) { \ FILE *fp = fopen("/home/soyeon/log/log2", "a"); \ @@ -24,6 +25,7 @@ fclose(fp); \ }\ } +*/ static inline void wrpkru(unsigned int pkru) { @@ -42,7 +44,7 @@ rdpkru() { : "=a" (eax), "=d" (edx) : "c" (0)); return eax; } - +/* static inline int pkey_set(int pkru) { @@ -50,7 +52,7 @@ asm volatile(".byte 0x0f,0x01,0xef\n\t" : : "a" (pkru), "c" (0), "d" (0)); return 0; } - +*/ static int pkey_set_real(int pkru, int pkey) { @@ -65,9 +67,11 @@ pkey_set_real(int pkru, int pkey) } int pkey_read(int idx); +/* int pkey_mprotect(void *ptr, size_t size, unsigned long orig_prot, unsigned long pkey) ; -int pkey_alloc(int flag, int permit); -int pkey_free(unsigned long pkey); +int pkey_alloc(unsigned int flag, unsigned int permit); +int pkey_free(int pkey); +*/ #ifdef __cplusplus extern "C" { #endif diff --git a/lib/mpt.cc b/lib/mpt.cc index 70698ab0b..0169aa069 100644 --- a/lib/mpt.cc +++ b/lib/mpt.cc @@ -213,8 +213,8 @@ int mpt_mmap(void** addr, size_t length, int prot, int flags) static std::atomic_int m_cnt; int id = m_cnt.fetch_add(1, std::memory_order_relaxed); void* r_addr = (void *)syscall(335, NULL, length, prot, flags | MAP_ANONYMOUS | MAP_PRIVATE, id); - //mpt_node* mn = hash_get(id); //new mpt_node(r_addr, length, prot); - //hash_put(id, mn); +// mpt_node* mn = hash_get(id); //new mpt_node(r_addr, length, prot); +// hash_put(id, mn); *addr = r_addr; n_mmap.fetch_add(1, std::memory_order_relaxed); diff --git a/lib/pkey.cc b/lib/pkey.cc index ff7cee621..51a49cd79 100644 --- a/lib/pkey.cc +++ b/lib/pkey.cc @@ -8,26 +8,26 @@ pkey_read(int idx) int eax = rdpkru(); return (eax >> (idx * 2)) & 3; } - +/* int -pkey_mprotect(void *ptr, size_t size, unsigned long orig_prot, -unsigned long pkey) +pkey_mprotect(void *ptr, size_t size, int orig_prot, +int pkey) { return syscall(SYS_pkey_mprotect, ptr, size, orig_prot, pkey); } int -pkey_alloc(int flags, int permit) +pkey_alloc(unsigned int flags, unsigned int permit) { return syscall(SYS_pkey_alloc, flags, permit); } int -pkey_free(unsigned long pkey) +pkey_free(int pkey) { return syscall(SYS_pkey_free, pkey); } - +*/ int evict_mprotect(struct mprot* m1, struct mprot* m2) {