Hook | Called... |
---|---|
NF_IP_PRE_ROUTING | After sanity checks, before routing decisions. |
NF_IP_LOCAL_IN | After routing decisions if packet is for this host. |
NF_IP_FORWARD | If the packet is destined for another interface. |
NF_IP_LOCAL_OUT | For packets coming from local processes on their way out. |
NF_IP_POST_ROUTING | Just before outbound packets "hit the wire". |
261 /* Author's note: the statement of interest here is the return, which calls NF_HOOK - naturally the netfilter hook invocation. NF_HOOK turns out to be a macro, so we dig further into it after this function.
262 * Deliver IP Packets to the higher protocol layers.
263 */
264 int ip_local_deliver(struct sk_buff *skb)
265 {
266 /*
267 * Reassemble IP fragments.
268 */
269
270 if (skb->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { /* IP_MF or fragment-offset bits set in the header */
271 skb = ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER);
272 if (!skb) /* ip_defrag() handed back no packet: nothing to deliver from this call */
273 return 0;
274 }
275
/* Run the NF_IP_LOCAL_IN hooks; ip_local_deliver_finish is passed as the okfn callback. */
276 return NF_HOOK(PF_INET, NF_IP_LOCAL_IN, skb, skb->dev, NULL,
277 ip_local_deliver_finish);
278 }
/*
 * NF_HOOK - invoke the netfilter hooks for (pf, hook); expands to
 * NF_HOOK_THRESH with a priority threshold of INT_MIN.
 *
 * Author's note: the parameters are, in order, the protocol family, the hook
 * type, the skb, the incoming device, the outgoing device and a callback
 * function pointer. It first checks whether any hook function has been
 * registered in the rule tables to match and handle this case. If so, the
 * hook function is called; if not, execution simply continues with the
 * callback passed as a parameter, completing the whole process. This shows
 * that netfilter is a very lightweight firewall that can easily be separated
 * from the kernel networking code. Let's keep reading.
 *
 * The commentary is kept in this comment so that the backslash below remains
 * the last character on its line.
 */
246 #define NF_HOOK(pf, hook, skb, indev, outdev, okfn) \
247 NF_HOOK_THRESH(pf, hook, skb, indev, outdev, okfn, INT_MIN)
182 /** Author's note: a very important data structure, nf_hooks, appears here; this is what it looks like: 58 struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
183 * nf_hook_thresh - call a netfilter hook
184 *
185 * Returns 1 if the hook has allowed the packet to pass. The function
186 * okfn must be invoked by the caller in this case. Any other return
187 * value indicates the packet has been consumed by the hook.
188 */
189 static inline int nf_hook_thresh(int pf, unsigned int hook,
190 struct sk_buff **pskb,
191 struct net_device *indev,
192 struct net_device *outdev,
193 int (*okfn)(struct sk_buff *), int thresh,
194 int cond)
195 {
196 if (!cond) /* caller's condition is false: skip the hooks and let the packet pass */
197 return 1;
198 #ifndef CONFIG_NETFILTER_DEBUG
199 if (list_empty(&nf_hooks[pf][hook])) /* fast path: no hook registered for this (pf, hook) */
200 return 1;
201 #endif
/* Otherwise hand over to nf_hook_slow(), which walks the registered hooks. */
202 return nf_hook_slow(pf, hook, pskb, indev, outdev, okfn, thresh);
203 }
/*
 * iptable_filter_init - initialization routine that registers the
 * packet_filter table and then the ipt_ops hooks.
 *
 * Returns -EINVAL if `forward` is out of range, a negative value if either
 * registration fails (the table is unregistered again when hook registration
 * fails), otherwise the non-negative result of nf_register_hooks().
 *
 * Author's note: the table registration and the hook registration are easy
 * to see here; both happen at initialization time. Next we look at
 * nf_register_hooks, which calls nf_register_hook.
 */
142 static int __init iptable_filter_init(void)
143 {
144 int ret;
145
/* NOTE(review): the message says "0 or 1" but the check accepts 0..NF_MAX_VERDICT. */
146 if (forward < 0 || forward > NF_MAX_VERDICT) {
147 printk("iptables forward must be 0 or 1\n");
148 return -EINVAL;
149 }
150
151 /* Entry 1 is the FORWARD hook */
/* The verdict is stored as -verdict - 1; ipt_do_table decodes it with (-v) - 1. */
152 initial_table.entries[1].target.verdict = -forward - 1;
153
154 /* Register table */
155 ret = ipt_register_table(&packet_filter, &initial_table.repl);
156 if (ret < 0)
157 return ret;
158
159 /* Register hooks */
160 ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
161 if (ret < 0)
162 goto cleanup_table;
163
164 return ret;
165
166 cleanup_table:
/* Hook registration failed: undo the table registration before returning. */
167 ipt_unregister_table(&packet_filter);
168 return ret;
169 }
/*
 * nf_register_hook - link @reg into the nf_hooks[reg->pf][reg->hooknum] list.
 *
 * The list is walked under nf_hook_lock until the first entry whose priority
 * is greater than reg->priority; @reg is then added right after that entry's
 * predecessor, i.e. just in front of it. If no such entry exists the loop
 * ends at the list head and @reg lands at the tail. Always returns 0.
 *
 * Author's note: this function is straightforward - it registers an
 * nf_hook_ops structure at the corresponding position of the nf_hooks table.
 * Its position within the list depends on reg->priority; presumably the
 * smaller the value, the higher the priority and the earlier it is processed.
 * As for the contents of nf_hook_ops, one can guess: the hook function.
 */
62 int nf_register_hook(struct nf_hook_ops *reg)
63 {
64 struct list_head *i;
65
66 spin_lock_bh(&nf_hook_lock);
67 list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
/* The cast relies on `list` being the first member of struct nf_hook_ops. */
68 if (reg->priority < ((struct nf_hook_ops *)i)->priority)
69 break;
70 }
/* Restored `&reg->list`: the listing showed a corrupted "?->list" here. */
71 list_add_rcu(&reg->list, i->prev);
72 spin_unlock_bh(&nf_hook_lock);
73
74 synchronize_net();
75 return 0;
76 }
/*
 * nf_hook_slow - run the hooks registered in nf_hooks[pf][hook] on *pskb.
 *
 * Return value, as set by the code below:
 *   1       verdict was NF_ACCEPT or NF_STOP
 *   -EPERM  verdict was NF_DROP (the skb has been freed with kfree_skb)
 *   0       any other verdict, including NF_QUEUE once nf_queue() returns
 *           non-zero; when nf_queue() returns 0, iteration resumes at
 *           next_hook
 *
 * Author's note: notice the variable verdict - it is the hook functions'
 * decision about the packet. It takes the values listed in the table that
 * follows this function. (The author does not know what NF_STOP is for.)
 */
161 int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
162 struct net_device *indev,
163 struct net_device *outdev,
164 int (*okfn)(struct sk_buff *),
165 int hook_thresh)
166 {
167 struct list_head *elem;
168 unsigned int verdict;
169 int ret = 0;
170
171 /* We may already have this, but read-locks nest anyway */
172 rcu_read_lock();
173
/* Start at the list head; nf_iterate() advances elem as it walks the hooks. */
174 elem = &nf_hooks[pf][hook];
175 next_hook:
176 verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
177 outdev, &elem, okfn, hook_thresh);
178 if (verdict == NF_ACCEPT || verdict == NF_STOP) {
179 ret = 1;
180 goto unlock;
181 } else if (verdict == NF_DROP) {
182 kfree_skb(*pskb);
183 ret = -EPERM;
184 } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
185 NFDEBUG("nf_hook: Verdict = QUEUE.\n");
/* The bits above NF_VERDICT_BITS are passed on to nf_queue() as its last argument. */
186 if (!nf_queue(pskb, elem, pf, hook, indev, outdev, okfn,
187 verdict >> NF_VERDICT_BITS))
188 goto next_hook;
189 }
190 unlock:
191 rcu_read_unlock();
192 return ret;
193 }
Return Code | Meaning |
---|---|
NF_DROP | Discard the packet. |
NF_ACCEPT | Keep the packet. |
NF_STOLEN | Forget about the packet. |
NF_QUEUE | Queue packet for userspace. |
NF_REPEAT | Call this hook function again. |
/*
 * nf_iterate - call the hook functions on the list `head`, continuing after
 * the element *i.
 *
 * Hooks whose priority is below hook_thresh are skipped. The first verdict
 * that is neither NF_ACCEPT nor NF_REPEAT is returned to the caller, with *i
 * left at the hook that produced it. NF_REPEAT steps *i back one element so
 * the same hook is called again. NF_ACCEPT is returned once the list is
 * exhausted.
 */
117 unsigned int nf_iterate(struct list_head *head,
118 struct sk_buff **skb,
119 int hook,
120 const struct net_device *indev,
121 const struct net_device *outdev,
122 struct list_head **i,
123 int (*okfn)(struct sk_buff *),
124 int hook_thresh)
125 {
126 unsigned int verdict;
127
128 /*
129 * The caller must not block between calls to this
130 * function because of risk of continuing from deleted element.
131 */
132 list_for_each_continue_rcu(*i, head) {
/* The cast relies on `list` being the first member of struct nf_hook_ops. */
133 struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
134
135 if (hook_thresh > elem->priority) /* below the caller's threshold: skip this hook */
136 continue;
137
138 /* Optimization: we don't need to hold module
139 reference here, since function can't sleep. --RR */
140 verdict = elem->hook(hook, skb, indev, outdev, okfn);
141 if (verdict != NF_ACCEPT) {
142 #ifdef CONFIG_NETFILTER_DEBUG
/* Debug builds only: log an out-of-range verdict and move on to the next hook. */
143 if (unlikely((verdict & NF_VERDICT_MASK)
144 > NF_MAX_VERDICT)) {
145 NFDEBUG("Evil return from %p(%u).\n",
146 elem->hook, hook);
147 continue;
148 }
149 #endif
150 if (verdict != NF_REPEAT)
151 return verdict;
/* NF_REPEAT: step back so the loop's advance lands on this same hook again. */
152 *i = (*i)->prev;
153 }
154 }
155 return NF_ACCEPT;
156 }
這下我們應該清楚了,這個迭代就是挨個運行nf_hooks[pf][hook]所指向鏈表中的鉤子函數elem->hook。
如果其中有一個鉤子函數沒有ACCEPT且不是repeat,就直接跳出循環了,然后返回verdict,如果ACCEPT了,則繼續處理下一個鉤子函數,直到處理完。
/*
 * struct nf_hook_ops - one registered netfilter hook.
 *
 * Author's note: looking at this structure and recalling the hook
 * registration shown earlier, things should be clear. pf and hooknum select
 * the element's slot in the nf_hooks table, the nf_hookfn pointer is the
 * actual hook function, and priority determines its position in that list,
 * which is kept in ascending order. So when is the nf_hookfn assigned? That
 * naturally depends on each protocol; net/ipv4/netfilter/iptable_filter.c
 * contains such an assignment (the ipt_ops array).
 */
60 struct nf_hook_ops
61 {
/* Must stay the first member: nf_register_hook and nf_iterate cast a list_head pointer straight to struct nf_hook_ops. */
62 struct list_head list;
63
64 /* User fills in from here down. */
65 nf_hookfn *hook; /* function invoked by nf_iterate() */
66 struct module *owner;
67 int pf; /* first index into nf_hooks[][] */
68 int hooknum; /* second index into nf_hooks[][] */
69 /* Hooks are ordered in ascending priority. */
70 int priority;
71 };
/*
 * ipt_ops - the hooks registered by iptable_filter_init(): PF_INET entries
 * for NF_IP_LOCAL_IN, NF_IP_FORWARD and NF_IP_LOCAL_OUT, all at priority
 * NF_IP_PRI_FILTER.
 *
 * Author's note: these are pre-registered by the IPv4 filter table; nat and
 * mangle are pre-registered in the same way, and we can of course write our
 * own module that implements such a hook function. In this example the first
 * entry's hook function is ipt_hook, its protocol family is IPv4 and its hook
 * type is LOCAL_IN.
 */
114 static struct nf_hook_ops ipt_ops[] = {
115 {
116 .hook = ipt_hook,
117 .owner = THIS_MODULE,
118 .pf = PF_INET,
119 .hooknum = NF_IP_LOCAL_IN,
120 .priority = NF_IP_PRI_FILTER,
121 },
122 {
123 .hook = ipt_hook,
124 .owner = THIS_MODULE,
125 .pf = PF_INET,
126 .hooknum = NF_IP_FORWARD,
127 .priority = NF_IP_PRI_FILTER,
128 },
129 {
/* Locally generated packets go through a different hook function than the two entries above. */
130 .hook = ipt_local_out_hook,
131 .owner = THIS_MODULE,
132 .pf = PF_INET,
133 .hooknum = NF_IP_LOCAL_OUT,
134 .priority = NF_IP_PRI_FILTER,
135 },
136 };
215 /* Returns one of the generic firewall policies, like NF_ACCEPT. */ 做一些基本的注釋,其實我不喜歡一篇技術博客長篇累牘貼代碼,尤其是Linux內核源代碼,更多的是希望能夠進行分析。但往往越是討厭的事情,自己往往又這么做了。好吧,還是來解釋一下這個函數吧。
/*
 * ipt_do_table - run the packet *pskb through the rules of `table` for the
 * given hook and return the resulting verdict.
 *
 * The rule walk happens under read_lock_bh(&table->lock). It ends when a rule
 * yields a verdict (break) or when a match sets hotdrop, in which case
 * NF_DROP is returned regardless of `verdict`.
 */
216 unsigned int
217 ipt_do_table(struct sk_buff **pskb,
218 unsigned int hook,
219 const struct net_device *in,
220 const struct net_device *out,
221 struct ipt_table *table,// the table we operate on, e.g. filter
222 void *userdata)
223 {
224 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
225 u_int16_t offset;
226 struct iphdr *ip;
227 u_int16_t datalen;
228 int hotdrop = 0;
// when hotdrop is 1 the function returns NF_DROP right away; this is the fast way to drop a packet
229 /* Initializing verdict to NF_DROP keeps gcc happy. */
230 unsigned int verdict = NF_DROP;
231 const char *indev, *outdev;
232 void *table_base;
233 struct ipt_entry *e, *back;
234 struct xt_table_info *private = table->private;
// xt_table_info holds the whole rule set together with the offsets of the rules, which makes lookups easy
235
236 /* Initialization */
237 ip = (*pskb)->nh.iph;
238 datalen = (*pskb)->len - ip->ihl * 4;
239 indev = in ? in->name : nulldevname;
240 outdev = out ? out->name : nulldevname;
241 /* We handle fragments by dealing with the first fragment as
242 * if it was a normal packet. All other fragments are treated
243 * normally, except that they will NEVER match rules that ask
244 * things we don't know, ie. tcp syn flag or ports). If the
245 * rule is also a fragment-specific rule, non-fragments won't
246 * match it. */
247 offset = ntohs(ip->frag_off) & IP_OFFSET;
248
249 read_lock_bh(&table->lock);
250 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
251 table_base = (void *)private->entries[smp_processor_id()];
252 e = get_entry(table_base, private->hook_entry[hook]);
// fetch the first rule of this hook point via its offset
253
254 /* For return from builtin chain */
255 back = get_entry(table_base, private->underflow[hook]);
// fetch the rule at the end of this hook point's chain via its offset
256
257 do {
258 IP_NF_ASSERT(e);
259 IP_NF_ASSERT(back);
260 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
261 struct ipt_entry_target *t;
262
263 if (IPT_MATCH_ITERATE(e, do_match,
264 *pskb, in, out,
265 offset, &hotdrop) != 0)
// this macro iterates over all the matches of the rule, calling do_match for each
266 goto no_match;
// if the matches do not match, jump to no_match
267
268 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
269
270 t = ipt_get_target(e);// get the target
271 IP_NF_ASSERT(t->u.kernel.target);
272 /* Standard target? */
273 if (!t->u.kernel.target->target) {
// a NULL target function means a standard target: no module defines a target function for it
274 int v;
275
276 v = ((struct ipt_standard_target *)t)->verdict;
277 if (v < 0) {
278 /* Pop from stack? */
279 if (v != IPT_RETURN) {
/* negative values other than IPT_RETURN encode a verdict as -verdict - 1 */
280 verdict = (unsigned)(-v) - 1;
281 break;
282 }
283 e = back;
284 back = get_entry(table_base,
285 back->comefrom);
286 continue;
287 }
/* non-negative v is an offset from table_base of the entry to jump to */
288 if (table_base + v != (void *)e + e->next_offset
289 && !(e->ip.flags & IPT_F_GOTO)) {
290 /* Save old back ptr in next entry */
291 struct ipt_entry *next
292 = (void *)e + e->next_offset;
293 next->comefrom
294 = (void *)back - table_base;
295 /* set back pointer to next entry */
296 back = next;
297 }
298
299 e = get_entry(table_base, v);
300 } else {
301 /* Targets which reenter must return
302 abs. verdicts */
303 #ifdef CONFIG_NETFILTER_DEBUG
304 ((struct ipt_entry *)table_base)->comefrom
305 = 0xeeeeeeec;
306 #endif
307 verdict = t->u.kernel.target->target(pskb,
308 in, out,
309 hook,
310 t->u.kernel.target,
311 t->data,
312 userdata);
// call the target function defined by the module; it returns a verdict
313
314 #ifdef CONFIG_NETFILTER_DEBUG
315 if (((struct ipt_entry *)table_base)->comefrom
316 != 0xeeeeeeec
317 && verdict == IPT_CONTINUE) {
318 printk("Target %s reentered!\n",
319 t->u.kernel.target->name);
320 verdict = NF_DROP;
321 }
322 ((struct ipt_entry *)table_base)->comefrom
323 = 0x57acc001;
324 #endif
325 /* Target might have changed stuff. */
326 ip = (*pskb)->nh.iph;
327 datalen = (*pskb)->len - ip->ihl * 4;
328
329 if (verdict == IPT_CONTINUE)
330 e = (void *)e + e->next_offset;
331 else
332 /* Verdict */
333 break;
334 }
335 } else {
336
337 no_match:
338 e = (void *)e + e->next_offset;
// no match: move on to the next ipt_entry
339 }
340 } while (!hotdrop);
341
342 read_unlock_bh(&table->lock);
343
344 #ifdef DEBUG_ALLOW_ALL
345 return NF_ACCEPT;
346 #else
347 if (hotdrop)
348 return NF_DROP;
349 else return verdict;
350 #endif
351 }
聯系客服