Differences
This shows you the differences between two versions of the page.
| Both sides previous revision Previous revision Next revision | Previous revision | ||
|
soc:2010:cooldavid:journal:week12 [2010/08/12 17:23] cooldavid |
soc:2010:cooldavid:journal:week12 [2010/08/13 01:31] (current) cooldavid |
||
|---|---|---|---|
| Line 1: | Line 1: | ||
| ==== Port wscale branch against current gPXE master ==== | ==== Port wscale branch against current gPXE master ==== | ||
| Recent TCP updates are a lot different from the previous base of the wscale branch. | Recent TCP updates are a lot different from the previous base of the wscale branch. | ||
| + | |||
| + | === Hit a GCC optimization trap === | ||
| + | |||
| + | == For the following code == | ||
| + | <code> | ||
| + | #include <stdio.h> | ||
| + | |||
| + | struct foo { | ||
| + | char huge_gap[0x4005d0]; | ||
| + | int v; | ||
| + | }; | ||
| + | |||
| + | void store_and_print(int *val, struct foo *f) | ||
| + | { | ||
| + | *val = f->v; | ||
| + | if ( !f ) { | ||
| + | printf("Value is wrong.\n"); | ||
| + | } else { | ||
| + | printf("Value is correct.\n"); | ||
| + | } | ||
| + | } | ||
| + | |||
| + | int main() | ||
| + | { | ||
| + | int unidentified_value; | ||
| + | |||
| + | printf("%p\n", main); | ||
| + | store_and_print(&unidentified_value, NULL); | ||
| + | |||
| + | return 0; | ||
| + | } | ||
| + | </code> | ||
| + | |||
| + | The value ''0x4005d0'' is taken from the output of ''printf("%p\n", main);'' so that ''f->v'' with a NULL ''f'' reads from a | ||
| + | mapped memory address, which prevents a ''Segmentation fault'' error while running. | ||
| + | |||
| + | == The output if compiled with ''-O2'' == | ||
| + | <code> | ||
| + | $ ./null_deref_optimized_away | ||
| + | 0x4005d0 | ||
| + | Value is correct. | ||
| + | $ | ||
| + | </code> | ||
| + | And its assembly code: | ||
| + | <code> | ||
| + | void store_and_print(int *val, struct foo *f) | ||
| + | { | ||
| + | *val = f->v; | ||
| + | 4005b0: 8b 86 d0 05 40 00 mov 0x4005d0(%rsi),%eax | ||
| + | 4005b6: 89 07 mov %eax,(%rdi) | ||
| + | if ( !f ) { | ||
| + | printf("Value is wrong.\n"); | ||
| + | } else { | ||
| + | printf("Value is correct.\n"); | ||
| + | 4005b8: bf ec 06 40 00 mov $0x4006ec,%edi | ||
| + | 4005bd: e9 c6 fe ff ff jmpq 400488 <puts@plt> | ||
| + | 4005c2: 66 66 66 66 66 2e 0f nopw %cs:0x0(%rax,%rax,1) | ||
| + | 4005c9: 1f 84 00 00 00 00 00 | ||
| + | |||
| + | 00000000004005d0 <main>: | ||
| + | } | ||
| + | } | ||
| + | </code> | ||
| + | |||
| + | == The output if compiled with ''-O2 -fno-delete-null-pointer-checks'' == | ||
| + | <code> | ||
| + | $ ./null_deref_optimized_away | ||
| + | 0x4005d0 | ||
| + | Value is wrong. | ||
| + | $ | ||
| + | </code> | ||
| + | And its assembly code: | ||
| + | <code> | ||
| + | 00000000004005b0 <store_and_print>: | ||
| + | int v; | ||
| + | }; | ||
| + | |||
| + | void store_and_print(int *val, struct foo *f) | ||
| + | { | ||
| + | *val = f->v; | ||
| + | 4005b0: 8b 86 d0 05 40 00 mov 0x4005d0(%rsi),%eax | ||
| + | if ( !f ) { | ||
| + | 4005b6: 48 85 f6 test %rsi,%rsi | ||
| + | int v; | ||
| + | }; | ||
| + | |||
| + | void store_and_print(int *val, struct foo *f) | ||
| + | { | ||
| + | *val = f->v; | ||
| + | 4005b9: 89 07 mov %eax,(%rdi) | ||
| + | if ( !f ) { | ||
| + | 4005bb: 74 13 je 4005d0 <store_and_print+0x20> | ||
| + | printf("Value is wrong.\n"); | ||
| + | } else { | ||
| + | printf("Value is correct.\n"); | ||
| + | 4005bd: bf 0c 07 40 00 mov $0x40070c,%edi | ||
| + | 4005c2: e9 c1 fe ff ff jmpq 400488 <puts@plt> | ||
| + | 4005c7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) | ||
| + | 4005ce: 00 00 | ||
| + | |||
| + | void store_and_print(int *val, struct foo *f) | ||
| + | { | ||
| + | *val = f->v; | ||
| + | if ( !f ) { | ||
| + | printf("Value is wrong.\n"); | ||
| + | 4005d0: bf fc 06 40 00 mov $0x4006fc,%edi | ||
| + | 4005d5: e9 ae fe ff ff jmpq 400488 <puts@plt> | ||
| + | 4005da: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) | ||
| + | |||
| + | 00000000004005e0 <main>: | ||
| + | printf("Value is correct.\n"); | ||
| + | } | ||
| + | } | ||
| + | </code> | ||
| ==== Adding some facilities for testing convenience ==== | ==== Adding some facilities for testing convenience ==== | ||
| Line 8: | Line 122: | ||
| [[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/wscale2_test|wscale2 testing branch]] | [[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/wscale2_test|wscale2 testing branch]] | ||
| + | |||
| [[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/tcpfix2_test|tcpfix2 testing branch]] | [[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/tcpfix2_test|tcpfix2 testing branch]] | ||
| Line 16: | Line 131: | ||
| * Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware. | * Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware. | ||
| - | ^ Window Size ^ tcpfix2 branch ^^ wscale2 branch ^^ | + | ^ Window Size ^ tcpfix2 branch ^^ wscale2 branch ^^ sackws2 branch ^^ |
| - | ^ ^ KVM TAP/NAT ^ Native ^ KVM TAP/NAT ^ Native ^ | + | ^ ^ KVM TAP/NAT ^ Native ^ KVM TAP/NAT ^ Native ^ KVM TAP/NAT ^ Native ^ |
| - | | 8K| 17KB/s| 16KB/s| | 16KB/s| | + | | 8K| 16KB/s| 16KB/s| 16KB/s| 16KB/s| KB/s| KB/s| |
| - | | 16K| 43KB/s| 43KB/s| | 41KB/s| | + | | 16K| 41KB/s| 41KB/s| 41KB/s| 41KB/s| KB/s| KB/s| |
| - | | 32K| 94KB/s| 93KB/s| | | | + | | 32K| 90KB/s| 90KB/s| 90KB/s| 91KB/s| KB/s| KB/s| |
| - | | 64K| 198KB/s| 192KB/s| | | | + | | 64K| 186KB/s| 186KB/s| 187KB/s| 187KB/s| KB/s| KB/s| |
| - | | 128K| | | | 374KB/s| | + | | 128K| | | 374KB/s| 374KB/s| KB/s| KB/s| |
| - | | 256K| | | | 742KB/s| | + | | 256K| | | 470KB/s| 742KB/s| KB/s| KB/s| |
| - | | 512K| | | | | | + | | 512K| | | 450KB/s| [1]344KB/s| KB/s| KB/s| |
| - | | 1024K| | | | | | + | | 1024K| | | 498KB/s| [1]403KB/s| KB/s| KB/s| |
| + | |||
| + | [1]: When the TCP rx window is large enough to hit the bandwidth limit (1MB/s), | ||
| + | packets start to drop. According to the captured data, retransmission is very slow: | ||
| + | the sending host waits a long time before transmitting the next missing packet required by the client. | ||
| + | This issue should be fixable with Selective ACK support. | ||