Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision Next revision | Previous revision | ||
soc:2010:cooldavid:journal:week12 [2010/08/12 13:36] cooldavid |
soc:2010:cooldavid:journal:week12 [2010/08/13 01:31] (current) cooldavid |
||
---|---|---|---|
Line 1: | Line 1: | ||
==== Port wscale branch against current gPXE master ==== | ==== Port wscale branch against current gPXE master ==== | ||
Recent TCP updates are a lot different from the previous base of the wscale branch. | Recent TCP updates are a lot different from the previous base of the wscale branch. | ||
+ | |||
+ | === Hit a GCC optimization trap === | ||
+ | |||
+ | == For the following code == | ||
+ | <code> | ||
+ | #include <stdio.h> | ||
+ | |||
+ | struct foo { | ||
+ | char huge_gap[0x4005d0]; | ||
+ | int v; | ||
+ | }; | ||
+ | |||
+ | void store_and_print(int *val, struct foo *f) | ||
+ | { | ||
+ | *val = f->v; | ||
+ | if ( !f ) { | ||
+ | printf("Value is wrong.\n"); | ||
+ | } else { | ||
+ | printf("Value is correct.\n"); | ||
+ | } | ||
+ | } | ||
+ | |||
+ | int main() | ||
+ | { | ||
+ | int unidentified_value; | ||
+ | |||
+ | printf("%p\n", main); | ||
+ | store_and_print(&unidentified_value, NULL); | ||
+ | |||
+ | return 0; | ||
+ | } | ||
+ | </code> | ||
+ | |||
+ | The value ''0x4005d0'' is taken from the output of ''printf("%p\n", main);'' so that, with ''f == NULL'', | ||
+ | ''f->v'' reads a mapped memory address and the program does not die with a ''Segmentation fault'' error while running. | ||
+ | |||
+ | == The output if compiled with ''-O2'' == | ||
+ | <code> | ||
+ | $ ./null_deref_optimized_away | ||
+ | 0x4005d0 | ||
+ | Value is correct. | ||
+ | $ | ||
+ | </code> | ||
+ | And its assembly code: | ||
+ | <code> | ||
+ | void store_and_print(int *val, struct foo *f) | ||
+ | { | ||
+ | *val = f->v; | ||
+ | 4005b0: 8b 86 d0 05 40 00 mov 0x4005d0(%rsi),%eax | ||
+ | 4005b6: 89 07 mov %eax,(%rdi) | ||
+ | if ( !f ) { | ||
+ | printf("Value is wrong.\n"); | ||
+ | } else { | ||
+ | printf("Value is correct.\n"); | ||
+ | 4005b8: bf ec 06 40 00 mov $0x4006ec,%edi | ||
+ | 4005bd: e9 c6 fe ff ff jmpq 400488 <puts@plt> | ||
+ | 4005c2: 66 66 66 66 66 2e 0f nopw %cs:0x0(%rax,%rax,1) | ||
+ | 4005c9: 1f 84 00 00 00 00 00 | ||
+ | |||
+ | 00000000004005d0 <main>: | ||
+ | } | ||
+ | } | ||
+ | </code> | ||
+ | |||
+ | == The output if compiled with ''-O2 -fno-delete-null-pointer-checks'' == | ||
+ | <code> | ||
+ | $ ./null_deref_optimized_away | ||
+ | 0x4005d0 | ||
+ | Value is wrong. | ||
+ | $ | ||
+ | </code> | ||
+ | And its assembly code: | ||
+ | <code> | ||
+ | 00000000004005b0 <store_and_print>: | ||
+ | int v; | ||
+ | }; | ||
+ | |||
+ | void store_and_print(int *val, struct foo *f) | ||
+ | { | ||
+ | *val = f->v; | ||
+ | 4005b0: 8b 86 d0 05 40 00 mov 0x4005d0(%rsi),%eax | ||
+ | if ( !f ) { | ||
+ | 4005b6: 48 85 f6 test %rsi,%rsi | ||
+ | int v; | ||
+ | }; | ||
+ | |||
+ | void store_and_print(int *val, struct foo *f) | ||
+ | { | ||
+ | *val = f->v; | ||
+ | 4005b9: 89 07 mov %eax,(%rdi) | ||
+ | if ( !f ) { | ||
+ | 4005bb: 74 13 je 4005d0 <store_and_print+0x20> | ||
+ | printf("Value is wrong.\n"); | ||
+ | } else { | ||
+ | printf("Value is correct.\n"); | ||
+ | 4005bd: bf 0c 07 40 00 mov $0x40070c,%edi | ||
+ | 4005c2: e9 c1 fe ff ff jmpq 400488 <puts@plt> | ||
+ | 4005c7: 66 0f 1f 84 00 00 00 nopw 0x0(%rax,%rax,1) | ||
+ | 4005ce: 00 00 | ||
+ | |||
+ | void store_and_print(int *val, struct foo *f) | ||
+ | { | ||
+ | *val = f->v; | ||
+ | if ( !f ) { | ||
+ | printf("Value is wrong.\n"); | ||
+ | 4005d0: bf fc 06 40 00 mov $0x4006fc,%edi | ||
+ | 4005d5: e9 ae fe ff ff jmpq 400488 <puts@plt> | ||
+ | 4005da: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1) | ||
+ | |||
+ | 00000000004005e0 <main>: | ||
+ | printf("Value is correct.\n"); | ||
+ | } | ||
+ | } | ||
+ | </code> | ||
==== Adding some facilities for testing convenience ==== | ==== Adding some facilities for testing convenience ==== | ||
Line 8: | Line 122: | ||
[[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/wscale2_test|wscale2 testing branch]] | [[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/wscale2_test|wscale2 testing branch]] | ||
+ | |||
[[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/tcpfix2_test|tcpfix2 testing branch]] | [[http://git.etherboot.org/?p=people/cooldavid/gpxe.git;a=shortlog;h=refs/heads/tcpfix2_test|tcpfix2 testing branch]] | ||
Line 16: | Line 131: | ||
* Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware. | * Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware. | ||
- | ^ Window Size ^ tcpfix2 branch ^^ wscale branch ^^ | + | ^ Window Size ^ tcpfix2 branch ^^ wscale2 branch ^^ sackws2 branch ^^ |
- | ^ ^ KVM TAP/NAT ^ Native ^ KVM TAP/NAT ^ Native ^ | + | ^ ^ KVM TAP/NAT ^ Native ^ KVM TAP/NAT ^ Native ^ KVM TAP/NAT ^ Native ^ |
- | | 8K| 17KB/s| 16KB/s| | | | + | | 8K| 16KB/s| 16KB/s| 16KB/s| 16KB/s| KB/s| KB/s| |
- | | 16K| 43KB/s| 43KB/s| | | | + | | 16K| 41KB/s| 41KB/s| 41KB/s| 41KB/s| KB/s| KB/s| |
- | | 32K| 94KB/s| 93KB/s| | | | + | | 32K| 90KB/s| 90KB/s| 90KB/s| 91KB/s| KB/s| KB/s| |
- | | 64K| 198KB/s| 192KB/s| | | | + | | 64K| 186KB/s| 186KB/s| 187KB/s| 187KB/s| KB/s| KB/s| |
- | | 128K| | | | 374KB/s| | + | | 128K| | | 374KB/s| 374KB/s| KB/s| KB/s| |
- | | 256K| | | | 742KB/s| | + | | 256K| | | 470KB/s| 742KB/s| KB/s| KB/s| |
- | | 512K| | | | | | + | | 512K| | | 450KB/s| [1]344KB/s| KB/s| KB/s| |
- | | 1024K| | | | | | + | | 1024K| | | 498KB/s| [1]403KB/s| KB/s| KB/s| |
+ | |||
+ | [1]: Once the TCP rx window is large enough to hit the bandwidth limit (1MB/s), | ||
+ | packets start to drop. According to the captured data, retransmission is very slow: | ||
+ | the sending host waits a long time before transmitting the next missing packet required by the client. | ||
+ | This issue should be fixable with Selective ACK support. | ||