Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
soc:2010:cooldavid:journal:week12 [2010/08/12 17:35]
cooldavid
soc:2010:cooldavid:journal:week12 [2010/08/13 01:31]
cooldavid
Line 1: Line 1:
 ==== Port wscale branch against current gPXE master ==== ==== Port wscale branch against current gPXE master ====
 Recent TCP updates are a lot different from the previous base of the wscale branch. Recent TCP updates are a lot different from the previous base of the wscale branch.
 +
 +=== Hit a GCC optimization trap ===
 +
 +== For the following code ==
 +<​code>​
 +#include <​stdio.h>​
 +
 +struct foo {
 +        char huge_gap[0x4005d0];​
 +        int v;
 +};
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +        if ( !f ) {
 +                printf("​Value is wrong.\n"​);​
 +        } else {
 +                printf("​Value is correct.\n"​);​
 +        }
 +}
 +
 +int main()
 +{
 +        int unidentified_value;​
 +
 +        printf("​%p\n",​ main);
 +        store_and_print(&​unidentified_value,​ NULL);
 +
 +        return 0;
 +}
 +</​code>​
 +
 +The value ''0x4005d0'' is taken from the output of ''printf("%p\n", main);''. It is used as the size of ''huge_gap'' so that
 +reading ''f->v'' through a NULL ''f'' hits a mapped memory address, which prevents a ''Segmentation fault'' error while running.
 +
 +== The output if compiled with ''​-O2''​ ==
 +<​code>​
 +$ ./​null_deref_optimized_away
 +0x4005d0
 +Value is correct.
 +
 +</​code>​
 +And its assembly code:
 +<​code>​
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +  4005b0: ​      8b 86 d0 05 40 00       ​mov ​   0x4005d0(%rsi),​%eax
 +  4005b6: ​      89 07                   ​mov ​   %eax,(%rdi)
 +        if ( !f ) {
 +                printf("​Value is wrong.\n"​);​
 +        } else {
 +                printf("​Value is correct.\n"​);​
 +  4005b8: ​      bf ec 06 40 00          mov    $0x4006ec,​%edi
 +  4005bd: ​      e9 c6 fe ff ff          jmpq   ​400488 <​puts@plt>​
 +  4005c2: ​      66 66 66 66 66 2e 0f    nopw   ​%cs:​0x0(%rax,​%rax,​1)
 +  4005c9: ​      1f 84 00 00 00 00 00
 +
 +00000000004005d0 <​main>:​
 +        }
 +}
 +</​code>​
 +
 +== The output if compiled with ''​-O2 -fno-delete-null-pointer-checks''​ ==
 +<​code>​
 +$ ./​null_deref_optimized_away
 +0x4005d0
 +Value is wrong.
 +
 +</​code>​
 +And its assembly code:
 +<​code>​
 +00000000004005b0 <​store_and_print>:​
 +        int v;
 +};
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +  4005b0: ​      8b 86 d0 05 40 00       ​mov ​   0x4005d0(%rsi),​%eax
 +        if ( !f ) {
 +  4005b6: ​      48 85 f6                test   ​%rsi,​%rsi
 +        int v;
 +};
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +  4005b9: ​      89 07                   ​mov ​   %eax,(%rdi)
 +        if ( !f ) {
 +  4005bb: ​      74 13                   ​je ​    ​4005d0 <​store_and_print+0x20>​
 +                printf("​Value is wrong.\n"​);​
 +        } else {
 +                printf("​Value is correct.\n"​);​
 +  4005bd: ​      bf 0c 07 40 00          mov    $0x40070c,​%edi
 +  4005c2: ​      e9 c1 fe ff ff          jmpq   ​400488 <​puts@plt>​
 +  4005c7: ​      66 0f 1f 84 00 00 00    nopw   ​0x0(%rax,​%rax,​1)
 +  4005ce: ​      00 00
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +        if ( !f ) {
 +                printf("​Value is wrong.\n"​);​
 +  4005d0: ​      bf fc 06 40 00          mov    $0x4006fc,​%edi
 +  4005d5: ​      e9 ae fe ff ff          jmpq   ​400488 <​puts@plt>​
 +  4005da: ​      66 0f 1f 44 00 00       ​nopw ​  ​0x0(%rax,​%rax,​1)
 +
 +00000000004005e0 <​main>:​
 +                printf("​Value is correct.\n"​);​
 +        }
 +}
 +</​code>​
  
 ==== Adding some facilities for testing convenience ==== ==== Adding some facilities for testing convenience ====
Line 8: Line 122:
  
 [[http://​git.etherboot.org/?​p=people/​cooldavid/​gpxe.git;​a=shortlog;​h=refs/​heads/​wscale2_test|wscale2 testing branch]] [[http://​git.etherboot.org/?​p=people/​cooldavid/​gpxe.git;​a=shortlog;​h=refs/​heads/​wscale2_test|wscale2 testing branch]]
 +
 [[http://​git.etherboot.org/?​p=people/​cooldavid/​gpxe.git;​a=shortlog;​h=refs/​heads/​tcpfix2_test|tcpfix2 testing branch]] [[http://​git.etherboot.org/?​p=people/​cooldavid/​gpxe.git;​a=shortlog;​h=refs/​heads/​tcpfix2_test|tcpfix2 testing branch]]
  
Line 16: Line 131:
   * Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware.   * Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware.
  
-^ Window Size ^   ​tcpfix2 branch ​   ^^    wscale2 branch ​   ^^ +^ Window Size ^   ​tcpfix2 branch ​    ​^^    wscale2 ​branch ​   ^^    sackws2 ​branch ​   ^^ 
-^             ^ KVM TAP/NAT ^  Native ^ KVM TAP/NAT ^  Native ^ +^             ^ KVM TAP/NAT ^  Native ​^ KVM TAP/NAT ^   ​Native ​  ^ KVM TAP/NAT ^  Native ^ 
-|           ​8K| ​      17KB/s|   ​16KB/​s| ​            ​  ​16KB/s| +|           ​8K| ​      16KB/s|   ​16KB/​s| ​      16KB/s     16KB/s|         ​KB/​s| ​    KB/s| 
-|          16K|       43KB/s|   43KB/s|             ​  ​41KB/s| +|          16K|       41KB/s|   41KB/s|       41KB/s     41KB/s|         ​KB/​s| ​    KB/s| 
-|          32K|       94KB/s|   93KB/s|             ​  ​91KB/s| +|          32K|       90KB/s|   90KB/s|       90KB/s     91KB/s|         ​KB/​s| ​    KB/s| 
-|          64K|      ​198KB/​s|  ​192KB/s|             ​ 187KB/s| +|          64K|      ​186KB/​s|  ​186KB/s|      ​187KB/​s    ​187KB/s|         ​KB/​s| ​    KB/s| 
-|         ​128K| ​            ​| ​        ​| ​            ​ 374KB/s| +|         ​128K| ​            ​| ​        ​| ​     ​374KB/​s    ​374KB/s|         ​KB/​s| ​    KB/s| 
-|         ​256K| ​            ​| ​        ​| ​            ​ 742KB/s| +|         ​256K| ​            ​| ​        ​| ​     ​470KB/​s    ​742KB/s|         ​KB/​s| ​    KB/s| 
-|         ​512K| ​            ​| ​        ​| ​            ​|         | +|         ​512K| ​            ​| ​        ​| ​     ​450KB/​s| ​ [1]344KB/s|         KB/s|     KB/s
-|        1024K| ​            ​| ​        ​| ​            ​|         |+|        1024K| ​            ​| ​        ​| ​     ​498KB/​s| ​ [1]403KB/s|         KB/s    KB/s| 
 + 
 +[1]: When the TCP rx window is large enough to hit the bandwidth limit (1MB/s), 
 +packets start to drop. According to the captured data, retransmission is very slow: 
 +the sending host waits a long time before transmitting the next missing packet required by the client. 
 +This issue should be fixable with Selective ACK support. 
  

QR Code
QR Code soc:2010:cooldavid:journal:week12 (generated for current page)