Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Next revision
Previous revision
Last revision Both sides next revision
soc:2010:cooldavid:journal:week12 [2010/08/12 13:29]
cooldavid
soc:2010:cooldavid:journal:week12 [2010/08/13 00:54]
cooldavid
Line 1: Line 1:
 ==== Port wscale branch against current gPXE master ==== ==== Port wscale branch against current gPXE master ====
 The recent TCP updates are very different from the previous base of the wscale branch. The recent TCP updates are very different from the previous base of the wscale branch.
 +
 +=== Hit a GCC optimization trap ===
 +
 +== For the following code ==
 +<​code>​
 +#include <​stdio.h>​
 +
 +struct foo {
 +        char huge_gap[0x4005d0];​
 +        int v;
 +};
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +        if ( !f ) {
 +                printf("​Value is wrong.\n"​);​
 +        } else {
 +                printf("​Value is correct.\n"​);​
 +        }
 +}
 +
 +int main()
 +{
 +        int unidentified_value;​
 +
 +        printf("​%p\n",​ main);
 +        store_and_print(&​unidentified_value,​ NULL);
 +
 +        return 0;
 +}
 +</​code>​
 +
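 +The value ''0x4005d0'' is the address of ''main'', as printed by ''printf("%p\n", main);''. It is used as the size of ''huge_gap'' so that reading ''f->v'' through a NULL ''f''
 +hits a mapped memory address, which prevents a ''Segmentation fault'' while running.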
 +
 +== The output if compiled with ''​-O2''​ ==
 +<​code>​
 +$ ./​null_deref_optimized_away
 +0x4005d0
 +Value is correct.
 +
 +</​code>​
 +And its assembly code:
 +<​code>​
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +  4005b0: ​      8b 86 d0 05 40 00       ​mov ​   0x4005d0(%rsi),​%eax
 +  4005b6: ​      89 07                   ​mov ​   %eax,(%rdi)
 +        if ( !f ) {
 +                printf("​Value is wrong.\n"​);​
 +        } else {
 +                printf("​Value is correct.\n"​);​
 +  4005b8: ​      bf ec 06 40 00          mov    $0x4006ec,​%edi
 +  4005bd: ​      e9 c6 fe ff ff          jmpq   ​400488 <​puts@plt>​
 +  4005c2: ​      66 66 66 66 66 2e 0f    nopw   ​%cs:​0x0(%rax,​%rax,​1)
 +  4005c9: ​      1f 84 00 00 00 00 00
 +
 +00000000004005d0 <​main>:​
 +        }
 +}
 +</​code>​
 +
 +== The output if compiled with ''​-O2 -fno-delete-null-pointer-checks''​ ==
 +<​code>​
 +$ ./​null_deref_optimized_away
 +0x4005d0
 +Value is wrong.
 +
 +</​code>​
 +And its assembly code:
 +<​code>​
 +00000000004005b0 <​store_and_print>:​
 +        int v;
 +};
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +  4005b0: ​      8b 86 d0 05 40 00       ​mov ​   0x4005d0(%rsi),​%eax
 +        if ( !f ) {
 +  4005b6: ​      48 85 f6                test   ​%rsi,​%rsi
 +        int v;
 +};
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +  4005b9: ​      89 07                   ​mov ​   %eax,(%rdi)
 +        if ( !f ) {
 +  4005bb: ​      74 13                   ​je ​    ​4005d0 <​store_and_print+0x20>​
 +                printf("​Value is wrong.\n"​);​
 +        } else {
 +                printf("​Value is correct.\n"​);​
 +  4005bd: ​      bf 0c 07 40 00          mov    $0x40070c,​%edi
 +  4005c2: ​      e9 c1 fe ff ff          jmpq   ​400488 <​puts@plt>​
 +  4005c7: ​      66 0f 1f 84 00 00 00    nopw   ​0x0(%rax,​%rax,​1)
 +  4005ce: ​      00 00
 +
 +void store_and_print(int *val, struct foo *f)
 +{
 +        *val = f->v;
 +        if ( !f ) {
 +                printf("​Value is wrong.\n"​);​
 +  4005d0: ​      bf fc 06 40 00          mov    $0x4006fc,​%edi
 +  4005d5: ​      e9 ae fe ff ff          jmpq   ​400488 <​puts@plt>​
 +  4005da: ​      66 0f 1f 44 00 00       ​nopw ​  ​0x0(%rax,​%rax,​1)
 +
 +00000000004005e0 <​main>:​
 +                printf("​Value is correct.\n"​);​
 +        }
 +}
 +</​code>​
 +
 +==== Adding some facilities for testing convenience ====
 +  * Add a command to modify TCP window size at runtime.
 +  * Print "\b%c" instead of "." in monojob to keep the console clean.
 +  * Add a ''​pause''​ command.
 +
 +[[http://​git.etherboot.org/?​p=people/​cooldavid/​gpxe.git;​a=shortlog;​h=refs/​heads/​wscale2_test|wscale2 testing branch]]
 +
 +[[http://​git.etherboot.org/?​p=people/​cooldavid/​gpxe.git;​a=shortlog;​h=refs/​heads/​tcpfix2_test|tcpfix2 testing branch]]
  
 ==== Window size difference ==== ==== Window size difference ====
Line 8: Line 131:
   * Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware.   * Tested it with emulated rtl8139 of KVM, and Native on rtl8139 hardware.
  
-^ Window Size ^   ​tcpfix2 branch ​   ^^    ​wscale ​branch ​   ^^ +^ Window Size ^   ​tcpfix2 ​branch ​    ​^^ ​   wscale2 ​branch ​   ^^    ​sackws2 ​branch ​   ^^ 
-^             ^ KVM TAP/NAT ^  Native ^ KVM TAP/NAT ^  Native ^ +^             ^ KVM TAP/NAT ^  Native ​^ KVM TAP/NAT ^   ​Native ​  ^ KVM TAP/NAT ^  Native ^ 
-|           ​8K| ​      17KB/s|   ​16KB/​s| ​            ​|         | +|           ​8K| ​      16KB/s|   ​16KB/​s| ​      16KB/​s| ​     16KB/s|         KB/s|     KB/s
-|          16K|       43KB/s|   43KB/s|             ​|         | +|          16K|       41KB/s|   41KB/s|       41KB/​s| ​     41KB/s|         KB/s|     KB/s
-|          32K|       ​94KB/​s| ​  ​93KB/​s| ​            ​|         | +|          32K|       ​94KB/​s| ​  ​93KB/​s| ​      90KB/​s| ​     91KB/s|         KB/s|     KB/s
-|          64K|      198KB/​s| ​ 192KB/​s| ​            ​|         | +|          64K|      198KB/​s| ​ 192KB/​s| ​     ​187KB/​s| ​    ​187KB/​s|         KB/s|     KB/s
-|         ​128K| ​            ​| ​        ​| ​            ​ 374KB/s| +|         ​128K| ​            ​| ​        ​| ​     ​374KB/​s    ​374KB/s|         ​KB/​s| ​    KB/s| 
-|         ​256K| ​            ​| ​        ​| ​            ​ 742KB/s| +|         ​256K| ​            ​| ​        ​| ​     ​470KB/​s    ​742KB/s|         ​KB/​s| ​    KB/s| 
-|         ​512K| ​            ​| ​        ​| ​            ​|         | +|         ​512K| ​            ​| ​        ​| ​     ​450KB/​s| ​ [1]344KB/s|         KB/s|     KB/s
-|        1024K| ​            ​| ​        ​| ​            ​|         |+|        1024K| ​            ​| ​        ​| ​     ​498KB/​s| ​ [1]403KB/s|         KB/s    KB/s| 
 + 
 +[1]: When the TCP rx window is large enough to hit the bandwidth limit (1MB/s),
 +packets start to drop. According to the captured data, retransmission is very slow:
 +the sending host waits a long time before transmitting the next missing packet required by the client.
 +This issue should be fixable with Selective ACK support.
  

QR Code
QR Code soc:2010:cooldavid:journal:week12 (generated for current page)