1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "net/socket/tcp_socket.h"
6 #include "net/socket/tcp_socket_win.h"
7
8 #include <mstcpip.h>
9
10 #include "base/callback_helpers.h"
11 #include "base/logging.h"
12 #include "base/metrics/stats_counters.h"
13 #include "base/win/windows_version.h"
14 #include "net/base/address_list.h"
15 #include "net/base/connection_type_histograms.h"
16 #include "net/base/io_buffer.h"
17 #include "net/base/ip_endpoint.h"
18 #include "net/base/net_errors.h"
19 #include "net/base/net_util.h"
20 #include "net/base/network_change_notifier.h"
21 #include "net/base/winsock_init.h"
22 #include "net/base/winsock_util.h"
23 #include "net/socket/socket_descriptor.h"
24 #include "net/socket/socket_net_log_params.h"
25
26 namespace net {
27
28 namespace {
29
// Keep-alive delay, in seconds, that SetDefaultOptionsForClient() passes to
// SetTCPKeepAlive() for client sockets.
const int kTCPKeepAliveSeconds = 45;
31
SetSocketReceiveBufferSize(SOCKET socket,int32 size)32 int SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
33 int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
34 reinterpret_cast<const char*>(&size), sizeof(size));
35 int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
36 DCHECK(!rv) << "Could not set socket receive buffer size: " << net_error;
37 return net_error;
38 }
39
SetSocketSendBufferSize(SOCKET socket,int32 size)40 int SetSocketSendBufferSize(SOCKET socket, int32 size) {
41 int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
42 reinterpret_cast<const char*>(&size), sizeof(size));
43 int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
44 DCHECK(!rv) << "Could not set socket send buffer size: " << net_error;
45 return net_error;
46 }
47
48 // Disable Nagle.
49 // The Nagle implementation on windows is governed by RFC 896. The idea
50 // behind Nagle is to reduce small packets on the network. When Nagle is
51 // enabled, if a partial packet has been sent, the TCP stack will disallow
52 // further *partial* packets until an ACK has been received from the other
53 // side. Good applications should always strive to send as much data as
54 // possible and avoid partial-packet sends. However, in most real world
55 // applications, there are edge cases where this does not happen, and two
56 // partial packets may be sent back to back. For a browser, it is NEVER
57 // a benefit to delay for an RTT before the second packet is sent.
58 //
59 // As a practical example in Chromium today, consider the case of a small
60 // POST. I have verified this:
61 // Client writes 649 bytes of header (partial packet #1)
62 // Client writes 50 bytes of POST data (partial packet #2)
63 // In the above example, with Nagle, a RTT delay is inserted between these
64 // two sends due to nagle. RTTs can easily be 100ms or more. The best
65 // fix is to make sure that for POSTing data, we write as much data as
66 // possible and minimize partial packets. We will fix that. But disabling
67 // Nagle also ensure we don't run into this delay in other edge cases.
68 // See also:
69 // http://technet.microsoft.com/en-us/library/bb726981.aspx
DisableNagle(SOCKET socket,bool disable)70 bool DisableNagle(SOCKET socket, bool disable) {
71 BOOL val = disable ? TRUE : FALSE;
72 int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
73 reinterpret_cast<const char*>(&val),
74 sizeof(val));
75 DCHECK(!rv) << "Could not disable nagle";
76 return rv == 0;
77 }
78
79 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
80 // connections. See http://crbug.com/27400 for details.
SetTCPKeepAlive(SOCKET socket,BOOL enable,int delay_secs)81 bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
82 int delay = delay_secs * 1000;
83 struct tcp_keepalive keepalive_vals = {
84 enable ? 1 : 0, // TCP keep-alive on.
85 delay, // Delay seconds before sending first TCP keep-alive packet.
86 delay, // Delay seconds between sending TCP keep-alive packets.
87 };
88 DWORD bytes_returned = 0xABAB;
89 int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
90 sizeof(keepalive_vals), NULL, 0,
91 &bytes_returned, NULL, NULL);
92 DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
93 << " [error: " << WSAGetLastError() << "].";
94
95 // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
96 return rv == 0;
97 }
98
MapConnectError(int os_error)99 int MapConnectError(int os_error) {
100 switch (os_error) {
101 // connect fails with WSAEACCES when Windows Firewall blocks the
102 // connection.
103 case WSAEACCES:
104 return ERR_NETWORK_ACCESS_DENIED;
105 case WSAETIMEDOUT:
106 return ERR_CONNECTION_TIMED_OUT;
107 default: {
108 int net_error = MapSystemError(os_error);
109 if (net_error == ERR_FAILED)
110 return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED.
111
112 // Give a more specific error when the user is offline.
113 if (net_error == ERR_ADDRESS_UNREACHABLE &&
114 NetworkChangeNotifier::IsOffline()) {
115 return ERR_INTERNET_DISCONNECTED;
116 }
117
118 return net_error;
119 }
120 }
121 }
122
123 } // namespace
124
125 //-----------------------------------------------------------------------------
126
// Windows does not support TCP FastOpen, so these entry points are stubs:
// both queries report false and the enable request is a no-op.
// TODO(jri): Remove these along with the corresponding global variables.
bool IsTCPFastOpenSupported() {
  return false;
}

bool IsTCPFastOpenUserEnabled() {
  return false;
}

void CheckSupportAndMaybeEnableTCPFastOpen(bool user_enabled) {
}
132
133 // This class encapsulates all the state that has to be preserved as long as
134 // there is a network IO operation in progress. If the owner TCPSocketWin is
135 // destroyed while an operation is in progress, the Core is detached and it
136 // lives until the operation completes and the OS doesn't reference any resource
137 // declared on this class anymore.
138 class TCPSocketWin::Core : public base::RefCounted<Core> {
139 public:
140 explicit Core(TCPSocketWin* socket);
141
142 // Start watching for the end of a read or write operation.
143 void WatchForRead();
144 void WatchForWrite();
145
146 // The TCPSocketWin is going away.
Detach()147 void Detach() { socket_ = NULL; }
148
149 // The separate OVERLAPPED variables for asynchronous operation.
150 // |read_overlapped_| is used for both Connect() and Read().
151 // |write_overlapped_| is only used for Write();
152 OVERLAPPED read_overlapped_;
153 OVERLAPPED write_overlapped_;
154
155 // The buffers used in Read() and Write().
156 scoped_refptr<IOBuffer> read_iobuffer_;
157 scoped_refptr<IOBuffer> write_iobuffer_;
158 int read_buffer_length_;
159 int write_buffer_length_;
160
161 bool non_blocking_reads_initialized_;
162
163 private:
164 friend class base::RefCounted<Core>;
165
166 class ReadDelegate : public base::win::ObjectWatcher::Delegate {
167 public:
ReadDelegate(Core * core)168 explicit ReadDelegate(Core* core) : core_(core) {}
~ReadDelegate()169 virtual ~ReadDelegate() {}
170
171 // base::ObjectWatcher::Delegate methods:
172 virtual void OnObjectSignaled(HANDLE object);
173
174 private:
175 Core* const core_;
176 };
177
178 class WriteDelegate : public base::win::ObjectWatcher::Delegate {
179 public:
WriteDelegate(Core * core)180 explicit WriteDelegate(Core* core) : core_(core) {}
~WriteDelegate()181 virtual ~WriteDelegate() {}
182
183 // base::ObjectWatcher::Delegate methods:
184 virtual void OnObjectSignaled(HANDLE object);
185
186 private:
187 Core* const core_;
188 };
189
190 ~Core();
191
192 // The socket that created this object.
193 TCPSocketWin* socket_;
194
195 // |reader_| handles the signals from |read_watcher_|.
196 ReadDelegate reader_;
197 // |writer_| handles the signals from |write_watcher_|.
198 WriteDelegate writer_;
199
200 // |read_watcher_| watches for events from Connect() and Read().
201 base::win::ObjectWatcher read_watcher_;
202 // |write_watcher_| watches for events from Write();
203 base::win::ObjectWatcher write_watcher_;
204
205 DISALLOW_COPY_AND_ASSIGN(Core);
206 };
207
Core(TCPSocketWin * socket)208 TCPSocketWin::Core::Core(TCPSocketWin* socket)
209 : read_buffer_length_(0),
210 write_buffer_length_(0),
211 non_blocking_reads_initialized_(false),
212 socket_(socket),
213 reader_(this),
214 writer_(this) {
215 memset(&read_overlapped_, 0, sizeof(read_overlapped_));
216 memset(&write_overlapped_, 0, sizeof(write_overlapped_));
217
218 read_overlapped_.hEvent = WSACreateEvent();
219 write_overlapped_.hEvent = WSACreateEvent();
220 }
221
~Core()222 TCPSocketWin::Core::~Core() {
223 // Make sure the message loop is not watching this object anymore.
224 read_watcher_.StopWatching();
225 write_watcher_.StopWatching();
226
227 WSACloseEvent(read_overlapped_.hEvent);
228 memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
229 WSACloseEvent(write_overlapped_.hEvent);
230 memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
231 }
232
WatchForRead()233 void TCPSocketWin::Core::WatchForRead() {
234 // We grab an extra reference because there is an IO operation in progress.
235 // Balanced in ReadDelegate::OnObjectSignaled().
236 AddRef();
237 read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
238 }
239
WatchForWrite()240 void TCPSocketWin::Core::WatchForWrite() {
241 // We grab an extra reference because there is an IO operation in progress.
242 // Balanced in WriteDelegate::OnObjectSignaled().
243 AddRef();
244 write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
245 }
246
OnObjectSignaled(HANDLE object)247 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
248 DCHECK_EQ(object, core_->read_overlapped_.hEvent);
249 if (core_->socket_) {
250 if (core_->socket_->waiting_connect_)
251 core_->socket_->DidCompleteConnect();
252 else
253 core_->socket_->DidSignalRead();
254 }
255
256 core_->Release();
257 }
258
OnObjectSignaled(HANDLE object)259 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
260 HANDLE object) {
261 DCHECK_EQ(object, core_->write_overlapped_.hEvent);
262 if (core_->socket_)
263 core_->socket_->DidCompleteWrite();
264
265 core_->Release();
266 }
267
268 //-----------------------------------------------------------------------------
269
TCPSocketWin(net::NetLog * net_log,const net::NetLog::Source & source)270 TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
271 const net::NetLog::Source& source)
272 : socket_(INVALID_SOCKET),
273 accept_event_(WSA_INVALID_EVENT),
274 accept_socket_(NULL),
275 accept_address_(NULL),
276 waiting_connect_(false),
277 waiting_read_(false),
278 waiting_write_(false),
279 connect_os_error_(0),
280 logging_multiple_connect_attempts_(false),
281 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
282 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
283 source.ToEventParametersCallback());
284 EnsureWinsockInit();
285 }
286
~TCPSocketWin()287 TCPSocketWin::~TCPSocketWin() {
288 Close();
289 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
290 }
291
Open(AddressFamily family)292 int TCPSocketWin::Open(AddressFamily family) {
293 DCHECK(CalledOnValidThread());
294 DCHECK_EQ(socket_, INVALID_SOCKET);
295
296 socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
297 IPPROTO_TCP);
298 if (socket_ == INVALID_SOCKET) {
299 PLOG(ERROR) << "CreatePlatformSocket() returned an error";
300 return MapSystemError(WSAGetLastError());
301 }
302
303 if (SetNonBlocking(socket_)) {
304 int result = MapSystemError(WSAGetLastError());
305 Close();
306 return result;
307 }
308
309 return OK;
310 }
311
AdoptConnectedSocket(SOCKET socket,const IPEndPoint & peer_address)312 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
313 const IPEndPoint& peer_address) {
314 DCHECK(CalledOnValidThread());
315 DCHECK_EQ(socket_, INVALID_SOCKET);
316 DCHECK(!core_);
317
318 socket_ = socket;
319
320 if (SetNonBlocking(socket_)) {
321 int result = MapSystemError(WSAGetLastError());
322 Close();
323 return result;
324 }
325
326 core_ = new Core(this);
327 peer_address_.reset(new IPEndPoint(peer_address));
328
329 return OK;
330 }
331
AdoptListenSocket(SOCKET socket)332 int TCPSocketWin::AdoptListenSocket(SOCKET socket) {
333 DCHECK(CalledOnValidThread());
334 DCHECK_EQ(socket_, INVALID_SOCKET);
335
336 socket_ = socket;
337
338 if (SetNonBlocking(socket_)) {
339 int result = MapSystemError(WSAGetLastError());
340 Close();
341 return result;
342 }
343
344 // |core_| is not needed for sockets that are used to accept connections.
345 // The operation here is more like Open but with an existing socket.
346
347 return OK;
348 }
349
Bind(const IPEndPoint & address)350 int TCPSocketWin::Bind(const IPEndPoint& address) {
351 DCHECK(CalledOnValidThread());
352 DCHECK_NE(socket_, INVALID_SOCKET);
353
354 SockaddrStorage storage;
355 if (!address.ToSockAddr(storage.addr, &storage.addr_len))
356 return ERR_ADDRESS_INVALID;
357
358 int result = bind(socket_, storage.addr, storage.addr_len);
359 if (result < 0) {
360 PLOG(ERROR) << "bind() returned an error";
361 return MapSystemError(WSAGetLastError());
362 }
363
364 return OK;
365 }
366
Listen(int backlog)367 int TCPSocketWin::Listen(int backlog) {
368 DCHECK(CalledOnValidThread());
369 DCHECK_GT(backlog, 0);
370 DCHECK_NE(socket_, INVALID_SOCKET);
371 DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);
372
373 accept_event_ = WSACreateEvent();
374 if (accept_event_ == WSA_INVALID_EVENT) {
375 PLOG(ERROR) << "WSACreateEvent()";
376 return MapSystemError(WSAGetLastError());
377 }
378
379 int result = listen(socket_, backlog);
380 if (result < 0) {
381 PLOG(ERROR) << "listen() returned an error";
382 return MapSystemError(WSAGetLastError());
383 }
384
385 return OK;
386 }
387
Accept(scoped_ptr<TCPSocketWin> * socket,IPEndPoint * address,const CompletionCallback & callback)388 int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
389 IPEndPoint* address,
390 const CompletionCallback& callback) {
391 DCHECK(CalledOnValidThread());
392 DCHECK(socket);
393 DCHECK(address);
394 DCHECK(!callback.is_null());
395 DCHECK(accept_callback_.is_null());
396
397 net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
398
399 int result = AcceptInternal(socket, address);
400
401 if (result == ERR_IO_PENDING) {
402 // Start watching.
403 WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
404 accept_watcher_.StartWatching(accept_event_, this);
405
406 accept_socket_ = socket;
407 accept_address_ = address;
408 accept_callback_ = callback;
409 }
410
411 return result;
412 }
413
Connect(const IPEndPoint & address,const CompletionCallback & callback)414 int TCPSocketWin::Connect(const IPEndPoint& address,
415 const CompletionCallback& callback) {
416 DCHECK(CalledOnValidThread());
417 DCHECK_NE(socket_, INVALID_SOCKET);
418 DCHECK(!waiting_connect_);
419
420 // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
421 // Unless Close() is called to reset the internal state, a second call to
422 // Connect() is not allowed.
423 // Please note that we enforce this even if the previous Connect() has
424 // completed and failed. Although it is allowed to connect the same |socket_|
425 // again after a connection attempt failed on Windows, it results in
426 // unspecified behavior according to POSIX. Therefore, we make it behave in
427 // the same way as TCPSocketLibevent.
428 DCHECK(!peer_address_ && !core_);
429
430 if (!logging_multiple_connect_attempts_)
431 LogConnectBegin(AddressList(address));
432
433 peer_address_.reset(new IPEndPoint(address));
434
435 int rv = DoConnect();
436 if (rv == ERR_IO_PENDING) {
437 // Synchronous operation not supported.
438 DCHECK(!callback.is_null());
439 read_callback_ = callback;
440 waiting_connect_ = true;
441 } else {
442 DoConnectComplete(rv);
443 }
444
445 return rv;
446 }
447
IsConnected() const448 bool TCPSocketWin::IsConnected() const {
449 DCHECK(CalledOnValidThread());
450
451 if (socket_ == INVALID_SOCKET || waiting_connect_)
452 return false;
453
454 if (waiting_read_)
455 return true;
456
457 // Check if connection is alive.
458 char c;
459 int rv = recv(socket_, &c, 1, MSG_PEEK);
460 if (rv == 0)
461 return false;
462 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
463 return false;
464
465 return true;
466 }
467
IsConnectedAndIdle() const468 bool TCPSocketWin::IsConnectedAndIdle() const {
469 DCHECK(CalledOnValidThread());
470
471 if (socket_ == INVALID_SOCKET || waiting_connect_)
472 return false;
473
474 if (waiting_read_)
475 return true;
476
477 // Check if connection is alive and we haven't received any data
478 // unexpectedly.
479 char c;
480 int rv = recv(socket_, &c, 1, MSG_PEEK);
481 if (rv >= 0)
482 return false;
483 if (WSAGetLastError() != WSAEWOULDBLOCK)
484 return false;
485
486 return true;
487 }
488
Read(IOBuffer * buf,int buf_len,const CompletionCallback & callback)489 int TCPSocketWin::Read(IOBuffer* buf,
490 int buf_len,
491 const CompletionCallback& callback) {
492 DCHECK(CalledOnValidThread());
493 DCHECK_NE(socket_, INVALID_SOCKET);
494 DCHECK(!waiting_read_);
495 DCHECK(read_callback_.is_null());
496 DCHECK(!core_->read_iobuffer_);
497
498 return DoRead(buf, buf_len, callback);
499 }
500
Write(IOBuffer * buf,int buf_len,const CompletionCallback & callback)501 int TCPSocketWin::Write(IOBuffer* buf,
502 int buf_len,
503 const CompletionCallback& callback) {
504 DCHECK(CalledOnValidThread());
505 DCHECK_NE(socket_, INVALID_SOCKET);
506 DCHECK(!waiting_write_);
507 DCHECK(write_callback_.is_null());
508 DCHECK_GT(buf_len, 0);
509 DCHECK(!core_->write_iobuffer_);
510
511 base::StatsCounter writes("tcp.writes");
512 writes.Increment();
513
514 WSABUF write_buffer;
515 write_buffer.len = buf_len;
516 write_buffer.buf = buf->data();
517
518 // TODO(wtc): Remove the assertion after enough testing.
519 AssertEventNotSignaled(core_->write_overlapped_.hEvent);
520 DWORD num;
521 int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
522 &core_->write_overlapped_, NULL);
523 if (rv == 0) {
524 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
525 rv = static_cast<int>(num);
526 if (rv > buf_len || rv < 0) {
527 // It seems that some winsock interceptors report that more was written
528 // than was available. Treat this as an error. http://crbug.com/27870
529 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
530 << " bytes, but " << rv << " bytes reported.";
531 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
532 }
533 base::StatsCounter write_bytes("tcp.write_bytes");
534 write_bytes.Add(rv);
535 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
536 buf->data());
537 return rv;
538 }
539 } else {
540 int os_error = WSAGetLastError();
541 if (os_error != WSA_IO_PENDING) {
542 int net_error = MapSystemError(os_error);
543 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
544 CreateNetLogSocketErrorCallback(net_error, os_error));
545 return net_error;
546 }
547 }
548 waiting_write_ = true;
549 write_callback_ = callback;
550 core_->write_iobuffer_ = buf;
551 core_->write_buffer_length_ = buf_len;
552 core_->WatchForWrite();
553 return ERR_IO_PENDING;
554 }
555
GetLocalAddress(IPEndPoint * address) const556 int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
557 DCHECK(CalledOnValidThread());
558 DCHECK(address);
559
560 SockaddrStorage storage;
561 if (getsockname(socket_, storage.addr, &storage.addr_len))
562 return MapSystemError(WSAGetLastError());
563 if (!address->FromSockAddr(storage.addr, storage.addr_len))
564 return ERR_ADDRESS_INVALID;
565
566 return OK;
567 }
568
GetPeerAddress(IPEndPoint * address) const569 int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
570 DCHECK(CalledOnValidThread());
571 DCHECK(address);
572 if (!IsConnected())
573 return ERR_SOCKET_NOT_CONNECTED;
574 *address = *peer_address_;
575 return OK;
576 }
577
SetDefaultOptionsForServer()578 int TCPSocketWin::SetDefaultOptionsForServer() {
579 return SetExclusiveAddrUse();
580 }
581
SetDefaultOptionsForClient()582 void TCPSocketWin::SetDefaultOptionsForClient() {
583 // Increase the socket buffer sizes from the default sizes for WinXP. In
584 // performance testing, there is substantial benefit by increasing from 8KB
585 // to 64KB.
586 // See also:
587 // http://support.microsoft.com/kb/823764/EN-US
588 // On Vista, if we manually set these sizes, Vista turns off its receive
589 // window auto-tuning feature.
590 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
591 // Since Vista's auto-tune is better than any static value we can could set,
592 // only change these on pre-vista machines.
593 if (base::win::GetVersion() < base::win::VERSION_VISTA) {
594 const int32 kSocketBufferSize = 64 * 1024;
595 SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
596 SetSocketSendBufferSize(socket_, kSocketBufferSize);
597 }
598
599 DisableNagle(socket_, true);
600 SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
601 }
602
SetExclusiveAddrUse()603 int TCPSocketWin::SetExclusiveAddrUse() {
604 // On Windows, a bound end point can be hijacked by another process by
605 // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
606 // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
607 // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
608 // socket to forcibly bind to the end point until the end point is unbound.
609 // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
610 // MSDN: http://goo.gl/M6fjQ.
611 //
612 // Unlike on *nix, on Windows a TCP server socket can always bind to an end
613 // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
614 // needed here.
615 //
616 // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
617 // point in TIME_WAIT status. It does not have this effect for a TCP server
618 // socket.
619
620 BOOL true_value = 1;
621 int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
622 reinterpret_cast<const char*>(&true_value),
623 sizeof(true_value));
624 if (rv < 0)
625 return MapSystemError(errno);
626 return OK;
627 }
628
SetReceiveBufferSize(int32 size)629 int TCPSocketWin::SetReceiveBufferSize(int32 size) {
630 DCHECK(CalledOnValidThread());
631 return SetSocketReceiveBufferSize(socket_, size);
632 }
633
SetSendBufferSize(int32 size)634 int TCPSocketWin::SetSendBufferSize(int32 size) {
635 DCHECK(CalledOnValidThread());
636 return SetSocketSendBufferSize(socket_, size);
637 }
638
SetKeepAlive(bool enable,int delay)639 bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
640 return SetTCPKeepAlive(socket_, enable, delay);
641 }
642
SetNoDelay(bool no_delay)643 bool TCPSocketWin::SetNoDelay(bool no_delay) {
644 return DisableNagle(socket_, no_delay);
645 }
646
Close()647 void TCPSocketWin::Close() {
648 DCHECK(CalledOnValidThread());
649
650 if (socket_ != INVALID_SOCKET) {
651 // Only log the close event if there's actually a socket to close.
652 net_log_.AddEvent(NetLog::EventType::TYPE_SOCKET_CLOSED);
653
654 // Note: don't use CancelIo to cancel pending IO because it doesn't work
655 // when there is a Winsock layered service provider.
656
657 // In most socket implementations, closing a socket results in a graceful
658 // connection shutdown, but in Winsock we have to call shutdown explicitly.
659 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
660 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
661 shutdown(socket_, SD_SEND);
662
663 // This cancels any pending IO.
664 if (closesocket(socket_) < 0)
665 PLOG(ERROR) << "closesocket";
666 socket_ = INVALID_SOCKET;
667 }
668
669 if (!accept_callback_.is_null()) {
670 accept_watcher_.StopWatching();
671 accept_socket_ = NULL;
672 accept_address_ = NULL;
673 accept_callback_.Reset();
674 }
675
676 if (accept_event_) {
677 WSACloseEvent(accept_event_);
678 accept_event_ = WSA_INVALID_EVENT;
679 }
680
681 if (core_) {
682 if (waiting_connect_) {
683 // We closed the socket, so this notification will never come.
684 // From MSDN' WSAEventSelect documentation:
685 // "Closing a socket with closesocket also cancels the association and
686 // selection of network events specified in WSAEventSelect for the
687 // socket".
688 core_->Release();
689 }
690 core_->Detach();
691 core_ = NULL;
692 }
693
694 waiting_connect_ = false;
695 waiting_read_ = false;
696 waiting_write_ = false;
697
698 read_callback_.Reset();
699 write_callback_.Reset();
700 peer_address_.reset();
701 connect_os_error_ = 0;
702 }
703
StartLoggingMultipleConnectAttempts(const AddressList & addresses)704 void TCPSocketWin::StartLoggingMultipleConnectAttempts(
705 const AddressList& addresses) {
706 if (!logging_multiple_connect_attempts_) {
707 logging_multiple_connect_attempts_ = true;
708 LogConnectBegin(addresses);
709 } else {
710 NOTREACHED();
711 }
712 }
713
EndLoggingMultipleConnectAttempts(int net_error)714 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
715 if (logging_multiple_connect_attempts_) {
716 LogConnectEnd(net_error);
717 logging_multiple_connect_attempts_ = false;
718 } else {
719 NOTREACHED();
720 }
721 }
722
AcceptInternal(scoped_ptr<TCPSocketWin> * socket,IPEndPoint * address)723 int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
724 IPEndPoint* address) {
725 SockaddrStorage storage;
726 int new_socket = accept(socket_, storage.addr, &storage.addr_len);
727 if (new_socket < 0) {
728 int net_error = MapSystemError(WSAGetLastError());
729 if (net_error != ERR_IO_PENDING)
730 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
731 return net_error;
732 }
733
734 IPEndPoint ip_end_point;
735 if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
736 NOTREACHED();
737 if (closesocket(new_socket) < 0)
738 PLOG(ERROR) << "closesocket";
739 int net_error = ERR_ADDRESS_INVALID;
740 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
741 return net_error;
742 }
743 scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
744 net_log_.net_log(), net_log_.source()));
745 int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
746 if (adopt_result != OK) {
747 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
748 return adopt_result;
749 }
750 *socket = tcp_socket.Pass();
751 *address = ip_end_point;
752 net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
753 CreateNetLogIPEndPointCallback(&ip_end_point));
754 return OK;
755 }
756
OnObjectSignaled(HANDLE object)757 void TCPSocketWin::OnObjectSignaled(HANDLE object) {
758 WSANETWORKEVENTS ev;
759 if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
760 PLOG(ERROR) << "WSAEnumNetworkEvents()";
761 return;
762 }
763
764 if (ev.lNetworkEvents & FD_ACCEPT) {
765 int result = AcceptInternal(accept_socket_, accept_address_);
766 if (result != ERR_IO_PENDING) {
767 accept_socket_ = NULL;
768 accept_address_ = NULL;
769 base::ResetAndReturn(&accept_callback_).Run(result);
770 }
771 } else {
772 // This happens when a client opens a connection and closes it before we
773 // have a chance to accept it.
774 DCHECK(ev.lNetworkEvents == 0);
775
776 // Start watching the next FD_ACCEPT event.
777 WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
778 accept_watcher_.StartWatching(accept_event_, this);
779 }
780 }
781
DoConnect()782 int TCPSocketWin::DoConnect() {
783 DCHECK_EQ(connect_os_error_, 0);
784 DCHECK(!core_);
785
786 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
787 CreateNetLogIPEndPointCallback(peer_address_.get()));
788
789 core_ = new Core(this);
790 // WSAEventSelect sets the socket to non-blocking mode as a side effect.
791 // Our connect() and recv() calls require that the socket be non-blocking.
792 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
793
794 SockaddrStorage storage;
795 if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
796 return ERR_ADDRESS_INVALID;
797 if (!connect(socket_, storage.addr, storage.addr_len)) {
798 // Connected without waiting!
799 //
800 // The MSDN page for connect says:
801 // With a nonblocking socket, the connection attempt cannot be completed
802 // immediately. In this case, connect will return SOCKET_ERROR, and
803 // WSAGetLastError will return WSAEWOULDBLOCK.
804 // which implies that for a nonblocking socket, connect never returns 0.
805 // It's not documented whether the event object will be signaled or not
806 // if connect does return 0. So the code below is essentially dead code
807 // and we don't know if it's correct.
808 NOTREACHED();
809
810 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
811 return OK;
812 } else {
813 int os_error = WSAGetLastError();
814 if (os_error != WSAEWOULDBLOCK) {
815 LOG(ERROR) << "connect failed: " << os_error;
816 connect_os_error_ = os_error;
817 int rv = MapConnectError(os_error);
818 CHECK_NE(ERR_IO_PENDING, rv);
819 return rv;
820 }
821 }
822
823 core_->WatchForRead();
824 return ERR_IO_PENDING;
825 }
826
DoConnectComplete(int result)827 void TCPSocketWin::DoConnectComplete(int result) {
828 // Log the end of this attempt (and any OS error it threw).
829 int os_error = connect_os_error_;
830 connect_os_error_ = 0;
831 if (result != OK) {
832 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
833 NetLog::IntegerCallback("os_error", os_error));
834 } else {
835 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
836 }
837
838 if (!logging_multiple_connect_attempts_)
839 LogConnectEnd(result);
840 }
841
LogConnectBegin(const AddressList & addresses)842 void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
843 base::StatsCounter connects("tcp.connect");
844 connects.Increment();
845
846 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
847 addresses.CreateNetLogCallback());
848 }
849
LogConnectEnd(int net_error)850 void TCPSocketWin::LogConnectEnd(int net_error) {
851 if (net_error == OK)
852 UpdateConnectionTypeHistograms(CONNECTION_ANY);
853
854 if (net_error != OK) {
855 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
856 return;
857 }
858
859 struct sockaddr_storage source_address;
860 socklen_t addrlen = sizeof(source_address);
861 int rv = getsockname(
862 socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
863 if (rv != 0) {
864 LOG(ERROR) << "getsockname() [rv: " << rv
865 << "] error: " << WSAGetLastError();
866 NOTREACHED();
867 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
868 return;
869 }
870
871 net_log_.EndEvent(
872 NetLog::TYPE_TCP_CONNECT,
873 CreateNetLogSourceAddressCallback(
874 reinterpret_cast<const struct sockaddr*>(&source_address),
875 sizeof(source_address)));
876 }
877
DoRead(IOBuffer * buf,int buf_len,const CompletionCallback & callback)878 int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
879 const CompletionCallback& callback) {
880 if (!core_->non_blocking_reads_initialized_) {
881 WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
882 FD_READ | FD_CLOSE);
883 core_->non_blocking_reads_initialized_ = true;
884 }
885 int rv = recv(socket_, buf->data(), buf_len, 0);
886 if (rv == SOCKET_ERROR) {
887 int os_error = WSAGetLastError();
888 if (os_error != WSAEWOULDBLOCK) {
889 int net_error = MapSystemError(os_error);
890 net_log_.AddEvent(
891 NetLog::TYPE_SOCKET_READ_ERROR,
892 CreateNetLogSocketErrorCallback(net_error, os_error));
893 return net_error;
894 }
895 } else {
896 base::StatsCounter read_bytes("tcp.read_bytes");
897 if (rv > 0)
898 read_bytes.Add(rv);
899 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
900 buf->data());
901 return rv;
902 }
903
904 waiting_read_ = true;
905 read_callback_ = callback;
906 core_->read_iobuffer_ = buf;
907 core_->read_buffer_length_ = buf_len;
908 core_->WatchForRead();
909 return ERR_IO_PENDING;
910 }
911
DidCompleteConnect()912 void TCPSocketWin::DidCompleteConnect() {
913 DCHECK(waiting_connect_);
914 DCHECK(!read_callback_.is_null());
915 int result;
916
917 WSANETWORKEVENTS events;
918 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
919 &events);
920 int os_error = 0;
921 if (rv == SOCKET_ERROR) {
922 NOTREACHED();
923 os_error = WSAGetLastError();
924 result = MapSystemError(os_error);
925 } else if (events.lNetworkEvents & FD_CONNECT) {
926 os_error = events.iErrorCode[FD_CONNECT_BIT];
927 result = MapConnectError(os_error);
928 } else {
929 NOTREACHED();
930 result = ERR_UNEXPECTED;
931 }
932
933 connect_os_error_ = os_error;
934 DoConnectComplete(result);
935 waiting_connect_ = false;
936
937 DCHECK_NE(result, ERR_IO_PENDING);
938 base::ResetAndReturn(&read_callback_).Run(result);
939 }
940
DidCompleteWrite()941 void TCPSocketWin::DidCompleteWrite() {
942 DCHECK(waiting_write_);
943 DCHECK(!write_callback_.is_null());
944
945 DWORD num_bytes, flags;
946 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
947 &num_bytes, FALSE, &flags);
948 WSAResetEvent(core_->write_overlapped_.hEvent);
949 waiting_write_ = false;
950 int rv;
951 if (!ok) {
952 int os_error = WSAGetLastError();
953 rv = MapSystemError(os_error);
954 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
955 CreateNetLogSocketErrorCallback(rv, os_error));
956 } else {
957 rv = static_cast<int>(num_bytes);
958 if (rv > core_->write_buffer_length_ || rv < 0) {
959 // It seems that some winsock interceptors report that more was written
960 // than was available. Treat this as an error. http://crbug.com/27870
961 LOG(ERROR) << "Detected broken LSP: Asked to write "
962 << core_->write_buffer_length_ << " bytes, but " << rv
963 << " bytes reported.";
964 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
965 } else {
966 base::StatsCounter write_bytes("tcp.write_bytes");
967 write_bytes.Add(num_bytes);
968 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
969 core_->write_iobuffer_->data());
970 }
971 }
972
973 core_->write_iobuffer_ = NULL;
974
975 DCHECK_NE(rv, ERR_IO_PENDING);
976 base::ResetAndReturn(&write_callback_).Run(rv);
977 }
978
DidSignalRead()979 void TCPSocketWin::DidSignalRead() {
980 DCHECK(waiting_read_);
981 DCHECK(!read_callback_.is_null());
982
983 int os_error = 0;
984 WSANETWORKEVENTS network_events;
985 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
986 &network_events);
987 if (rv == SOCKET_ERROR) {
988 os_error = WSAGetLastError();
989 rv = MapSystemError(os_error);
990 } else if (network_events.lNetworkEvents) {
991 DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
992 // If network_events.lNetworkEvents is FD_CLOSE and
993 // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
994 // connection closure. It is tempting to directly set rv to 0 in
995 // this case, but the MSDN pages for WSAEventSelect and
996 // WSAAsyncSelect recommend we still call DoRead():
997 // FD_CLOSE should only be posted after all data is read from a
998 // socket, but an application should check for remaining data upon
999 // receipt of FD_CLOSE to avoid any possibility of losing data.
1000 //
1001 // If network_events.iErrorCode[FD_READ_BIT] or
1002 // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
1003 // DoRead() because recv() reports a more accurate error code
1004 // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
1005 // reset.
1006 rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
1007 read_callback_);
1008 if (rv == ERR_IO_PENDING)
1009 return;
1010 } else {
1011 // This may happen because Read() may succeed synchronously and
1012 // consume all the received data without resetting the event object.
1013 core_->WatchForRead();
1014 return;
1015 }
1016
1017 waiting_read_ = false;
1018 core_->read_iobuffer_ = NULL;
1019 core_->read_buffer_length_ = 0;
1020
1021 DCHECK_NE(rv, ERR_IO_PENDING);
1022 base::ResetAndReturn(&read_callback_).Run(rv);
1023 }
1024
1025 } // namespace net
1026