diff options
author | Gunnar Beutner <gbeutner@serenityos.org> | 2021-05-12 09:14:37 +0200 |
---|---|---|
committer | Andreas Kling <kling@serenityos.org> | 2021-05-12 13:47:07 +0200 |
commit | af59f64bc0ca4446deb3265a4b1a37be3fb5d2ab (patch) | |
tree | 67899ff190d3a8c6af58fda57244c68c2fc77f52 /Kernel | |
parent | ffc6b714b0876262d80fe7f48ba071ce33ff99fb (diff) | |
download | serenity-af59f64bc0ca4446deb3265a4b1a37be3fb5d2ab.zip |
Kernel: Coalesce TCP ACKs
Previously we'd send a TCP ACK for each TCP packet we received. This
changes NetworkTask so that we send fewer TCP ACKs.
Diffstat (limited to 'Kernel')
-rw-r--r-- | Kernel/Net/NetworkTask.cpp | 55 | ||||
-rw-r--r-- | Kernel/Net/TCPSocket.cpp | 28 | ||||
-rw-r--r-- | Kernel/Net/TCPSocket.h | 6 |
3 files changed, 81 insertions, 8 deletions
diff --git a/Kernel/Net/NetworkTask.cpp b/Kernel/Net/NetworkTask.cpp index 70457c02c4..f4c5e480b5 100644 --- a/Kernel/Net/NetworkTask.cpp +++ b/Kernel/Net/NetworkTask.cpp @@ -28,8 +28,11 @@ static void handle_ipv4(const EthernetFrameHeader&, size_t frame_size, const Tim static void handle_icmp(const EthernetFrameHeader&, const IPv4Packet&, const Time& packet_timestamp); static void handle_udp(const IPv4Packet&, const Time& packet_timestamp); static void handle_tcp(const IPv4Packet&, const Time& packet_timestamp); +static void send_delayed_tcp_ack(RefPtr<TCPSocket> socket); +static void flush_delayed_tcp_acks(bool all); static Thread* network_task = nullptr; +static HashTable<RefPtr<TCPSocket>>* delayed_ack_sockets; [[noreturn]] static void NetworkTask_main(void*); @@ -47,6 +50,8 @@ bool NetworkTask::is_current() void NetworkTask_main(void*) { + delayed_ack_sockets = new HashTable<RefPtr<TCPSocket>>; + WaitQueue packet_wait_queue; int pending_packets = 0; NetworkAdapter::for_each([&](auto& adapter) { @@ -86,9 +91,13 @@ void NetworkTask_main(void*) for (;;) { size_t packet_size = dequeue_packet(buffer, buffer_size, packet_timestamp); if (!packet_size) { + // We might sleep for a while so we must flush all delayed TCP ACKs + // including those which haven't expired yet. + flush_delayed_tcp_acks(true); packet_wait_queue.wait_forever("NetworkTask"); continue; } + flush_delayed_tcp_acks(false); if (packet_size < sizeof(EthernetFrameHeader)) { dbgln("NetworkTask: Packet is too small to be an Ethernet packet! ({})", packet_size); continue; @@ -279,6 +288,38 @@ void handle_udp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) socket->did_receive(ipv4_packet.source(), udp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp); } +void send_delayed_tcp_ack(RefPtr<TCPSocket> socket) +{ + VERIFY(socket->lock().is_locked()); + if (!socket->should_delay_next_ack()) { + [[maybe_unused]] auto result = socket->send_ack(); + return; + } + + delayed_ack_sockets->set(move(socket)); +} + +void flush_delayed_tcp_acks(bool all) +{ + Vector<RefPtr<TCPSocket>, 32> remaining_sockets; + for (auto& socket : *delayed_ack_sockets) { + Locker locker(socket->lock()); + if (!all && socket->should_delay_next_ack()) { + remaining_sockets.append(socket); + continue; + } + [[maybe_unused]] auto result = socket->send_ack(); + } + + if (remaining_sockets.size() != delayed_ack_sockets->size()) { + delayed_ack_sockets->clear(); + if (remaining_sockets.size() > 0) + dbgln("flush_delayed_tcp_acks: {} sockets remaining", remaining_sockets.size()); + for (auto&& socket : remaining_sockets) + delayed_ack_sockets->set(move(socket)); + } +} + void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) { if (ipv4_packet.payload_size() < sizeof(TCPPacket)) { @@ -393,26 +434,26 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) switch (tcp_packet.flags()) { case TCPFlags::SYN: socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); - unused_rc = socket->send_tcp_packet(TCPFlags::ACK); + send_delayed_tcp_ack(socket); socket->set_state(TCPSocket::State::SynReceived); return; case TCPFlags::ACK | TCPFlags::SYN: socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); - unused_rc = socket->send_tcp_packet(TCPFlags::ACK); + send_delayed_tcp_ack(socket); socket->set_state(TCPSocket::State::Established); socket->set_setup_state(Socket::SetupState::Completed); socket->set_connected(true); return; case TCPFlags::ACK | TCPFlags::FIN: socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); - unused_rc = socket->send_tcp_packet(TCPFlags::ACK); + send_delayed_tcp_ack(socket); socket->set_state(TCPSocket::State::Closed); socket->set_error(TCPSocket::Error::FINDuringConnect); socket->set_setup_state(Socket::SetupState::Completed); return; case TCPFlags::ACK | TCPFlags::RST: socket->set_ack_number(tcp_packet.sequence_number() + payload_size); - unused_rc = socket->send_tcp_packet(TCPFlags::ACK); + send_delayed_tcp_ack(socket); socket->set_state(TCPSocket::State::Closed); socket->set_error(TCPSocket::Error::RSTDuringConnect); socket->set_setup_state(Socket::SetupState::Completed); @@ -536,7 +577,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) if (socket->duplicate_acks() < TCPSocket::maximum_duplicate_acks) { dbgln_if(TCP_DEBUG, "Sending ACK with same ack number to trigger fast retransmission"); socket->set_duplicate_acks(socket->duplicate_acks() + 1); - unused_rc = socket->send_tcp_packet(TCPFlags::ACK); + [[maybe_unused]] auto result = socket->send_ack(true); } return; } @@ -548,7 +589,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) socket->did_receive(ipv4_packet.source(), tcp_packet.source_port(), { &ipv4_packet, sizeof(IPv4Packet) + ipv4_packet.payload_size() }, packet_timestamp); socket->set_ack_number(tcp_packet.sequence_number() + payload_size + 1); - unused_rc = socket->send_tcp_packet(TCPFlags::ACK); + send_delayed_tcp_ack(socket); socket->set_state(TCPSocket::State::CloseWait); socket->set_connected(false); return; @@ -559,7 +600,7 @@ void handle_tcp(const IPv4Packet& ipv4_packet, const Time& packet_timestamp) socket->set_ack_number(tcp_packet.sequence_number() + payload_size); dbgln_if(TCP_DEBUG, "Got packet with ack_no={}, seq_no={}, payload_size={}, acking it with new ack_no={}, seq_no={}", tcp_packet.ack_number(), tcp_packet.sequence_number(), payload_size, socket->ack_number(), socket->sequence_number()); - unused_rc = socket->send_tcp_packet(TCPFlags::ACK); + send_delayed_tcp_ack(socket); } } } diff --git a/Kernel/Net/TCPSocket.cpp b/Kernel/Net/TCPSocket.cpp index e72604fa23..c3176538c9 100644 --- a/Kernel/Net/TCPSocket.cpp +++ b/Kernel/Net/TCPSocket.cpp @@ -167,6 +167,13 @@ KResultOr<size_t> TCPSocket::protocol_send(const UserOrKernelBuffer& data, size_ return data_length; } +KResult TCPSocket::send_ack(bool allow_duplicate) +{ + if (!allow_duplicate && m_last_ack_number_sent == m_ack_number) + return KSuccess; + return send_tcp_packet(TCPFlags::ACK); +} + KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, size_t payload_size) { const bool has_mss_option = flags == TCPFlags::SYN; @@ -183,8 +190,11 @@ KResult TCPSocket::send_tcp_packet(u16 flags, const UserOrKernelBuffer* payload, tcp_packet.set_data_offset(header_size / sizeof(u32)); tcp_packet.set_flags(flags); - if (flags & TCPFlags::ACK) + if (flags & TCPFlags::ACK) { + m_last_ack_number_sent = m_ack_number; + m_last_ack_sent_time = kgettimeofday(); tcp_packet.set_ack_number(m_ack_number); + } if (payload && !payload->read(tcp_packet.payload(), payload_size)) return EFAULT; @@ -308,6 +318,22 @@ void TCPSocket::receive_tcp_packet(const TCPPacket& packet, u16 size) m_bytes_in += packet.header_size() + size; } +bool TCPSocket::should_delay_next_ack() const +{ + // FIXME: We don't know the MSS here so make a reasonable guess. + const size_t mss = 1500; + + // RFC 1122 says we should send an ACK for every two full-sized segments. + if (m_ack_number >= m_last_ack_number_sent + 2 * mss) + return false; + + // RFC 1122 says we should not delay ACKs for more than 500 milliseconds. + if (kgettimeofday() >= m_last_ack_sent_time + Time::from_milliseconds(500)) + return false; + + return true; +} + NetworkOrdered<u16> TCPSocket::compute_tcp_checksum(const IPv4Address& source, const IPv4Address& destination, const TCPPacket& packet, u16 payload_size) { struct [[gnu::packed]] PseudoHeader { diff --git a/Kernel/Net/TCPSocket.h b/Kernel/Net/TCPSocket.h index cb490effbe..28621a98f0 100644 --- a/Kernel/Net/TCPSocket.h +++ b/Kernel/Net/TCPSocket.h @@ -133,10 +133,13 @@ public: void set_duplicate_acks(u32 acks) { m_duplicate_acks = acks; } u32 duplicate_acks() const { return m_duplicate_acks; } + KResult send_ack(bool allow_duplicate = false); KResult send_tcp_packet(u16 flags, const UserOrKernelBuffer* = nullptr, size_t = 0); void send_outgoing_packets(RoutingDecision&); void receive_tcp_packet(const TCPPacket&, u16 size); + bool should_delay_next_ack() const; + static Lockable<HashMap<IPv4SocketTuple, TCPSocket*>>& sockets_by_tuple(); static RefPtr<TCPSocket> from_tuple(const IPv4SocketTuple& tuple); static RefPtr<TCPSocket> from_endpoints(const IPv4Address& local_address, u16 local_port, const IPv4Address& peer_address, u16 peer_port); @@ -194,6 +197,9 @@ private: SinglyLinkedList<OutgoingPacket> m_not_acked; u32 m_duplicate_acks { 0 }; + + u32 m_last_ack_number_sent { 0 }; + Time m_last_ack_sent_time; }; } |