这是indexloc提供的服务,不要输入任何密码
Skip to content

Avoid crashing when LRU cache keys change. #96930

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions third_party/xla/xla/pjrt/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -860,6 +860,7 @@ cc_library(
]),
deps = [
"@com_google_absl//absl/container:node_hash_map",
"@com_google_absl//absl/log:check",
"@local_tsl//tsl/platform:logging",
],
)
Expand Down
12 changes: 9 additions & 3 deletions third_party/xla/xla/pjrt/lru_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ limitations under the License.
#include <unordered_map>

#include "absl/container/node_hash_map.h"
#include "absl/log/check.h"
#include "tsl/platform/logging.h"

namespace xla {
Expand Down Expand Up @@ -180,12 +181,17 @@ Value LRUCache<Key, Value, Hash, Eq>::GetOrCreateIfAbsent(
// Evict an LRU entry if we are over capacity.
if (lru_list_->size_ > lru_list_->capacity_) {
Entry* to_remove = static_cast<Entry*>(lru_head.next);
to_remove->next->prev = &lru_head;
lru_head.next = to_remove->next;
// Extract instead of erase in case the kv pair contains python objects
// whose destruction could call back into this code. Extract causes the
// dtor to be delayed until the kv pair is fully removed from the map.
to_remove->container->entries_.extract(*to_remove->key);
//
// TODO: mwhittaker - Add a check that node is equal to to_remove. If it's
// not, then the user has probably mutated a key, which is undefined
// behavior.
auto node = to_remove->container->entries_.extract(*to_remove->key);
CHECK(!node.empty());
node.mapped().next->prev = node.mapped().prev;
node.mapped().prev->next = node.mapped().next;
--lru_list_->size_;
}
return v;
Expand Down
47 changes: 47 additions & 0 deletions third_party/xla/xla/pjrt/lru_cache_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ limitations under the License.

#include "xla/pjrt/lru_cache.h"

#include <cstdint>
#include <random>

#include "xla/hlo/testlib/test.h"
Expand Down Expand Up @@ -115,5 +116,51 @@ TEST(LRUCache, RandomInsertions) {
}
}

struct Int {
int val;
};

struct IntHash {
uint64_t operator()(Int*) const { return 0; }
};

struct IntEq {
bool operator()(Int* x, Int* y) const { return x->val == y->val; }
};

TEST(LRUCache, ChangingKeys) {
// This test checks the behavior of LRUCache in the face of changing keys.
// Technically, changing keys leads to undefined behavior, but practically, we
// are checking that this does not crash. The workload itself is written
// pathologically to trigger a previous bug.
//
// The following two blocks differ only in whether they assign b.val to a.val
// or vice versa. One of the two used to crash.

{
LRUCache<Int*, int, IntHash, IntEq>::LRUList list(2);
LRUCache<Int*, int, IntHash, IntEq> cache(&list);
Int a{1};
Int b{2};
Int c{3};
cache.GetOrCreateIfAbsent(&a, [](Int*) { return 1; });
cache.GetOrCreateIfAbsent(&b, [](Int*) { return 2; });
a.val = b.val;
cache.GetOrCreateIfAbsent(&c, [](Int*) { return 3; });
}

{
LRUCache<Int*, int, IntHash, IntEq>::LRUList list(2);
LRUCache<Int*, int, IntHash, IntEq> cache(&list);
Int a{1};
Int b{2};
Int c{3};
cache.GetOrCreateIfAbsent(&a, [](Int*) { return 1; });
cache.GetOrCreateIfAbsent(&b, [](Int*) { return 2; });
b.val = a.val;
cache.GetOrCreateIfAbsent(&c, [](Int*) { return 3; });
}
}

} // namespace
} // namespace xla