Skip to content

Commit

Permalink
Add a flag to configure Navy to fail startup if we're not the sole ow…
Browse files Browse the repository at this point in the history
…ner of a file

Summary:
This allows Navy to fail fast if we're not the sole owner of a flash device/file that we're using for NvmCache.

Right now this is an opt-in feature. The next step is to move existing use cases to specify this flag, and then make this the default behavior.

Reviewed By: jaesoo-fb

Differential Revision: D53013668

fbshipit-source-id: a2fef73ef234fd1242924eddac57ba9865792a0d
  • Loading branch information
Jimmy Lu authored and facebook-github-bot committed Jan 30, 2024
1 parent c11e0c6 commit 9853dfa
Show file tree
Hide file tree
Showing 7 changed files with 209 additions and 156 deletions.
8 changes: 8 additions & 0 deletions cachelib/allocator/nvmcache/NavyConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,7 @@ class NavyConfig {

// ============ Device settings =============
// Returns the configured Navy device block size (blockSize_).
uint64_t getBlockSize() const { return blockSize_; }
// Returns true if Navy was configured to start only when it is the sole
// owner of its backing file (isExclusiveOwner_).
bool getExclusiveOwner() const { return isExclusiveOwner_; }
// Returns the cache file name/path.
// NOTE(review): presumably fileName_; the definition is not in this view.
const std::string& getFileName() const;
// Returns the Navy RAID device file paths.
// NOTE(review): the definition is not in this view.
const std::vector<std::string>& getRaidPaths() const;
// Returns the configured device metadata size (deviceMetadataSize_).
uint64_t getDeviceMetadataSize() const { return deviceMetadataSize_; }
Expand Down Expand Up @@ -553,6 +554,11 @@ class NavyConfig {
// Set the Navy device block size in bytes.
void setBlockSize(uint64_t blockSize) noexcept { blockSize_ = blockSize; }
// Enable or disable the NVMe FDP device data placement mode in CacheLib.
void setEnableFDP(bool enable) noexcept { enableFDP_ = enable; }
// If true, Navy will only start if it is the sole owner of the file.
// This only applies to non-memory-backed files. The feature is opt-in:
// isExclusiveOwner_ is initialized to false.
void setExclusiveOwner(bool isExclusiveOwner) noexcept {
isExclusiveOwner_ = isExclusiveOwner;
}
// Set the parameters for a simple file.
// @throw std::invalid_argument if RAID files have been already set.
void setSimpleFile(const std::string& fileName,
Expand Down Expand Up @@ -639,6 +645,8 @@ class NavyConfig {
// ============ Device settings =============
// Navy specific device block size in bytes.
uint64_t blockSize_{4096};
// If true, Navy will only start if it's the sole owner of the file.
// Off by default; changed through setExclusiveOwner().
bool isExclusiveOwner_{false};
// The file name/path for caching.
std::string fileName_;
// An array of Navy RAID device file paths.
Expand Down
3 changes: 2 additions & 1 deletion cachelib/allocator/nvmcache/NavySetup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,8 @@ std::unique_ptr<cachelib::navy::Device> createDevice(
config.getIoEngine(),
config.getQDepth(),
config.isFDPEnabled(),
std::move(encryptor));
std::move(encryptor),
config.getExclusiveOwner());
} else {
return cachelib::navy::createMemoryDevice(config.getFileSize(),
std::move(encryptor), blockSize);
Expand Down
129 changes: 0 additions & 129 deletions cachelib/navy/Factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,91 +338,6 @@ class CacheProtoImpl final : public CacheProto {
std::vector<std::unique_ptr<EnginePairProto>> enginePairsProto_;
Driver::Config config_;
};
// Opens the cache file @fileName for read/write (creating it if it does not
// exist) and, when requested, resizes it to @size.
//
// @param fileName  path of the cache file; must not be empty
// @param size      requested file size in bytes
// @param truncate  if true, shrink the file with ftruncate or grow it with
//                  fallocate (when available) so that it matches @size
//
// @return the opened file
// @throw std::invalid_argument if @fileName is empty
// @throw std::system_error if opening, fstat, ftruncate, fallocate or
//        posix_fadvise fails
folly::File openCacheFile(const std::string& fileName,
                          uint64_t size,
                          bool truncate) {
  XLOG(INFO) << "Cache file: " << fileName << " size: " << size
             << " truncate: " << truncate;
  if (fileName.empty()) {
    throw std::invalid_argument("File name is empty");
  }

  const int flags{O_RDWR | O_CREAT};
  // try opening with o_direct. For tests, we might get a file on tmpfs that
  // might not support o_direct. Hence, we might have to default to avoiding
  // o_direct in those cases.
  folly::File f;

  try {
    f = folly::File(fileName.c_str(), flags | O_DIRECT);
  } catch (const std::system_error& e) {
    // Only EINVAL is retried without O_DIRECT; any other failure is
    // propagated to the caller unchanged.
    if (e.code().value() == EINVAL) {
      XLOG(ERR) << "Failed to open with o-direct, trying without. Error: "
                << e.what();
      f = folly::File(fileName.c_str(), flags);
    } else {
      throw;
    }
  }
  XDCHECK_GE(f.fd(), 0);

  // get current file size
  struct stat fileStat;
  if (fstat(f.fd(), &fileStat) < 0) {
    throw std::system_error(
        errno,
        std::system_category(),
        folly::sformat("failed to get the file stat for file {}", fileName));
  }

  const uint64_t curfileSize = static_cast<uint64_t>(fileStat.st_size);

  // ftruncate the file if requesting a smaller file size and truncate flag is
  // set
  if (truncate && size < curfileSize) {
    if (::ftruncate(f.fd(), size /*length*/) < 0) {
      throw std::system_error(
          errno,
          std::system_category(),
          folly::sformat(
              "ftruncate failed with requested size {}, current size {}", size,
              curfileSize));
    }
    XLOGF(INFO, "Cache file {} is ftruncated from {} bytes to {} bytes",
          fileName, curfileSize, size);
  }

#ifndef MISSING_FALLOCATE
  // TODO(jiayueb): make allocate flag user configurable and migrate the
  // existing use cases
  // fallocate the file if requesting a larger file size and allocate flag is
  // set
  if (truncate && size > curfileSize) {
    if (::fallocate(f.fd(), 0 /*mode*/, curfileSize /*offset*/,
                    size - curfileSize /*len*/) < 0) {
      throw std::system_error(
          errno,
          std::system_category(),
          folly::sformat(
              "fallocate failed with requested size {}, current size {}", size,
              curfileSize));
    }
    XLOGF(INFO, "Cache file {} is fallocated from {} bytes to {} bytes",
          fileName, curfileSize, size);
  }
#endif

#ifndef MISSING_FADVISE
  // posix_fadvise() reports failure through its return value (a positive
  // error number) and does not set errno, so test for non-zero and report
  // the returned code rather than errno.
  const int fadviseErr =
      ::posix_fadvise(f.fd(), 0, size, POSIX_FADV_DONTNEED);
  if (fadviseErr != 0) {
    throw std::system_error(fadviseErr, std::system_category(),
                            "Error fadvising cache file");
  }
#endif

  return f;
}
} // namespace

std::unique_ptr<BlockCacheProto> createBlockCacheProto() {
Expand All @@ -444,48 +359,4 @@ std::unique_ptr<CacheProto> createCacheProto() {
std::unique_ptr<AbstractCache> createCache(std::unique_ptr<CacheProto> proto) {
return std::move(dynamic_cast<CacheProtoImpl&>(*proto)).create();
}

std::unique_ptr<Device> createFileDevice(
std::vector<std::string> filePaths,
uint64_t fdSize,
bool truncateFile,
uint32_t blockSize,
uint32_t stripeSize,
uint32_t maxDeviceWriteSize,
IoEngine ioEngine,
uint32_t qDepth,
bool isFDPEnabled,
std::shared_ptr<navy::DeviceEncryptor> encryptor) {
// File paths are opened in the increasing order of the
// path string. This ensures that RAID0 stripes aren't
// out of order even if the caller changes the order of
// the file paths. We can recover the cache as long as all
// the paths are specified, regardless of the order.

std::sort(filePaths.begin(), filePaths.end());
std::vector<folly::File> fileVec;
for (const auto& path : filePaths) {
folly::File f;
try {
f = openCacheFile(path, fdSize, truncateFile);
} catch (const std::exception& e) {
XLOG(ERR) << "Exception in openCacheFile(" << path << "): " << e.what()
<< ". Errno: " << errno;
throw;
}
fileVec.push_back(std::move(f));
}

return createDirectIoFileDevice(std::move(fileVec),
std::move(filePaths),
fdSize,
blockSize,
stripeSize,
maxDeviceWriteSize,
ioEngine,
qDepth,
isFDPEnabled,
std::move(encryptor));
}

} // namespace facebook::cachelib::navy
26 changes: 0 additions & 26 deletions cachelib/navy/Factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,32 +197,6 @@ std::unique_ptr<CacheProto> createCacheProto();
// Creates Cache object.
// @param proto cache object prototype
std::unique_ptr<AbstractCache> createCache(std::unique_ptr<CacheProto> proto);

// Creates a direct IO file device.
// RAID0 with given stripe size is applied if multiple files are provided.
// The implementation sorts the paths before opening them, so the order in
// which the caller lists them does not affect the stripe layout.
//
// @param filePaths name(s) of the file(s)
// @param fileSize size of the file(s)
// @param truncateFile whether to truncate the file
// @param blockSize device block size
// @param stripeSize RAID stripe size if applicable
// @param maxDeviceWriteSize device maximum granularity of writes
// @param ioEngine IoEngine to be used for IO
// @param qDepth queue depth for async IO; 0 for sync IO
// @param isFDPEnabled whether FDP placement mode enabled or not
// @param encryptor encryption object
//
// @return the created device
// @throw rethrows any exception raised while opening a file (for example
//        std::invalid_argument for an empty file name, or std::system_error
//        when a system call fails)
std::unique_ptr<Device> createFileDevice(
std::vector<std::string> filePaths,
uint64_t fileSize,
bool truncateFile,
uint32_t blockSize,
uint32_t stripeSize,
uint32_t maxDeviceWriteSize,
IoEngine ioEngine,
uint32_t qDepth,
bool isFDPEnabled,
std::shared_ptr<navy::DeviceEncryptor> encryptor);

} // namespace navy
} // namespace cachelib
} // namespace facebook
130 changes: 130 additions & 0 deletions cachelib/navy/common/Device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,92 @@ int FileDevice::allocatePlacementHandle() {
return -1;
}

// Opens the cache file @fileName for read/write (creating it if it does not
// exist) and, when requested, resizes it to @size.
//
// @param fileName          path of the cache file; must not be empty
// @param size              requested file size in bytes
// @param truncate          if true, shrink the file with ftruncate or grow
//                          it with fallocate (when available) so that it
//                          matches @size
// @param isExclusiveOwner  if true, O_EXCL is added to the open flags
//
// @return the opened file
// @throw std::invalid_argument if @fileName is empty
// @throw std::system_error if opening, fstat, ftruncate, fallocate or
//        posix_fadvise fails
folly::File openCacheFile(const std::string& fileName,
                          uint64_t size,
                          bool truncate,
                          bool isExclusiveOwner) {
  XLOG(INFO) << "Cache file: " << fileName << " size: " << size
             << " truncate: " << truncate;
  if (fileName.empty()) {
    throw std::invalid_argument("File name is empty");
  }

  // NOTE(review): per open(2), O_CREAT | O_EXCL makes open() fail with EEXIST
  // whenever the path already exists, including a file left behind by a
  // previous run. Confirm this is the intended "sole owner" check.
  const int flags{O_RDWR | O_CREAT | (isExclusiveOwner ? O_EXCL : 0)};
  // try opening with o_direct. For tests, we might get a file on tmpfs that
  // might not support o_direct. Hence, we might have to default to avoiding
  // o_direct in those cases.
  folly::File f;

  try {
    f = folly::File(fileName.c_str(), flags | O_DIRECT);
  } catch (const std::system_error& e) {
    // Only EINVAL is retried without O_DIRECT; any other failure is
    // propagated to the caller unchanged.
    if (e.code().value() == EINVAL) {
      XLOG(ERR) << "Failed to open with o-direct, trying without. Error: "
                << e.what();
      f = folly::File(fileName.c_str(), flags);
    } else {
      throw;
    }
  }
  XDCHECK_GE(f.fd(), 0);

  // get current file size
  struct stat fileStat;
  if (fstat(f.fd(), &fileStat) < 0) {
    throw std::system_error(
        errno,
        std::system_category(),
        folly::sformat("failed to get the file stat for file {}", fileName));
  }

  const uint64_t curfileSize = static_cast<uint64_t>(fileStat.st_size);

  // ftruncate the file if requesting a smaller file size and truncate flag is
  // set
  if (truncate && size < curfileSize) {
    if (::ftruncate(f.fd(), size /*length*/) < 0) {
      throw std::system_error(
          errno,
          std::system_category(),
          folly::sformat(
              "ftruncate failed with requested size {}, current size {}", size,
              curfileSize));
    }
    XLOGF(INFO, "Cache file {} is ftruncated from {} bytes to {} bytes",
          fileName, curfileSize, size);
  }

#ifndef MISSING_FALLOCATE
  // TODO(jiayueb): make allocate flag user configurable and migrate the
  // existing use cases
  // fallocate the file if requesting a larger file size and allocate flag is
  // set
  if (truncate && size > curfileSize) {
    if (::fallocate(f.fd(), 0 /*mode*/, curfileSize /*offset*/,
                    size - curfileSize /*len*/) < 0) {
      throw std::system_error(
          errno,
          std::system_category(),
          folly::sformat(
              "fallocate failed with requested size {}, current size {}", size,
              curfileSize));
    }
    XLOGF(INFO, "Cache file {} is fallocated from {} bytes to {} bytes",
          fileName, curfileSize, size);
  }
#endif

#ifndef MISSING_FADVISE
  // posix_fadvise() reports failure through its return value (a positive
  // error number) and does not set errno, so test for non-zero and report
  // the returned code rather than errno.
  const int fadviseErr =
      ::posix_fadvise(f.fd(), 0, size, POSIX_FADV_DONTNEED);
  if (fadviseErr != 0) {
    throw std::system_error(fadviseErr, std::system_category(),
                            "Error fadvising cache file");
  }
#endif

  return f;
}
} // namespace

std::unique_ptr<Device> createMemoryDevice(
Expand Down Expand Up @@ -1160,4 +1246,48 @@ std::unique_ptr<Device> createDirectIoFileDevice(
encryptor);
}

std::unique_ptr<Device> createFileDevice(
std::vector<std::string> filePaths,
uint64_t fdSize,
bool truncateFile,
uint32_t blockSize,
uint32_t stripeSize,
uint32_t maxDeviceWriteSize,
IoEngine ioEngine,
uint32_t qDepth,
bool isFDPEnabled,
std::shared_ptr<navy::DeviceEncryptor> encryptor,
bool isExclusiveOwner) {
// File paths are opened in the increasing order of the
// path string. This ensures that RAID0 stripes aren't
// out of order even if the caller changes the order of
// the file paths. We can recover the cache as long as all
// the paths are specified, regardless of the order.

std::sort(filePaths.begin(), filePaths.end());
std::vector<folly::File> fileVec;
for (const auto& path : filePaths) {
folly::File f;
try {
// TODO: beyondsora implement
f = openCacheFile(path, fdSize, truncateFile, isExclusiveOwner);
} catch (const std::exception& e) {
XLOG(ERR) << "Exception in openCacheFile(" << path << "): " << e.what()
<< ". Errno: " << errno;
throw;
}
fileVec.push_back(std::move(f));
}

return createDirectIoFileDevice(std::move(fileVec),
std::move(filePaths),
fdSize,
blockSize,
stripeSize,
maxDeviceWriteSize,
ioEngine,
qDepth,
isFDPEnabled,
std::move(encryptor));
}
} // namespace facebook::cachelib::navy
27 changes: 27 additions & 0 deletions cachelib/navy/common/Device.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,33 @@ std::unique_ptr<Device> createDirectIoFileDevice(
uint32_t stripeSize,
uint32_t maxDeviceWriteSize,
std::shared_ptr<DeviceEncryptor> encryptor);

// Creates a direct IO file device.
// RAID0 with given stripe size is applied if multiple files are provided.
// The implementation sorts the paths before opening them, so the order in
// which the caller lists them does not affect the stripe layout.
//
// @param filePaths name(s) of the file(s)
// @param fileSize size of the file(s)
// @param truncateFile whether to truncate the file
// @param blockSize device block size
// @param stripeSize RAID stripe size if applicable
// @param maxDeviceWriteSize device maximum granularity of writes
// @param ioEngine IoEngine to be used for IO
// @param qDepth queue depth for async IO; 0 for sync IO
// @param isFDPEnabled whether FDP placement mode enabled or not
// @param encryptor encryption object
// @param isExclusiveOwner fail if not sole owner of the file; when true the
//                         files are opened with O_EXCL
//
// @return the created device
// @throw rethrows any exception raised while opening a file (for example
//        std::invalid_argument for an empty file name, or std::system_error
//        when a system call fails)
std::unique_ptr<Device> createFileDevice(
std::vector<std::string> filePaths,
uint64_t fileSize,
bool truncateFile,
uint32_t blockSize,
uint32_t stripeSize,
uint32_t maxDeviceWriteSize,
IoEngine ioEngine,
uint32_t qDepth,
bool isFDPEnabled,
std::shared_ptr<navy::DeviceEncryptor> encryptor,
bool isExclusiveOwner);
} // namespace navy
} // namespace cachelib
} // namespace facebook
Loading

0 comments on commit 9853dfa

Please sign in to comment.