From d3dac4a6b2427462645766ebc149f4c256985a6f Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Mon, 15 Jul 2024 22:17:16 +0400 Subject: [PATCH 01/10] tart pull: try to re-use APFS blocks by cloning the base image --- Sources/tart/LocalLayerCache.swift | 3 ++ Sources/tart/OCI/Layerizer/DiskV2.swift | 49 +++++++++++++++++++++---- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/Sources/tart/LocalLayerCache.swift b/Sources/tart/LocalLayerCache.swift index ee0ff2e0..de2dc029 100644 --- a/Sources/tart/LocalLayerCache.swift +++ b/Sources/tart/LocalLayerCache.swift @@ -1,10 +1,13 @@ import Foundation struct LocalLayerCache { + let diskURL: URL private let mappedDisk: Data private var digestToRange: [String : Range] = [:] init?(_ diskURL: URL, _ manifest: OCIManifest) throws { + self.diskURL = diskURL + // mmap(2) the disk that contains the layers from the manifest self.mappedDisk = try Data(contentsOf: diskURL, options: [.alwaysMapped]) diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift b/Sources/tart/OCI/Layerizer/DiskV2.swift index dcdce7eb..1414af1e 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -37,8 +37,17 @@ class DiskV2: Disk { // Support resumable pulls let pullResumed = FileManager.default.fileExists(atPath: diskURL.path) - if !pullResumed && !FileManager.default.createFile(atPath: diskURL.path, contents: nil) { - throw OCIError.FailedToCreateVmFile + if !pullResumed { + if let localLayerCache = localLayerCache { + // Clone the local layer cache's disk and use it as a base, potentially + // reducing the space usage since some blocks won't be written at all + try FileManager.default.copyItem(at: localLayerCache.diskURL, to: diskURL) + } else { + // Otherwise create an empty disk + if !FileManager.default.createFile(atPath: diskURL.path, contents: nil) { + throw OCIError.FailedToCreateVmFile + } + } } // Calculate the uncompressed disk size @@ -89,13 +98,21 @@ class DiskV2: Disk { return } - // Open the disk file + // Open the disk file for writing let disk = try FileHandle(forWritingTo: diskURL) + // Also open the disk file for reading and verifying + // its contents in case the local layer cache is used + let rdisk: FileHandle? 
= if localLayerCache != nil { + try FileHandle(forReadingFrom: diskURL) + } else { + nil + } + // Check if we already have this layer contents in the local layer cache if let localLayerCache = localLayerCache, let data = localLayerCache.find(diskLayer.digest), Digest.hash(data) == uncompressedLayerContentDigest { // Fulfil the layer contents from the local blob cache - _ = try zeroSkippingWrite(disk, diskWritingOffset, data) + _ = try zeroSkippingWrite(disk, rdisk, diskWritingOffset, data) try disk.close() // Update the progress @@ -112,7 +129,7 @@ class DiskV2: Disk { return } - diskWritingOffset = try zeroSkippingWrite(disk, diskWritingOffset, data) + diskWritingOffset = try zeroSkippingWrite(disk, rdisk, diskWritingOffset, data) } try await registry.pullBlob(diskLayer.digest) { data in @@ -132,7 +149,7 @@ class DiskV2: Disk { } } - private static func zeroSkippingWrite(_ disk: FileHandle, _ offset: UInt64, _ data: Data) throws -> UInt64 { + private static func zeroSkippingWrite(_ disk: FileHandle, _ rdisk: FileHandle?, _ offset: UInt64, _ data: Data) throws -> UInt64 { let holeGranularityBytes = 64 * 1024 // A zero chunk for faster than byte-by-byte comparisons @@ -152,7 +169,25 @@ class DiskV2: Disk { var offset = offset for chunk in data.chunks(ofCount: holeGranularityBytes) { - // Only write chunks that are not zero + // If the local layer cache is used, only write chunks that differ + // since the base disk can contain anything at any position + if let rdisk = rdisk { + try rdisk.seek(toOffset: offset) + let actualContentsOnDisk = try rdisk.read(upToCount: chunk.count) + + if chunk != actualContentsOnDisk { + try disk.seek(toOffset: offset) + disk.write(chunk) + } + + offset += UInt64(chunk.count) + + continue + } + + // Otherwise, only write chunks that are not zero + // since the base disk is created from scratch and + // is zeroed via truncate(2) if chunk != zeroChunk { try disk.seek(toOffset: offset) disk.write(chunk) From bb88b32c3b808d0c10ec2e793d3d67a1375ccf83 Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Wed, 17 Jul 2024 13:18:42 +0400 Subject: [PATCH 02/10] Punch a hole when a zero chunk is detected --- Sources/tart/OCI/Layerizer/DiskV2.swift | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift b/Sources/tart/OCI/Layerizer/DiskV2.swift index 1414af1e..f03673df 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -1,5 +1,6 @@ import Foundation import Compression +import System class DiskV2: Disk { private static let bufferSizeBytes = 4 * 1024 * 1024 @@ -175,7 +176,13 @@ class DiskV2: Disk { try rdisk.seek(toOffset: offset) let actualContentsOnDisk = try rdisk.read(upToCount: chunk.count) - if chunk != actualContentsOnDisk { + if chunk == zeroChunk { + var arg = fpunchhole_t(fp_flags: 0, reserved: 0, fp_offset: off_t(offset), fp_length: off_t(chunk.count)) + + if fcntl(disk.fileDescriptor, F_PUNCHHOLE, &arg) == -1 { + throw RuntimeError.PullFailed("failed to punch hole: \(Errno.lastErrnoValue)") + } + } else if chunk != actualContentsOnDisk { try disk.seek(toOffset: offset) disk.write(chunk) } From 78c59becca89eabf553b1f6f851c38b492c71e77 Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Wed, 17 Jul 2024 18:45:02 +0400 Subject: [PATCH 03/10] Properly retrieve errno when hole punching operation fails --- Sources/tart/OCI/Layerizer/DiskV2.swift | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift 
b/Sources/tart/OCI/Layerizer/DiskV2.swift index f03673df..71409ec7 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -180,7 +180,9 @@ class DiskV2: Disk { var arg = fpunchhole_t(fp_flags: 0, reserved: 0, fp_offset: off_t(offset), fp_length: off_t(chunk.count)) if fcntl(disk.fileDescriptor, F_PUNCHHOLE, &arg) == -1 { - throw RuntimeError.PullFailed("failed to punch hole: \(Errno.lastErrnoValue)") + let details = Errno(rawValue: errno) + + throw RuntimeError.PullFailed("failed to punch hole: \(details)") } } else if chunk != actualContentsOnDisk { try disk.seek(toOffset: offset) From c563142f7fbf7e00f0f311271507d3633d32e05c Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Wed, 17 Jul 2024 18:47:13 +0400 Subject: [PATCH 04/10] tart pull: do not retry on RuntimeError --- Sources/tart/VMStorageOCI.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/tart/VMStorageOCI.swift b/Sources/tart/VMStorageOCI.swift index 6a920f78..a4ad4b89 100644 --- a/Sources/tart/VMStorageOCI.swift +++ b/Sources/tart/VMStorageOCI.swift @@ -202,6 +202,10 @@ class VMStorageOCI: PrunableStorage { try await tmpVMDir.pullFromRegistry(registry: registry, manifest: manifest, concurrency: concurrency, localLayerCache: localLayerCache) } recoverFromFailure: { error in + if error is RuntimeError { + return .throw + } + print("Error: \(error.localizedDescription)") print("Attempting to re-try...") From b85deec31e49cd1511a761eb8158db23987761c2 Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Wed, 17 Jul 2024 19:16:10 +0400 Subject: [PATCH 05/10] Ensure that the holes we're about to punch are FS block size-aligned --- Sources/tart/OCI/Layerizer/DiskV2.swift | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift b/Sources/tart/OCI/Layerizer/DiskV2.swift index 71409ec7..e99cc4f0 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -68,6 +68,15 @@ class DiskV2: Disk { try disk.truncate(atOffset: uncompressedDiskSize) try disk.close() + // Determine the file system block size + var st = stat() + if stat(diskURL.path, &st) == -1 { + let details = Errno(rawValue: errno) + + throw RuntimeError.PullFailed("failed to stat(2) disk \(diskURL.path): \(details)") + } + let fsBlockSize = UInt64(st.st_blksize) + // Concurrently fetch and decompress layers try await withThrowingTaskGroup(of: Void.self) { group in var globalDiskWritingOffset: UInt64 = 0 @@ -113,7 +122,7 @@ class DiskV2: Disk { // Check if we already have this layer contents in the local layer cache if let localLayerCache = localLayerCache, let data = localLayerCache.find(diskLayer.digest), Digest.hash(data) == uncompressedLayerContentDigest { // Fulfil the layer contents from the local blob cache - _ = try zeroSkippingWrite(disk, rdisk, diskWritingOffset, data) + _ = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data) try disk.close() // Update the progress @@ -130,7 +139,7 @@ class DiskV2: Disk { return } - diskWritingOffset = try zeroSkippingWrite(disk, rdisk, diskWritingOffset, data) + diskWritingOffset = try zeroSkippingWrite(disk, rdisk, fsBlockSize, diskWritingOffset, data) } try await registry.pullBlob(diskLayer.digest) { data in @@ -150,7 +159,7 @@ class DiskV2: Disk { } } - private static func zeroSkippingWrite(_ disk: FileHandle, _ rdisk: FileHandle?, _ offset: UInt64, _ data: Data) throws -> UInt64 { + private static func zeroSkippingWrite(_ disk: 
FileHandle, _ rdisk: FileHandle?, _ fsBlockSize: UInt64, _ offset: UInt64, _ data: Data) throws -> UInt64 { let holeGranularityBytes = 64 * 1024 // A zero chunk for faster than byte-by-byte comparisons @@ -176,7 +185,10 @@ class DiskV2: Disk { try rdisk.seek(toOffset: offset) let actualContentsOnDisk = try rdisk.read(upToCount: chunk.count) - if chunk == zeroChunk { + // F_PUNCHHOLE requires the holes to be aligned to file system block boundaries + let isHoleAligned = (offset % fsBlockSize) == 0 && (UInt64(chunk.count) % fsBlockSize) == 0 + + if isHoleAligned && chunk == zeroChunk { var arg = fpunchhole_t(fp_flags: 0, reserved: 0, fp_offset: off_t(offset), fp_length: off_t(chunk.count)) if fcntl(disk.fileDescriptor, F_PUNCHHOLE, &arg) == -1 { From f9f7c1cd846b63111b4d74c0533476ee74816d16 Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Wed, 17 Jul 2024 19:20:59 +0400 Subject: [PATCH 06/10] VMDirectory: remove unused static variables --- Sources/tart/VMDirectory+OCI.swift | 3 --- 1 file changed, 3 deletions(-) diff --git a/Sources/tart/VMDirectory+OCI.swift b/Sources/tart/VMDirectory+OCI.swift index 07c59225..584d597b 100644 --- a/Sources/tart/VMDirectory+OCI.swift +++ b/Sources/tart/VMDirectory+OCI.swift @@ -11,9 +11,6 @@ enum OCIError: Error { } extension VMDirectory { - private static let bufferSizeBytes = 64 * 1024 * 1024 - private static let layerLimitBytes = 500 * 1000 * 1000 - func pullFromRegistry(registry: Registry, manifest: OCIManifest, concurrency: UInt, localLayerCache: LocalLayerCache?) async throws { // Pull VM's config file layer and re-serialize it into a config file let configLayers = manifest.layers.filter { From 4ce2dded8a6cb45f9b1425ed768ab1c1143ed175 Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Wed, 17 Jul 2024 19:40:57 +0400 Subject: [PATCH 07/10] tart pull: log if we've found an image to deduplicate against --- Sources/tart/LocalLayerCache.swift | 7 ++++++- Sources/tart/VMDirectory+OCI.swift | 2 +- Sources/tart/VMStorageOCI.swift | 18 ++++++++++++------ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/Sources/tart/LocalLayerCache.swift b/Sources/tart/LocalLayerCache.swift index de2dc029..58c21bad 100644 --- a/Sources/tart/LocalLayerCache.swift +++ b/Sources/tart/LocalLayerCache.swift @@ -1,11 +1,16 @@ import Foundation struct LocalLayerCache { + let name: String + let deduplicatedBytes: UInt64 let diskURL: URL + private let mappedDisk: Data private var digestToRange: [String : Range] = [:] - init?(_ diskURL: URL, _ manifest: OCIManifest) throws { + init?(_ name: String, _ deduplicatedBytes: UInt64, _ diskURL: URL, _ manifest: OCIManifest) throws { + self.name = name + self.deduplicatedBytes = deduplicatedBytes self.diskURL = diskURL // mmap(2) the disk that contains the layers from the manifest diff --git a/Sources/tart/VMDirectory+OCI.swift b/Sources/tart/VMDirectory+OCI.swift index 584d597b..7eb2ddb1 100644 --- a/Sources/tart/VMDirectory+OCI.swift +++ b/Sources/tart/VMDirectory+OCI.swift @@ -77,7 +77,7 @@ extension VMDirectory { } try nvram.close() - // Serialize VM's manifest to enable better de-duplication on subsequent "tart pull"'s + // Serialize VM's manifest to enable better deduplication on subsequent "tart pull"'s try manifest.toJSON().write(to: manifestURL) } diff --git a/Sources/tart/VMStorageOCI.swift b/Sources/tart/VMStorageOCI.swift index a4ad4b89..a8026342 100644 --- a/Sources/tart/VMStorageOCI.swift +++ b/Sources/tart/VMStorageOCI.swift @@ -200,6 +200,12 @@ class VMStorageOCI: PrunableStorage { // Choose the best base 
image which has the most deduplication ratio let localLayerCache = try await chooseLocalLayerCache(name, manifest, registry) + if let llc = localLayerCache { + let deduplicatedHuman = ByteCountFormatter.string(fromByteCount: Int64(llc.deduplicatedBytes), countStyle: .file) + + defaultLogger.appendNewLine("found an image \(llc.name) that will allow us to deduplicate \(deduplicatedHuman), using it as a base...") + } + try await tmpVMDir.pullFromRegistry(registry: registry, manifest: manifest, concurrency: concurrency, localLayerCache: localLayerCache) } recoverFromFailure: { error in if error is RuntimeError { @@ -250,15 +256,15 @@ class VMStorageOCI: PrunableStorage { func chooseLocalLayerCache(_ name: RemoteName, _ manifest: OCIManifest, _ registry: Registry) async throws -> LocalLayerCache? { // Establish a closure that will calculate how much bytes - // we'll de-duplicate if we re-use the given manifest + // we'll deduplicate if we re-use the given manifest let target = Swift.Set(manifest.layers) - let calculateDeduplicatedBytes = { (manifest: OCIManifest) -> Int in - target.intersection(manifest.layers).map({ $0.size }).reduce(0, +) + let calculateDeduplicatedBytes = { (manifest: OCIManifest) -> UInt64 in + target.intersection(manifest.layers).map({ UInt64($0.size) }).reduce(0, +) } // Load OCI VM images and their manifests (if present) - var candidates: [(name: String, vmDir: VMDirectory, manifest: OCIManifest, deduplicatedBytes: Int)] = [] + var candidates: [(name: String, vmDir: VMDirectory, manifest: OCIManifest, deduplicatedBytes: UInt64)] = [] for (name, vmDir, isSymlink) in try list() { if isSymlink { @@ -289,13 +295,13 @@ class VMStorageOCI: PrunableStorage { candidates.append((name.description, vmDir, manifest, calculateDeduplicatedBytes(manifest))) } - // Now, find the best match based on how many bytes we'll de-duplicate + // Now, find the best match based on how many bytes we'll deduplicate let choosen = candidates.max { left, right in return left.deduplicatedBytes < right.deduplicatedBytes } return try choosen.flatMap({ choosen in - try LocalLayerCache(choosen.vmDir.diskURL, choosen.manifest) + try LocalLayerCache(choosen.name, choosen.deduplicatedBytes, choosen.vmDir.diskURL, choosen.manifest) }) } } From 75c44db269b1664b9f155e32dfc61d30c5d38b2c Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Wed, 17 Jul 2024 19:42:55 +0400 Subject: [PATCH 08/10] Do not prematurely read contents from disk --- Sources/tart/OCI/Layerizer/DiskV2.swift | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/Sources/tart/OCI/Layerizer/DiskV2.swift b/Sources/tart/OCI/Layerizer/DiskV2.swift index e99cc4f0..27e55395 100644 --- a/Sources/tart/OCI/Layerizer/DiskV2.swift +++ b/Sources/tart/OCI/Layerizer/DiskV2.swift @@ -182,9 +182,6 @@ class DiskV2: Disk { // If the local layer cache is used, only write chunks that differ // since the base disk can contain anything at any position if let rdisk = rdisk { - try rdisk.seek(toOffset: offset) - let actualContentsOnDisk = try rdisk.read(upToCount: chunk.count) - // F_PUNCHHOLE requires the holes to be aligned to file system block boundaries let isHoleAligned = (offset % fsBlockSize) == 0 && (UInt64(chunk.count) % fsBlockSize) == 0 @@ -196,9 +193,14 @@ class DiskV2: Disk { throw RuntimeError.PullFailed("failed to punch hole: \(details)") } - } else if chunk != actualContentsOnDisk { - try disk.seek(toOffset: offset) - disk.write(chunk) + } else { + try rdisk.seek(toOffset: offset) + let actualContentsOnDisk = try 
rdisk.read(upToCount: chunk.count) + + if chunk != actualContentsOnDisk { + try disk.seek(toOffset: offset) + disk.write(chunk) + } } offset += UInt64(chunk.count) From a053c6b981202a703beb6c5c1804b94d28c1534b Mon Sep 17 00:00:00 2001 From: Nikolay Edigaryev Date: Thu, 18 Jul 2024 19:07:24 +0400 Subject: [PATCH 09/10] Only consider candidates with deduplicatedBytes more than 0 --- Sources/tart/VMStorageOCI.swift | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Sources/tart/VMStorageOCI.swift b/Sources/tart/VMStorageOCI.swift index a8026342..100189e0 100644 --- a/Sources/tart/VMStorageOCI.swift +++ b/Sources/tart/VMStorageOCI.swift @@ -296,7 +296,9 @@ class VMStorageOCI: PrunableStorage { } // Now, find the best match based on how many bytes we'll deduplicate - let choosen = candidates.max { left, right in + let choosen = candidates.filter { + $0.deduplicatedBytes > 0 + }.max { left, right in return left.deduplicatedBytes < right.deduplicatedBytes } From 5eeb6d8afaee903417df06359ab8298d17391eb5 Mon Sep 17 00:00:00 2001 From: Fedor Korotkov Date: Thu, 25 Jul 2024 11:22:06 -0400 Subject: [PATCH 10/10] APFS reuse UX/DX improvements (#870) * Show how much deduplication is happening Improvement to the APFS deduplication logic which checks whether a disk image file `mayShareFileContent` with some other file, and then we set a custom attribute to track the deduplication since there is no way to get this information from APFS itself. It's not 100% accurate, but given that the OCI cache is immutable, the actual disk usage can only be lower than that. * Use string attribute * Update Sources/tart/URL+Prunable.swift Co-authored-by: Nikolay Edigaryev * Added SizeOnDisk column --------- Co-authored-by: Nikolay Edigaryev --- Package.resolved | 11 ++++++++++- Package.swift | 2 ++ Sources/tart/Commands/List.swift | 5 +++-- Sources/tart/URL+Prunable.swift | 26 ++++++++++++++++++++++++++ Sources/tart/VMDirectory+OCI.swift | 5 +++++ Sources/tart/VMDirectory.swift | 8 ++++++++ 6 files changed, 54 insertions(+), 3 deletions(-) diff --git a/Package.resolved b/Package.resolved index 14a84f2d..26c1d2eb 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "6d48639bc0ea02002de0b4f38fe3fce0ddc9d174f2e56180c2ffcbedb7391ef8", + "originHash" : "2c514a4a1d7e106713db744bee89edb40d75da63e6611990ec2f4b0da53c0455", "pins" : [ { "identity" : "antlr4", @@ -118,6 +118,15 @@ "version" : "1.8.0" } }, + { + "identity" : "swift-xattr", + "kind" : "remoteSourceControl", + "location" : "https://github.com/jozefizso/swift-xattr", + "state" : { + "revision" : "f8605af7b3290dbb235fb182ec6e9035d0c8c3ac", + "version" : "3.0.0" + } + }, { "identity" : "swiftdate", "kind" : "remoteSourceControl", diff --git a/Package.swift b/Package.swift index 5de36ff4..2abb9d62 100644 --- a/Package.swift +++ b/Package.swift @@ -24,6 +24,7 @@ let package = Package( .package(url: "https://github.com/orchetect/SwiftRadix", from: "1.3.1"), .package(url: "https://github.com/groue/Semaphore", from: "0.0.8"), .package(url: "https://github.com/fumoboy007/swift-retry", from: "0.2.3"), + .package(url: "https://github.com/jozefizso/swift-xattr", from: "3.0.0"), ], targets: [ .executableTarget(name: "tart", dependencies: [ @@ -40,6 +41,7 @@ let package = Package( .product(name: "SwiftRadix", package: "SwiftRadix"), .product(name: "Semaphore", package: "Semaphore"), .product(name: "DMRetry", package: "swift-retry"), + .product(name: "XAttr", package: "swift-xattr"), ], exclude: [ "OCI/Reference/Makefile",
"OCI/Reference/Reference.g4", diff --git a/Sources/tart/Commands/List.swift b/Sources/tart/Commands/List.swift index 22345972..a15ecb27 100644 --- a/Sources/tart/Commands/List.swift +++ b/Sources/tart/Commands/List.swift @@ -7,6 +7,7 @@ fileprivate struct VMInfo: Encodable { let Name: String let Disk: Int let Size: Int + let SizeOnDisk: Int let Running: Bool let State: String } @@ -38,13 +39,13 @@ struct List: AsyncParsableCommand { if source == nil || source == "local" { infos += sortedInfos(try VMStorageLocal().list().map { (name, vmDir) in - try VMInfo(Source: "local", Name: name, Disk: vmDir.sizeGB(), Size: vmDir.allocatedSizeGB(), Running: vmDir.running(), State: vmDir.state().rawValue) + try VMInfo(Source: "local", Name: name, Disk: vmDir.sizeGB(), Size: vmDir.allocatedSizeGB(), SizeOnDisk: vmDir.allocatedSizeGB() - vmDir.deduplicatedSizeGB(), Running: vmDir.running(), State: vmDir.state().rawValue) }) } if source == nil || source == "oci" { infos += sortedInfos(try VMStorageOCI().list().map { (name, vmDir, _) in - try VMInfo(Source: "OCI", Name: name, Disk: vmDir.sizeGB(), Size: vmDir.allocatedSizeGB(), Running: vmDir.running(), State: vmDir.state().rawValue) + try VMInfo(Source: "OCI", Name: name, Disk: vmDir.sizeGB(), Size: vmDir.allocatedSizeGB(), SizeOnDisk: vmDir.allocatedSizeGB() - vmDir.deduplicatedSizeGB(), Running: vmDir.running(), State: vmDir.state().rawValue) }) } diff --git a/Sources/tart/URL+Prunable.swift b/Sources/tart/URL+Prunable.swift index 57656e2f..12e3dd17 100644 --- a/Sources/tart/URL+Prunable.swift +++ b/Sources/tart/URL+Prunable.swift @@ -1,4 +1,5 @@ import Foundation +import XAttr extension URL: Prunable { var url: URL { @@ -13,7 +14,32 @@ extension URL: Prunable { try resourceValues(forKeys: [.totalFileAllocatedSizeKey]).totalFileAllocatedSize! } + func deduplicatedSizeBytes() throws -> Int { + let values = try resourceValues(forKeys: [.totalFileAllocatedSizeKey, .mayShareFileContentKey]) + // make sure the file's origin file is there and duplication works + var dedublicatedSize = 0 + if values.mayShareFileContent == true { + return Int(deduplicatedBytes()) + } + return 0 + } + func sizeBytes() throws -> Int { try resourceValues(forKeys: [.totalFileSizeKey]).totalFileSize! } + + func setDeduplicatedBytes(_ size: UInt64) { + let data = "\(size)".data(using: .utf8)! + try! self.setExtendedAttribute(name: "run.tart.deduplicated-bytes", value: data) + } + + func deduplicatedBytes() -> UInt64 { + guard let data = try? self.extendedAttributeValue(forName: "run.tart.deduplicated-bytes") else { + return 0 + } + if let strValue = String(data: data, encoding: .utf8) { + return UInt64(strValue) ?? 
0 + } + return 0 + } } diff --git a/Sources/tart/VMDirectory+OCI.swift b/Sources/tart/VMDirectory+OCI.swift index 7eb2ddb1..f6ace26d 100644 --- a/Sources/tart/VMDirectory+OCI.swift +++ b/Sources/tart/VMDirectory+OCI.swift @@ -59,6 +59,11 @@ extension VMDirectory { throw RuntimeError.PullFailed("failed to decompress disk: \(error.localizedDescription)") } + if let llc = localLayerCache { + // set custom attribute to remember deduplicated bytes + diskURL.setDeduplicatedBytes(llc.deduplicatedBytes) + } + // Pull VM's NVRAM file layer and store it in an NVRAM file defaultLogger.appendNewLine("pulling NVRAM...") diff --git a/Sources/tart/VMDirectory.swift b/Sources/tart/VMDirectory.swift index ccd7e791..309e4a26 100644 --- a/Sources/tart/VMDirectory.swift +++ b/Sources/tart/VMDirectory.swift @@ -182,6 +182,14 @@ struct VMDirectory: Prunable { try allocatedSizeBytes() / 1000 / 1000 / 1000 } + func deduplicatedSizeBytes() throws -> Int { + try configURL.deduplicatedSizeBytes() + diskURL.deduplicatedSizeBytes() + nvramURL.deduplicatedSizeBytes() + } + + func deduplicatedSizeGB() throws -> Int { + try deduplicatedSizeBytes() / 1000 / 1000 / 1000 + } + func sizeBytes() throws -> Int { try configURL.sizeBytes() + diskURL.sizeBytes() + nvramURL.sizeBytes() }
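
A few reference sketches of the techniques this series relies on follow; none of them are part of the patches themselves. First, the base-image selection added in PATCH 07 (and narrowed in PATCH 09) boils down to picking the local OCI image whose layers overlap the most, by size, with the manifest about to be pulled, ignoring images with no overlap at all. A minimal self-contained sketch of that selection, where the Layer and bestBase names are illustrative stand-ins rather than Tart API:

    import Foundation

    // Illustrative stand-in for the OCI layer descriptors used by
    // chooseLocalLayerCache(): only digest identity and size matter here.
    struct Layer: Hashable {
      let digest: String
      let size: UInt64
    }

    // Pick the local candidate whose layers overlap the most (by bytes) with
    // the layers we are about to pull; candidates with zero overlap are skipped.
    func bestBase(target: [Layer],
                  candidates: [(name: String, layers: [Layer])]) -> (name: String, deduplicatedBytes: UInt64)? {
      let wanted = Set(target)

      return candidates
        .map { (name: $0.name,
                deduplicatedBytes: wanted.intersection($0.layers).map(\.size).reduce(0, +)) }
        .filter { $0.deduplicatedBytes > 0 }
        .max { $0.deduplicatedBytes < $1.deduplicatedBytes }
    }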
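
PATCH 01 seeds the destination disk by copying the chosen base disk: on APFS, FileManager.copyItem(at:to:) produces a clonefile(2)-style copy, so the new disk starts out sharing all of its blocks with the base and only diverges as blocks are overwritten. PATCH 10 then consults the .mayShareFileContentKey resource value to tell whether that sharing is actually in effect. A small sketch combining the two ideas (the helper name is hypothetical):

    import Foundation

    // Clone a base disk image and report whether the copy may still share
    // file content (i.e. APFS blocks) with another file.
    func cloneBaseDisk(from baseURL: URL, to diskURL: URL) throws -> Bool {
      // On APFS this copy is performed as a clone, so no data blocks
      // are duplicated up front.
      try FileManager.default.copyItem(at: baseURL, to: diskURL)

      let values = try diskURL.resourceValues(forKeys: [.mayShareFileContentKey])

      return values.mayShareFileContent ?? false
    }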
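
The layer-writing path that PATCHes 01, 02, 05 and 08 converge on can be summarized as: for each 64 KiB chunk, punch a hole when the chunk is all zeroes and block-aligned, skip the write when the chunk already matches the cloned base (keeping those blocks shared), and write everything else, letting copy-on-write allocate new blocks. A condensed sketch under those assumptions; the function and parameter names are illustrative, and Tart's real implementation lives in zeroSkippingWrite():

    import Foundation
    import System

    func writeReusingClone(_ data: Data, to disk: FileHandle, cloneContents rdisk: FileHandle,
                           at startOffset: UInt64, fsBlockSize: UInt64) throws -> UInt64 {
      let chunkSize = 64 * 1024
      let zeroChunk = Data(count: chunkSize)

      var offset = startOffset
      var index = data.startIndex

      while index < data.endIndex {
        let next = data.index(index, offsetBy: chunkSize, limitedBy: data.endIndex) ?? data.endIndex
        let chunk = data[index..<next]

        // F_PUNCHHOLE only accepts offsets/lengths aligned to the FS block size
        let aligned = offset % fsBlockSize == 0 && UInt64(chunk.count) % fsBlockSize == 0

        if aligned && chunk == zeroChunk {
          // Deallocate the blocks instead of writing zeroes
          var arg = fpunchhole_t(fp_flags: 0, reserved: 0,
                                 fp_offset: off_t(offset), fp_length: off_t(chunk.count))

          if fcntl(disk.fileDescriptor, F_PUNCHHOLE, &arg) == -1 {
            throw Errno(rawValue: errno)
          }
        } else {
          // Only write the chunk if it differs from what the clone already holds,
          // otherwise the shared blocks stay shared with the base image
          try rdisk.seek(toOffset: offset)
          let onDisk = try rdisk.read(upToCount: chunk.count)

          if chunk != onDisk {
            try disk.seek(toOffset: offset)
            disk.write(chunk)
          }
        }

        offset += UInt64(chunk.count)
        index = next
      }

      return offset
    }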
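
Lastly, the run.tart.deduplicated-bytes bookkeeping from PATCH 10 is plain extended-attribute storage; Tart goes through the swift-xattr package's URL extensions, but the same idea can be expressed with the raw xattr(2) calls, as in this sketch (the free-standing function names are illustrative):

    import Foundation
    import System

    let attributeName = "run.tart.deduplicated-bytes"

    // Persist the number of deduplicated bytes as a string-valued xattr
    func setDeduplicatedBytes(_ size: UInt64, at url: URL) throws {
      let bytes = Array("\(size)".utf8)

      let result = bytes.withUnsafeBytes { buffer in
        setxattr(url.path, attributeName, buffer.baseAddress, buffer.count, 0, 0)
      }

      if result == -1 {
        throw Errno(rawValue: errno)
      }
    }

    // Read it back, treating a missing or malformed attribute as zero
    func deduplicatedBytes(at url: URL) -> UInt64 {
      let size = getxattr(url.path, attributeName, nil, 0, 0, 0)
      guard size > 0 else { return 0 }

      var buffer = [UInt8](repeating: 0, count: size)
      let read = getxattr(url.path, attributeName, &buffer, buffer.count, 0, 0)
      guard read > 0, let string = String(bytes: buffer.prefix(read), encoding: .utf8) else { return 0 }

      return UInt64(string) ?? 0
    }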