Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve file attribute caching #855

Merged
merged 3 commits into from
Nov 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 52 additions & 27 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ plugins {

allprojects {
repositories {
mavenLocal()
//mavenLocal()
// Allows you to specify your own repository manager instance.
if (project.hasProperty("s3fs.proxy.url")) {
maven {
Expand All @@ -38,6 +38,50 @@ java {
withJavadocJar()
}

// Configure multiple test sources: the default unit-test suite ("test") plus a separate
// "testIntegration" suite that is compiled and run against the main and unit-test outputs.
testing {
suites {
// Just for self reference, technically this is already configured by default.
val test by getting(JvmTestSuite::class) {
useJUnitJupiter() // already the default.
testType.set(TestSuiteType.UNIT_TEST) // already the default.
}

// testIntegration test sources
val testIntegration by registering(JvmTestSuite::class) {
// Keep a handle on the suite so its name can be used inside the nested blocks below.
val self = this
testType.set(TestSuiteType.INTEGRATION_TEST)

// We need to manually add the "main" and "test" outputs to both the compile and the
// runtime classpath of this suite's source set.
sourceSets {
named(self.name) {
compileClasspath += sourceSets.main.get().output + sourceSets.test.get().output
runtimeClasspath += sourceSets.main.get().output + sourceSets.test.get().output
}
}

// Inherit implementation, runtime and test dependencies (adds them to the compile classpath)
// by making "<suiteName>Implementation" extend the corresponding project configurations.
configurations.named("${self.name}Implementation") {
extendsFrom(configurations.testImplementation.get())
extendsFrom(configurations.runtimeOnly.get())
extendsFrom(configurations.implementation.get())
}

// Make sure the integration test is executed as part of the "check" task.
// NOTE(review): the `tasks` block further down also makes "check" depend on "testIntegration";
// one of the two declarations looks redundant - confirm and drop one.
tasks.named<Task>("check") {
dependsOn(named<JvmTestSuite>(self.name))
}

// Ordering constraint only: the suite's task must run after `test` (the unit-test suite above).
tasks.named<Task>(self.name) {
mustRunAfter(test)
}

}
}


}

dependencies {
api(platform("software.amazon.awssdk:bom:2.29.9"))
api("software.amazon.awssdk:s3") {
Expand All @@ -49,6 +93,9 @@ dependencies {
exclude("org.slf4j", "slf4j-api")
}
api("com.google.code.findbugs:jsr305:3.0.2")
api("com.github.ben-manes.caffeine:caffeine:2.9.3") {
because("Last version to support JDK 8.")
}

testImplementation("ch.qos.logback:logback-classic:1.5.12")
testImplementation("org.junit.jupiter:junit-jupiter:5.11.3")
Expand Down Expand Up @@ -140,6 +187,10 @@ tasks {
}
}

named<Task>("check") {
dependsOn(named<Task>("testIntegration"))
}

named<Task>("jacocoTestReport") {
group = "jacoco"
dependsOn(named("test")) // tests are required to run before generating the report
Expand All @@ -162,32 +213,10 @@ tasks {
group = "sonar"
}

named<Test>("test") {
description = "Run unit tests"
outputs.upToDateWhen { false }
useJUnitPlatform {
filter {
excludeTestsMatching("*IT")
}
}
}

withType<Test> {
defaultCharacterEncoding = "UTF-8"
}

create<Test>("it-s3") {
group = "verification"
description = "Run integration tests using S3"
useJUnitPlatform {
filter {
includeTestsMatching("*IT")
includeTags("it-s3")
}
}
mustRunAfter(named("test"))
}

// TODO: There are some problems with using minio that overcomplicate the setup.
// For the time being we'll be disabling it until we figure out the best path forward.
// create<Test>("it-minio") {
Expand All @@ -201,10 +230,6 @@ tasks {
// }
// }

named<Task>("check") {
dependsOn(named("it-s3"))
}

withType<Sign> {
onlyIf {
(project.hasProperty("withSignature") && project.findProperty("withSignature") == "true") ||
Expand Down
8 changes: 5 additions & 3 deletions docs/content/contributing/developer-guide/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Before you start writing code, please read:
## System requirements

1. Gradle 8.1, or higher
2. `JDK8`, `JDK11` or `JDK17`
2. `JDK8`, `JDK11`, `JDK17` or `JDK21`

## Finding issues to work on

Expand Down Expand Up @@ -85,7 +85,7 @@ s3fs.proxy.url=https://my.local.domain/path/to/repository
### Build

Builds the entire code and runs unit and integration tests.
It is assumed you already have the `amazon-test.properties` configuration in place.
It is assumed you already have the `amazon-test.properties` configuration in place under `src/test/resources` or `src/testIntegration/resources`.

```
./gradlew build
Expand All @@ -100,9 +100,11 @@ It is assumed you already have the `amazon-test.properties` configuration in pla
### Run only integration tests

```
./gradlew it-s3
./gradlew testIntegration
```

You can also use `./gradlew build -x testIntegration` to skip the integration tests.

### Run all tests

```
Expand Down
52 changes: 27 additions & 25 deletions docs/content/reference/configuration-options.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,30 @@

A complete list of environment variables which can be set to configure the client.

| Key | Default | Description |
|-------------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------|
| s3fs.access.key | none | <small>AWS access key, used to identify the user interacting with AWS</small> |
| s3fs.secret.key | none | <small>AWS secret access key, used to authenticate the user interacting with AWS</small> |
| s3fs.request.metric.collector.class | TODO | <small>Fully-qualified class name to instantiate an AWS SDK request/response metric collector</small> |
| s3fs.connection.timeout | TODO | <small>Timeout (in milliseconds) for establishing a connection to a remote service</small> |
| s3fs.max.connections | TODO | <small>Maximum number of connections allowed in a connection pool</small> |
| s3fs.max.retry.error | TODO | <small>Maximum number of times that a single request should be retried, assuming it fails for a retryable error</small> |
| s3fs.protocol | TODO | <small>Protocol (HTTP or HTTPS) to use when connecting to AWS</small> |
| s3fs.proxy.domain | none | <small>For NTLM proxies: The Windows domain name to use when authenticating with the proxy</small> |
| s3fs.proxy.protocol | none | <small>Proxy connection protocol.</small> |
| s3fs.proxy.host | none | <small>Proxy host name either from the configured endpoint or from the "http.proxyHost" system property</small> |
| s3fs.proxy.password | none | <small>The password to use when connecting through a proxy</small> |
| s3fs.proxy.port | none | <small>Proxy port either from the configured endpoint or from the "http.proxyPort" system property</small> |
| s3fs.proxy.username | none | <small>The username to use when connecting through a proxy</small> |
| s3fs.proxy.workstation | none | <small>For NTLM proxies: The Windows workstation name to use when authenticating with the proxy</small> |
| s3fs.region | none | <small>The AWS Region to configure the client</small> |
| s3fs.socket.send.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP send buffer</small> |
| s3fs.socket.receive.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP receive buffer</small> |
| s3fs.socket.timeout | TODO | <small>Timeout (in milliseconds) for each read to the underlying socket</small> |
| s3fs.user.agent.prefix | TODO | <small>Prefix of the user agent that is sent with each request to AWS</small> |
| s3fs.amazon.s3.factory.class | TODO | <small>Fully-qualified class name to instantiate a S3 factory base class which creates a S3 client instance</small> |
| s3fs.signer.override | TODO | <small>Fully-qualified class name to define the signer that should be used when authenticating with AWS</small> |
| s3fs.path.style.access | TODO | <small>Boolean that indicates whether the client uses path-style access for all requests</small> |
| s3fs.request.header.cache-control | blank | <small>Configures the `cacheControl` on request builders (i.e. `CopyObjectRequest`, `PutObjectRequest`, etc) |
| Key | Default | Description |
|-------------------------------------|---------|-------------------------------------------------------------------------------------------------------------------------|
| s3fs.access.key | none | <small>AWS access key, used to identify the user interacting with AWS</small> |
| s3fs.secret.key | none | <small>AWS secret access key, used to authenticate the user interacting with AWS</small> |
| s3fs.request.metric.collector.class | TODO | <small>Fully-qualified class name to instantiate an AWS SDK request/response metric collector</small> |
| s3fs.cache.attributes.ttl | `60000` | <small>TTL for the cached file attributes (in milliseconds)</small> |
| s3fs.cache.attributes.size | `30000` | <small>Total size of cached file attributes</small> |
| s3fs.connection.timeout | TODO | <small>Timeout (in milliseconds) for establishing a connection to a remote service</small> |
| s3fs.max.connections | TODO | <small>Maximum number of connections allowed in a connection pool</small> |
| s3fs.max.retry.error | TODO | <small>Maximum number of times that a single request should be retried, assuming it fails for a retryable error</small> |
| s3fs.protocol | TODO | <small>Protocol (HTTP or HTTPS) to use when connecting to AWS</small> |
| s3fs.proxy.domain | none | <small>For NTLM proxies: The Windows domain name to use when authenticating with the proxy</small> |
| s3fs.proxy.protocol | none | <small>Proxy connection protocol.</small> |
| s3fs.proxy.host | none | <small>Proxy host name either from the configured endpoint or from the "http.proxyHost" system property</small> |
| s3fs.proxy.password | none | <small>The password to use when connecting through a proxy</small> |
| s3fs.proxy.port | none | <small>Proxy port either from the configured endpoint or from the "http.proxyPort" system property</small> |
| s3fs.proxy.username | none | <small>The username to use when connecting through a proxy</small> |
| s3fs.proxy.workstation | none | <small>For NTLM proxies: The Windows workstation name to use when authenticating with the proxy</small> |
| s3fs.region | none | <small>The AWS Region to configure the client</small> |
| s3fs.socket.send.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP send buffer</small> |
| s3fs.socket.receive.buffer.size.hint | TODO | <small>The size hint (in bytes) for the low level TCP receive buffer</small> |
| s3fs.socket.timeout | TODO | <small>Timeout (in milliseconds) for each read to the underlying socket</small> |
| s3fs.user.agent.prefix | TODO | <small>Prefix of the user agent that is sent with each request to AWS</small> |
| s3fs.amazon.s3.factory.class | TODO | <small>Fully-qualified class name to instantiate a S3 factory base class which creates a S3 client instance</small> |
| s3fs.signer.override | TODO | <small>Fully-qualified class name to define the signer that should be used when authenticating with AWS</small> |
| s3fs.path.style.access | TODO | <small>Boolean that indicates whether the client uses path-style access for all requests</small> |
| s3fs.request.header.cache-control | blank | <small>Configures the `cacheControl` on request builders (e.g. `CopyObjectRequest`, `PutObjectRequest`, etc.)</small> |
14 changes: 14 additions & 0 deletions src/main/java/org/carlspring/cloud/storage/s3fs/S3Factory.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import java.time.Duration;
import java.util.Properties;

import org.carlspring.cloud.storage.s3fs.attribute.S3BasicFileAttributes;
import org.carlspring.cloud.storage.s3fs.attribute.S3PosixFileAttributes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
Expand Down Expand Up @@ -41,6 +43,18 @@ public abstract class S3Factory

public static final String SECRET_KEY = "s3fs.secret.key";

/**
* Maximum TTL in millis to cache {@link S3BasicFileAttributes} and {@link S3PosixFileAttributes}.
*/
public static final String CACHE_ATTRIBUTES_TTL = "s3fs.cache.attributes.ttl";
public static final int CACHE_ATTRIBUTES_TTL_DEFAULT = 60000;

/**
* Total size of {@link S3BasicFileAttributes} and {@link S3PosixFileAttributes} cache.
*/
public static final String CACHE_ATTRIBUTES_SIZE = "s3fs.cache.attributes.size";
public static final int CACHE_ATTRIBUTES_SIZE_DEFAULT = 30000;

public static final String REQUEST_METRIC_COLLECTOR_CLASS = "s3fs.request.metric.collector.class";

public static final String CONNECTION_TIMEOUT = "s3fs.connection.timeout";
Expand Down
34 changes: 25 additions & 9 deletions src/main/java/org/carlspring/cloud/storage/s3fs/S3FileSystem.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
package org.carlspring.cloud.storage.s3fs;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.carlspring.cloud.storage.s3fs.cache.S3FileAttributesCache;
import org.carlspring.cloud.storage.s3fs.util.S3Utils;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.Bucket;

import java.io.IOException;
import java.nio.file.FileStore;
import java.nio.file.FileSystem;
Expand All @@ -10,10 +17,6 @@
import java.util.Properties;
import java.util.Set;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import software.amazon.awssdk.services.s3.S3Client;
import software.amazon.awssdk.services.s3.model.Bucket;
import static org.carlspring.cloud.storage.s3fs.S3Path.PATH_SEPARATOR;

/**
Expand All @@ -34,7 +37,7 @@ public class S3FileSystem

private final String endpoint;

private final int cache;
private S3FileAttributesCache fileAttributesCache;

private final Properties properties;

Expand All @@ -48,8 +51,12 @@ public S3FileSystem(final S3FileSystemProvider provider,
this.key = key;
this.client = client;
this.endpoint = endpoint;
this.cache = 60000; // 1 minute cache for the s3Path
this.properties = properties;

int cacheTTL = Integer.parseInt(String.valueOf(properties.getOrDefault(S3Factory.CACHE_ATTRIBUTES_TTL, S3Factory.CACHE_ATTRIBUTES_TTL_DEFAULT)));
int cacheSize = Integer.parseInt(String.valueOf(properties.getOrDefault(S3Factory.CACHE_ATTRIBUTES_SIZE, S3Factory.CACHE_ATTRIBUTES_SIZE_DEFAULT)));

this.fileAttributesCache = new S3FileAttributesCache(cacheTTL, cacheSize);
}

public S3FileSystem(final S3FileSystemProvider provider,
Expand All @@ -75,6 +82,7 @@ public String getKey()
public void close()
throws IOException
{
// Clear the file attributes cache before closing.
// NOTE(review): presumably evicts every cached entry - confirm against S3FileAttributesCache.
this.fileAttributesCache.invalidateAll();
// Delegate the actual close of this file system to the provider.
this.provider.close(this);
}

Expand Down Expand Up @@ -171,14 +179,22 @@ public String getEndpoint()
return endpoint;
}

/**
* @deprecated Use {@link org.carlspring.cloud.storage.s3fs.util.S3Utils#key2Parts(String)} instead. To be removed in one of the next major versions.
* @param keyParts
* @return String[]
*/
public String[] key2Parts(String keyParts)
{
return keyParts.split(PATH_SEPARATOR);
return S3Utils.key2Parts(keyParts);
}

public int getCache()
/**
* @return The {@link S3FileAttributesCache} instance holding the path attributes cache for this file provider.
*/
public S3FileAttributesCache getFileAttributesCache()
{
return cache;
return fileAttributesCache;
}

/**
Expand Down
Loading
Loading