From c5f21b2b01deb10a542455b95285860a53f1f4d0 Mon Sep 17 00:00:00 2001
From: Tom Crasset <25140344+tcrasset@users.noreply.github.com>
Date: Fri, 17 Jan 2025 10:03:17 +0100
Subject: [PATCH] fix S3 per-user-directory Policy (#6443)

* fix S3 per-user-directory Policy

* Delete docker/config.json

* add tests

* remove logs

* undo modifications of weed/shell/command_volume_balance.go

* remove modifications of docker-compose

* fix failing test

---------

Co-authored-by: Chris Lu <chrislusf@users.noreply.github.com>
---
 docker/compose/local-dev-compose.yml          |  21 +-
 docker/test.py                                | 274 ++++++++++++++++++
 weed/command/iam.go                           |   5 +-
 weed/iamapi/iamapi_management_handlers.go     |  17 +-
 .../iamapi/iamapi_management_handlers_test.go |  71 +++++
 weed/s3api/auth_credentials.go                |  21 +-
 weed/s3api/s3_constants/header.go             |  13 +-
 7 files changed, 404 insertions(+), 18 deletions(-)
 create mode 100755 docker/test.py
 create mode 100644 weed/iamapi/iamapi_management_handlers_test.go

diff --git a/docker/compose/local-dev-compose.yml b/docker/compose/local-dev-compose.yml
index 9cdd76dfd..6f0d0fb29 100644
--- a/docker/compose/local-dev-compose.yml
+++ b/docker/compose/local-dev-compose.yml
@@ -6,7 +6,7 @@ services:
     ports:
       - 9333:9333
       - 19333:19333
-    command: "-v=1 master -ip=master"
+    command: "-v=1 master -ip=master -volumeSizeLimitMB=10"
     volumes:
       - ./tls:/etc/seaweedfs/tls
     env_file:
@@ -16,7 +16,7 @@ services:
     ports:
       - 8080:8080
       - 18080:18080
-    command: "-v=1 volume -mserver=master:9333 -port=8080 -ip=volume -preStopSeconds=1"
+    command: "-v=1 volume -mserver=master:9333 -port=8080 -ip=volume -preStopSeconds=1 -max=10000"
     depends_on:
       - master
     volumes:
@@ -26,10 +26,9 @@ services:
   filer:
     image: chrislusf/seaweedfs:local
     ports:
-      - 8111:8111
       - 8888:8888
       - 18888:18888
-    command: '-v=1 filer -ip.bind=0.0.0.0 -master="master:9333" -iam -iam.ip=filer'
+    command: '-v=1 filer -ip.bind=0.0.0.0 -master="master:9333"'
     depends_on:
       - master
       - volume
@@ -37,6 +36,19 @@ services:
       - ./tls:/etc/seaweedfs/tls
     env_file:
       - ${ENV_FILE:-dev.env}
+
+  iam:
+    image: chrislusf/seaweedfs:local
+    ports:
+      - 8111:8111
+    command: '-v=1 iam -filer="filer:8888" -master="master:9333"'
+    depends_on:
+      - master
+      - volume
+      - filer
+    volumes:
+      - ./tls:/etc/seaweedfs/tls
+
   s3:
     image: chrislusf/seaweedfs:local
     ports:
@@ -50,6 +62,7 @@ services:
       - ./tls:/etc/seaweedfs/tls
     env_file:
       - ${ENV_FILE:-dev.env}
+
   mount:
     image: chrislusf/seaweedfs:local
     privileged: true
diff --git a/docker/test.py b/docker/test.py
new file mode 100755
index 000000000..8ac025b32
--- /dev/null
+++ b/docker/test.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "boto3",
+# ]
+# ///
+
+import argparse
+import json
+import random
+import string
+import subprocess
+from enum import Enum
+from pathlib import Path
+
+import boto3
+
+REGION_NAME = "us-east-1"
+
+
+class Actions(str, Enum):
+    Get = "Get"
+    Put = "Put"
+    List = "List"
+
+
+def get_user_dir(bucket_name, user, with_bucket=True):
+    if with_bucket:
+        return f"{bucket_name}/user-id-{user}"
+
+    return f"user-id-{user}"
+
+
+def create_power_user():
+    power_user_key = "power_user_key"
+    power_user_secret = "power_user_secret"
+    command = f"s3.configure -apply -user poweruser -access_key {power_user_key} -secret_key {power_user_secret} -actions Admin"
+    print("Creating Power User...")
+    subprocess.run(
+        ["docker", "exec", "-i", "seaweedfs-master-1", "weed", "shell"],
+        input=command,
+        text=True,
+        stdout=subprocess.PIPE,
+    )
+    print(
+        f"Power User created with key: {power_user_key} and secret: {power_user_secret}"
+    )
+    return power_user_key, power_user_secret
+
+
+def create_bucket(s3_client, bucket_name):
+    print(f"Creating Bucket {bucket_name}...")
+    s3_client.create_bucket(Bucket=bucket_name)
+    print(f"Bucket {bucket_name} created.")
+
+
+def upload_file(s3_client, bucket_name, user, file_path, custom_remote_path=None):
+    user_dir = get_user_dir(bucket_name, user, with_bucket=False)
+    if custom_remote_path:
+        remote_path = custom_remote_path
+    else:
+        remote_path = f"{user_dir}/{str(Path(file_path).name)}"
+
+    print(f"Uploading {file_path} for {user}... on {user_dir}")
+
+    s3_client.upload_file(file_path, bucket_name, remote_path)
+    print(f"File {file_path} uploaded for {user}.")
+
+
+def create_user(iam_client, user):
+    print(f"Creating user {user}...")
+    response = iam_client.create_access_key(UserName=user)
+    print(
+        f"User {user} created with access key: {response['AccessKey']['AccessKeyId']}"
+    )
+    return response
+
+
+def list_files(s3_client, bucket_name, path=None):
+    if path is None:
+        path = ""
+    print(f"Listing files of s3://{bucket_name}/{path}...")
+    try:
+        response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=path)
+        if "Contents" in response:
+            for obj in response["Contents"]:
+                print(f"\t - {obj['Key']}")
+        else:
+            print("No files found.")
+    except Exception as e:
+        print(f"Error listing files: {e}")
+
+
+def create_policy_for_user(
+    iam_client, user, bucket_name, actions=[Actions.Get, Actions.List]
+):
+    print(f"Creating policy for {user} on {bucket_name}...")
+    policy_document = {
+        "Version": "2012-10-17",
+        "Statement": [
+            {
+                "Effect": "Allow",
+                "Action": [f"s3:{action.value}*" for action in actions],
+                "Resource": [
+                    f"arn:aws:s3:::{get_user_dir(bucket_name, user)}/*",
+                ],
+            }
+        ],
+    }
+    policy_name = f"{user}-{bucket_name}-full-access"
+
+    policy_json = json.dumps(policy_document)
+    filepath = f"/tmp/{policy_name}.json"
+    with open(filepath, "w") as f:
+        f.write(json.dumps(policy_document, indent=2))
+
+    iam_client.put_user_policy(
+        PolicyName=policy_name, PolicyDocument=policy_json, UserName=user
+    )
+    print(f"Policy for {user} on {bucket_name} created.")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="SeaweedFS S3 Test Script")
+    parser.add_argument(
+        "--s3-url", default="http://127.0.0.1:8333", help="S3 endpoint URL"
+    )
+    parser.add_argument(
+        "--iam-url", default="http://127.0.0.1:8111", help="IAM endpoint URL"
+    )
+    args = parser.parse_args()
+
+    bucket_name = (
+        f"test-bucket-{''.join(random.choices(string.digits + 'abcdef', k=8))}"
+    )
+    sentinel_file = "/tmp/SENTINEL"
+    with open(sentinel_file, "w") as f:
+        f.write("Hello World")
+    print(f"SENTINEL file created at {sentinel_file}")
+
+    power_user_key, power_user_secret = create_power_user()
+
+    admin_s3_client = get_s3_client(args, power_user_key, power_user_secret)
+    iam_client = get_iam_client(args, power_user_key, power_user_secret)
+
+    create_bucket(admin_s3_client, bucket_name)
+    upload_file(admin_s3_client, bucket_name, "Alice", sentinel_file)
+    upload_file(admin_s3_client, bucket_name, "Bob", sentinel_file)
+    list_files(admin_s3_client, bucket_name)
+
+    alice_user_info = create_user(iam_client, "Alice")
+    bob_user_info = create_user(iam_client, "Bob")
+
+    alice_key = alice_user_info["AccessKey"]["AccessKeyId"]
+    alice_secret = alice_user_info["AccessKey"]["SecretAccessKey"]
+    bob_key = bob_user_info["AccessKey"]["AccessKeyId"]
+    bob_secret = bob_user_info["AccessKey"]["SecretAccessKey"]
+
+    # Make sure Admin can read any files
+    list_files(admin_s3_client, bucket_name)
+    list_files(
+        admin_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Alice", with_bucket=False),
+    )
+    list_files(
+        admin_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Bob", with_bucket=False),
+    )
+
+    # Create read policy for Alice and Bob
+    create_policy_for_user(iam_client, "Alice", bucket_name)
+    create_policy_for_user(iam_client, "Bob", bucket_name)
+
+    alice_s3_client = get_s3_client(args, alice_key, alice_secret)
+
+    # Make sure Alice can read her files
+    list_files(
+        alice_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Alice", with_bucket=False) + "/",
+    )
+
+    # Make sure Bob can read his files
+    bob_s3_client = get_s3_client(args, bob_key, bob_secret)
+    list_files(
+        bob_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Bob", with_bucket=False) + "/",
+    )
+
+    # Update policy to include write
+    create_policy_for_user(iam_client, "Alice", bucket_name, actions=[Actions.Put, Actions.Get, Actions.List])  # fmt: off
+    create_policy_for_user(iam_client, "Bob", bucket_name, actions=[Actions.Put, Actions.Get, Actions.List])  # fmt: off
+
+    print("############################# Make sure Alice can write her files")
+    upload_file(
+        alice_s3_client,
+        bucket_name,
+        "Alice",
+        sentinel_file,
+        custom_remote_path=f"{get_user_dir(bucket_name, 'Alice', with_bucket=False)}/SENTINEL_by_Alice",
+    )
+
+
+    print("############################# Make sure Bob can write his files")
+    upload_file(
+        bob_s3_client,
+        bucket_name,
+        "Bob",
+        sentinel_file,
+        custom_remote_path=f"{get_user_dir(bucket_name, 'Bob', with_bucket=False)}/SENTINEL_by_Bob",
+    )
+
+
+    print("############################# Make sure Alice can read her new files")
+    list_files(
+        alice_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Alice", with_bucket=False) + "/",
+    )
+
+
+    print("############################# Make sure Bob can read his new files")
+    list_files(
+        bob_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Bob", with_bucket=False) + "/",
+    )
+
+
+    print("############################# Make sure Bob cannot read Alice's files")
+    list_files(
+        bob_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Alice", with_bucket=False) + "/",
+    )
+
+    print("############################# Make sure Alice cannot read Bob's files")
+
+    list_files(
+        alice_s3_client,
+        bucket_name,
+        get_user_dir(bucket_name, "Bob", with_bucket=False) + "/",
+    )
+
+
+
+def get_iam_client(args, access_key, secret_key):
+    iam_client = boto3.client(
+        "iam",
+        endpoint_url=args.iam_url,
+        region_name=REGION_NAME,
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key,
+    )
+    return iam_client
+
+
+def get_s3_client(args, access_key, secret_key):
+    s3_client = boto3.client(
+        "s3",
+        endpoint_url=args.s3_url,
+        region_name=REGION_NAME,
+        aws_access_key_id=access_key,
+        aws_secret_access_key=secret_key,
+    )
+    return s3_client
+
+
+if __name__ == "__main__":
+    main()
diff --git a/weed/command/iam.go b/weed/command/iam.go
index fa21803dd..f4a7df2ca 100644
--- a/weed/command/iam.go
+++ b/weed/command/iam.go
@@ -5,6 +5,8 @@ import (
 	"fmt"
 	"net/http"
 
+	"time"
+
 	"github.com/gorilla/mux"
 	"github.com/seaweedfs/seaweedfs/weed/glog"
 	"github.com/seaweedfs/seaweedfs/weed/iamapi"
@@ -12,7 +14,6 @@ import (
 	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
 	"github.com/seaweedfs/seaweedfs/weed/security"
 	"github.com/seaweedfs/seaweedfs/weed/util"
-	"time"
 )
 
 var (
@@ -35,7 +36,7 @@ func init() {
 }
 
 var cmdIam = &Command{
-	UsageLine: "iam [-port=8111] [-filer=<ip:port>] [-masters=<ip:port>,<ip:port>]",
+	UsageLine: "iam [-port=8111] [-filer=<ip:port>] [-master=<ip:port>,<ip:port>]",
 	Short:     "start a iam API compatible server",
 	Long:      "start a iam API compatible server.",
 }
diff --git a/weed/iamapi/iamapi_management_handlers.go b/weed/iamapi/iamapi_management_handlers.go
index e5c533e27..baa153cd6 100644
--- a/weed/iamapi/iamapi_management_handlers.go
+++ b/weed/iamapi/iamapi_management_handlers.go
@@ -332,26 +332,23 @@ func GetActions(policy *PolicyDocument) ([]string, error) {
 			// Parse "arn:aws:s3:::my-bucket/shared/*"
 			res := strings.Split(resource, ":")
 			if len(res) != 6 || res[0] != "arn" || res[1] != "aws" || res[2] != "s3" {
-				return nil, fmt.Errorf("not a valid resource: '%s'. Expected prefix 'arn:aws:s3'", res)
+				glog.Infof("not a valid resource: %s", res)
+				continue
 			}
 			for _, action := range statement.Action {
 				// Parse "s3:Get*"
 				act := strings.Split(action, ":")
 				if len(act) != 2 || act[0] != "s3" {
-					return nil, fmt.Errorf("not a valid action: '%s'. Expected prefix 's3:'", act)
+					glog.Infof("not a valid action: %s", act)
+					continue
 				}
 				statementAction := MapToStatementAction(act[1])
-				if res[5] == "*" {
+				path := res[5]
+				if path == "*" {
 					actions = append(actions, statementAction)
 					continue
 				}
-				// Parse my-bucket/shared/*
-				path := strings.Split(res[5], "/")
-				if len(path) != 2 || path[1] != "*" {
-					glog.Infof("not match bucket: %s", path)
-					continue
-				}
-				actions = append(actions, fmt.Sprintf("%s:%s", statementAction, path[0]))
+				actions = append(actions, fmt.Sprintf("%s:%s", statementAction, path))
 			}
 		}
 	}
diff --git a/weed/iamapi/iamapi_management_handlers_test.go b/weed/iamapi/iamapi_management_handlers_test.go
new file mode 100644
index 000000000..9b4a92c24
--- /dev/null
+++ b/weed/iamapi/iamapi_management_handlers_test.go
@@ -0,0 +1,71 @@
+package iamapi
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestGetActionsUserPath(t *testing.T) {
+
+	policyDocument := PolicyDocument{
+		Version: "2012-10-17",
+		Statement: []*Statement{
+			{
+				Effect: "Allow",
+				Action: []string{
+					"s3:Put*",
+					"s3:PutBucketAcl",
+					"s3:Get*",
+					"s3:GetBucketAcl",
+					"s3:List*",
+					"s3:Tagging*",
+					"s3:DeleteBucket*",
+				},
+				Resource: []string{
+					"arn:aws:s3:::shared/user-Alice/*",
+				},
+			},
+		},
+	}
+
+	actions, _ := GetActions(&policyDocument)
+
+	expectedActions := []string{
+		"Write:shared/user-Alice/*",
+		"WriteAcp:shared/user-Alice/*",
+		"Read:shared/user-Alice/*",
+		"ReadAcp:shared/user-Alice/*",
+		"List:shared/user-Alice/*",
+		"Tagging:shared/user-Alice/*",
+		"DeleteBucket:shared/user-Alice/*",
+	}
+	assert.Equal(t, expectedActions, actions)
+}
+
+func TestGetActionsWildcardPath(t *testing.T) {
+
+	policyDocument := PolicyDocument{
+		Version: "2012-10-17",
+		Statement: []*Statement{
+			{
+				Effect: "Allow",
+				Action: []string{
+					"s3:Get*",
+					"s3:PutBucketAcl",
+				},
+				Resource: []string{
+					"arn:aws:s3:::*",
+				},
+			},
+		},
+	}
+
+	actions, _ := GetActions(&policyDocument)
+
+	expectedActions := []string{
+		"Read",
+		"WriteAcp",
+	}
+	assert.Equal(t, expectedActions, actions)
+}
diff --git a/weed/s3api/auth_credentials.go b/weed/s3api/auth_credentials.go
index 505c49a12..e80773993 100644
--- a/weed/s3api/auth_credentials.go
+++ b/weed/s3api/auth_credentials.go
@@ -119,11 +119,14 @@ func NewIdentityAccessManagement(option *S3ApiServerOption) *IdentityAccessManag
 		hashes:       make(map[string]*sync.Pool),
 		hashCounters: make(map[string]*int32),
 	}
+
 	if option.Config != "" {
+		glog.V(3).Infof("loading static config file %s", option.Config)
 		if err := iam.loadS3ApiConfigurationFromFile(option.Config); err != nil {
 			glog.Fatalf("fail to load config file %s: %v", option.Config, err)
 		}
 	} else {
+		glog.V(3).Infof("no static config file specified... loading config from filer %s", option.Filer)
 		if err := iam.loadS3ApiConfigurationFromFiler(option); err != nil {
 			glog.Warningf("fail to load config: %v", err)
 		}
@@ -134,6 +137,7 @@ func NewIdentityAccessManagement(option *S3ApiServerOption) *IdentityAccessManag
 func (iam *IdentityAccessManagement) loadS3ApiConfigurationFromFiler(option *S3ApiServerOption) (err error) {
 	var content []byte
 	err = pb.WithFilerClient(false, 0, option.Filer, option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
+		glog.V(3).Infof("loading config %s from filer %s", filer.IamConfigDirectory+"/"+filer.IamIdentityFile, option.Filer)
 		content, err = filer.ReadInsideFiler(client, filer.IamConfigDirectory, filer.IamIdentityFile)
 		return err
 	})
@@ -179,6 +183,7 @@ func (iam *IdentityAccessManagement) loadS3ApiConfiguration(config *iam_pb.S3Api
 	foundAccountAnonymous := false
 
 	for _, account := range config.Accounts {
+		glog.V(3).Infof("loading account  name=%s, id=%s", account.DisplayName, account.Id)
 		switch account.Id {
 		case AccountAdmin.Id:
 			AccountAdmin = Account{
@@ -217,6 +222,7 @@ func (iam *IdentityAccessManagement) loadS3ApiConfiguration(config *iam_pb.S3Api
 		emailAccount[AccountAnonymous.EmailAddress] = &AccountAnonymous
 	}
 	for _, ident := range config.Identities {
+		glog.V(3).Infof("loading identity %s", ident.Name)
 		t := &Identity{
 			Name:        ident.Name,
 			Credentials: nil,
@@ -236,6 +242,7 @@ func (iam *IdentityAccessManagement) loadS3ApiConfiguration(config *iam_pb.S3Api
 				glog.Warningf("identity %s is associated with a non exist account ID, the association is invalid", ident.Name)
 			}
 		}
+
 		for _, action := range ident.Actions {
 			t.Actions = append(t.Actions, Action(action))
 		}
@@ -379,8 +386,14 @@ func (iam *IdentityAccessManagement) authRequest(r *http.Request, action Action)
 	}
 
 	glog.V(3).Infof("user name: %v actions: %v, action: %v", identity.Name, identity.Actions, action)
-
 	bucket, object := s3_constants.GetBucketAndObject(r)
+	prefix := s3_constants.GetPrefix(r)
+
+	if object == "/" && prefix != "" {
+		// Using the aws cli with s3, and s3api, and with boto3, the object is always set to "/"
+		// but the prefix is set to the actual object key
+		object = prefix
+	}
 
 	if !identity.canDo(action, bucket, object) {
 		return identity, s3err.ErrAccessDenied
@@ -447,6 +460,10 @@ func (identity *Identity) canDo(action Action, bucket string, objectKey string)
 		return true
 	}
 	for _, a := range identity.Actions {
+		// Case where the Resource provided is
+		// 	"Resource": [
+		//		"arn:aws:s3:::*"
+		//	]
 		if a == action {
 			return true
 		}
@@ -455,10 +472,12 @@ func (identity *Identity) canDo(action Action, bucket string, objectKey string)
 		glog.V(3).Infof("identity %s is not allowed to perform action %s on %s -- bucket is empty", identity.Name, action, bucket+objectKey)
 		return false
 	}
+	glog.V(3).Infof("checking if %s can perform %s on bucket '%s'", identity.Name, action, bucket+objectKey)
 	target := string(action) + ":" + bucket + objectKey
 	adminTarget := s3_constants.ACTION_ADMIN + ":" + bucket + objectKey
 	limitedByBucket := string(action) + ":" + bucket
 	adminLimitedByBucket := s3_constants.ACTION_ADMIN + ":" + bucket
+
 	for _, a := range identity.Actions {
 		act := string(a)
 		if strings.HasSuffix(act, "*") {
diff --git a/weed/s3api/s3_constants/header.go b/weed/s3api/s3_constants/header.go
index 765b2a0f1..82b2b116a 100644
--- a/weed/s3api/s3_constants/header.go
+++ b/weed/s3api/s3_constants/header.go
@@ -17,9 +17,10 @@
 package s3_constants
 
 import (
-	"github.com/gorilla/mux"
 	"net/http"
 	"strings"
+
+	"github.com/gorilla/mux"
 )
 
 // Standard S3 HTTP request constants
@@ -72,6 +73,16 @@ func GetBucketAndObject(r *http.Request) (bucket, object string) {
 	return
 }
 
+func GetPrefix(r *http.Request) string {
+	query := r.URL.Query()
+	prefix := query.Get("prefix")
+	if !strings.HasPrefix(prefix, "/") {
+		prefix = "/" + prefix
+	}
+
+	return prefix
+}
+
 var PassThroughHeaders = map[string]string{
 	"response-cache-control":       "Cache-Control",
 	"response-content-disposition": "Content-Disposition",