Skip to content

Commit 8ad6314

Browse files
committed
added some docs and error handling
1 parent cdd122a commit 8ad6314

File tree

5 files changed

+135
-14
lines changed

5 files changed

+135
-14
lines changed

README.md

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,68 @@
1-
# hdfs-group-mapper
1+
# Stackable Group Mapper for Apache Hadoop
2+
3+
HDFS internally uses user groups to enforce group permissions on files and directories. For this reason, it is not enough to have the groups available in OPA during authorization; they also need to be available to Hadoop itself. Hadoop offers a few default group providers, such as:
4+
5+
* LDAP
6+
* Linux user group (usually provided by SSSD or Centrify or similar tools)
7+
8+
Hadoop exposes an [interface](https://github.com/apache/hadoop/blob/rel/release-3.3.6/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/GroupMappingServiceProvider.java) that users can implement to extend these [group mappings](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/GroupsMapping.html). The Stackable Group Mapper does this to look up user groups from OPA.
9+
10+
## OPA Mappings
11+
12+
OPA mappings are returned from the [User-Info-Fetcher](https://docs.stackable.tech/home/nightly/opa/usage-guide/user-info-fetcher#_example_rego_rule) in this form:
13+
14+
```json
15+
{
16+
"id": "af07f12c-a2db-40a7-93e0-874537bdf3f5",
17+
"username": "alice",
18+
"groups": [
19+
"/superset-admin"
20+
],
21+
"customAttributes": {}
22+
}
23+
```
24+
25+
The Group Mapper only needs the group listing, which can be requested specifically from the OPA server by providing the current user and using the `json.filter` function to keep only the `groups` field, returning a segment that looks like this:
26+
27+
```json
28+
{
29+
"result": {
30+
"groups": {
31+
"groups": [
32+
"/admin",
33+
"/superuser"
34+
]
35+
}
36+
}
37+
}
38+
```
39+
40+
The leading slash is required by OPA/Keycloak to allow the definition of subgroups; the group mapper removes it before returning the list of group names to the internal calling routine.
41+
42+
## Configuration
43+
44+
Group mappings are resolved on the NameNode and the following configuration should be added to the NameNode role:
45+
46+
### envOverrides
47+
48+
#### HADOOP_CLASSPATH
49+
50+
* Fixed value of `"/stackable/hadoop/share/hadoop/tools/lib/*.jar"`
51+
52+
### configOverrides / `core-site.xml`
53+
54+
#### hadoop.security.group.mapping
55+
56+
* Fixed value of `"tech.stackable.hadoop.StackableGroupMapper"`
57+
58+
#### hadoop.security.group.mapping.opa.url
59+
60+
* The OPA server endpoint, e.g. `"http://test-opa.default.svc.cluster.local:8081/v1/data/hdfs"`
61+
62+
#### hadoop.user.group.static.mapping.overrides
63+
64+
* The hdfs-operator adds a default static mapping whenever Kerberos is activated. This default should be overridden with an empty string (`""`) so that the group mapping implementation can provide this information instead.
65+
66+
## Testing
67+
68+
CRDs for spinning up test infrastructure are provided in `test/stack`. The Tiltfile will deploy these resources, build the mapper and copy it into the Docker image, and re-deploy the image to the running HdfsCluster.
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package tech.stackable.hadoop;
2+
3+
import java.net.URI;
4+
import java.net.http.HttpResponse;
5+
6+
import static tech.stackable.hadoop.StackableGroupMapper.OPA_MAPPING_URL_PROP;
7+
8+
public abstract class OpaException extends RuntimeException {
9+
10+
public OpaException(String message, Throwable cause) {
11+
super(message, cause);
12+
}
13+
14+
public static final class UriInvalid extends OpaException {
15+
public UriInvalid(URI uri, Throwable cause) {
16+
super("Open Policy Agent URI is invalid (see configuration property \""
17+
+ OPA_MAPPING_URL_PROP + "\"): " + uri, cause);
18+
}
19+
}
20+
21+
public static final class EndPointNotFound extends OpaException {
22+
public EndPointNotFound(String url) {
23+
super("Open Policy Agent URI is unreachable (see configuration property \""
24+
+ OPA_MAPPING_URL_PROP + "\"): " + url, null);
25+
}
26+
}
27+
28+
public static final class OpaServerError extends OpaException {
29+
public <T> OpaServerError(String query, HttpResponse<T> response) {
30+
super("OPA server returned status " + response.statusCode() + " when processing query "
31+
+ query + ": " + response.body(), null);
32+
}
33+
}
34+
}

src/main/java/tech/stackable/hadoop/StackableGroupMapper.java

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,29 @@
1414
import java.net.http.HttpRequest;
1515
import java.net.http.HttpResponse;
1616
import java.util.List;
17+
import java.util.Objects;
1718

1819
public class StackableGroupMapper implements GroupMappingServiceProvider {
19-
private static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.url";
20+
public static final String OPA_MAPPING_URL_PROP = "hadoop.security.group.mapping.opa.url";
2021
private final Logger LOG = LoggerFactory.getLogger(StackableGroupMapper.class);
21-
private final Configuration configuration;
2222
private final HttpClient httpClient = HttpClient.newHttpClient();
2323
private final ObjectMapper json;
24+
private URI opaUri = null;
2425

2526
public StackableGroupMapper() {
26-
this.configuration = new Configuration();
27+
Configuration configuration = new Configuration();
28+
29+
String opaMappingUrl = configuration.get(OPA_MAPPING_URL_PROP);
30+
if (opaMappingUrl == null) {
31+
throw new RuntimeException("Config \"" + OPA_MAPPING_URL_PROP + "\" missing");
32+
}
33+
34+
try {
35+
this.opaUri = URI.create(opaMappingUrl);
36+
} catch (Exception e) {
37+
throw new OpaException.UriInvalid(opaUri, e);
38+
}
39+
2740
this.json = new ObjectMapper()
2841
// https://github.com/stackabletech/trino-opa-authorizer/issues/24
2942
// OPA server can send other fields, such as `decision_id`, when enabling decision logs
@@ -43,15 +56,7 @@ public StackableGroupMapper() {
4356
public List<String> getGroups(String user) throws IOException {
4457
LOG.info("Calling StackableGroupMapper.getGroups for user [{}]", user);
4558

46-
String opaMappingUrl = configuration.get(OPA_MAPPING_URL_PROP);
47-
48-
if (opaMappingUrl == null) {
49-
throw new RuntimeException("Config \"" + OPA_MAPPING_URL_PROP + "\" missing");
50-
}
51-
52-
URI opaUri = URI.create(opaMappingUrl);
5359
HttpResponse<String> response = null;
54-
5560
OpaQuery query = new OpaQuery(new OpaQuery.OpaQueryInput(user));
5661
String body = json.writeValueAsString(query);
5762

@@ -66,9 +71,15 @@ public List<String> getGroups(String user) throws IOException {
6671
LOG.error(e.getMessage());
6772
}
6873

69-
if (response == null || response.statusCode() != 200) {
70-
throw new IOException(opaUri.toString());
74+
switch (Objects.requireNonNull(response).statusCode()) {
75+
case 200:
76+
break;
77+
case 404:
78+
throw new OpaException.EndPointNotFound(opaUri.toString());
79+
default:
80+
throw new OpaException.OpaServerError(query.toString(), response);
7181
}
82+
7283
String responseBody = response.body();
7384
LOG.debug("Response body [{}]", responseBody);
7485

test/stack/05-opa.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,14 @@ data:
99
test.rego: |
1010
package hdfs
1111
12+
# this will return the group data in this form:
13+
# "result": {
14+
# "groups":
15+
# "groups": [
16+
# "/admin",
17+
# "/superuser"
18+
# ]
19+
# },...
1220
groups := json.filter(users_by_name[input.username], ["groups"])
1321
1422
# returning data in the form presented by the UIF

test/stack/10-hdfs.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ spec:
4141
HADOOP_CLASSPATH: "/stackable/hadoop/share/hadoop/tools/lib/*.jar"
4242
configOverrides:
4343
core-site.xml:
44+
# the mapper is only handled on the namenode so no need to apply this config to all roles
4445
hadoop.security.group.mapping: "tech.stackable.hadoop.StackableGroupMapper"
4546
hadoop.security.group.mapping.opa.url: "http://test-opa.default.svc.cluster.local:8081/v1/data/hdfs"
4647
# The operator adds a default static mapping when kerberos is activated, see:

0 commit comments

Comments
 (0)