diff --git a/build-images.sh b/build-images.sh index be502829e8..c41c8e9a47 100755 --- a/build-images.sh +++ b/build-images.sh @@ -78,6 +78,8 @@ case $CLUSTER_TYPE in --build-arg="HADOOP_VERSION=${HADOOP_VERSION}" \ --build-arg="SSM_APP_VERSION=${SSM_APP_VERSION}" . + docker build -f ./supports/tools/docker/multihost/kerberos/Dockerfile-kdc -t cloud-hub.adsw.io/library/ssm-kdc-server:${HADOOP_VERSION} . + docker build -f ./supports/tools/docker/multihost/datanode/Dockerfile-hadoop-datanode -t cloud-hub.adsw.io/library/hadoop-datanode:${HADOOP_VERSION} . docker build -f ./supports/tools/docker/multihost/namenode/Dockerfile-hadoop-namenode -t cloud-hub.adsw.io/library/hadoop-namenode:${HADOOP_VERSION} . diff --git a/smart-hadoop-support/smart-hadoop/src/test/java/org/smartdata/hdfs/action/TestCheckSumAction.java b/smart-hadoop-support/smart-hadoop/src/test/java/org/smartdata/hdfs/action/TestCheckSumAction.java index 79dc8ddeed..d3fb244331 100644 --- a/smart-hadoop-support/smart-hadoop/src/test/java/org/smartdata/hdfs/action/TestCheckSumAction.java +++ b/smart-hadoop-support/smart-hadoop/src/test/java/org/smartdata/hdfs/action/TestCheckSumAction.java @@ -24,11 +24,16 @@ import org.smartdata.action.ActionException; import org.smartdata.hdfs.MiniClusterHarness; -import java.io.FileNotFoundException; import java.io.IOException; +import java.io.UnsupportedEncodingException; import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; import java.util.HashMap; +import java.util.List; import java.util.Map; +import java.util.Optional; +import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; @@ -60,11 +65,10 @@ public void testCheckSumAction() throws IOException { action.init(args); action.run(); - String expectedLog = "/testPath/file1\tMD5-of-1MD5-of-50CRC32C\t" - + "000000320000000000000001cd5359474b0be93eb57b7f1aaf9f3f55\n"; + List expectedChecksumFiles = Collections.singletonList("/testPath/file1"); assertTrue(action.getExpectedAfterRun()); - assertEquals(expectedLog, action.getActionStatus().getLog()); + assertEquals(expectedChecksumFiles, getChecksumFiles()); } @Test @@ -82,16 +86,14 @@ public void testCheckSumActionDirectoryArg() throws IOException { action.init(args); action.run(); - String expectedLog = - "/testPath/0\tMD5-of-0MD5-of-50CRC32C\t" - + "00000032000000000000000067ec113c30452f3ebfda70343c1363cf\n" - + "/testPath/1\tMD5-of-0MD5-of-50CRC32C\t" - + "000000320000000000000000ecaf7978b63f94cc35068ff56ae97ecb\n" - + "/testPath/2\tMD5-of-0MD5-of-50CRC32C\t" - + "000000320000000000000000e90604bcd8b102008713620df0a3e56f\n"; + List expectedChecksumFiles = Arrays.asList( + "/testPath/0", + "/testPath/1", + "/testPath/2" + ); assertTrue(action.getExpectedAfterRun()); - assertEquals(expectedLog, action.getActionStatus().getLog()); + assertEquals(expectedChecksumFiles, getChecksumFiles()); } @Test @@ -121,4 +123,16 @@ public void testThrowIfDirectoryNotFound() throws IOException { assertTrue(error instanceof ActionException); assertEquals("Provided directory doesn't exist: /unknownDir/", error.getMessage()); } + + private List getChecksumFiles() throws UnsupportedEncodingException { + String[] logLines = Optional.ofNullable(action.getActionStatus().getLog()) + .map(log -> log.split("\n")) + .orElse(new String[0]); + + return Arrays.stream(logLines) + .map(line -> line.split("\t")) + .filter(tokens -> tokens.length != 0) + .map(tokens -> tokens[0]) + .collect(Collectors.toList()); + } } 
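The refactored `TestCheckSumAction` above no longer pins exact digests (which depend on the file contents and checksum algorithm) and instead keeps only the first, path column of each tab-separated log line. For orientation, the layout that `getChecksumFiles()` splits apart — path, algorithm name, hex digest — appears to match what the HDFS shell prints for a file checksum. A minimal sketch, assuming a running HDFS and a file at the hypothetical test path `/testPath/file1`:

```shell
# Print the checksum of a file; the output is tab-separated:
#   <path>  <algorithm, e.g. MD5-of-0MD5-of-512CRC32C>  <hex digest>
hdfs dfs -checksum /testPath/file1

# Keeping only the first column mirrors what the new test helper does with the action log
hdfs dfs -checksum /testPath/file1 | cut -f1
```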
diff --git a/supports/tools/docker/README.md b/supports/tools/docker/README.md index 6d735b44d2..c9b302bb0c 100644 --- a/supports/tools/docker/README.md +++ b/supports/tools/docker/README.md @@ -1,6 +1,7 @@ # Run Hadoop cluster with SSM in docker containers There are two cluster types: + * singlehost * multihost @@ -19,7 +20,7 @@ Command to build docker images in singlehost cluster mode (from project root dir) ``` ./build-images.sh --cluster=singlehost --hadoop=3.3 ``` -Command to start docker containers +Command to start docker containers ```shell cd ./supports/tools/docker @@ -32,6 +33,7 @@ cd ./supports/tools/docker * Hadoop namenode, node manager, resource manager in container * SSM Server container * SSM metastore as postgres container +* Kerberos KDC container Command to build docker images in multihost cluster mode (from project root dir) @@ -46,9 +48,44 @@ cd ./supports/tools/docker ./start-demo.sh --cluster=multihost --hadoop=3.3 ``` +Use one of the following credentials to log in to the Web UI + +| Login | Password | Type | +|----------------|-----------|----------| +| john | 1234 | static | +| krb_user1@DEMO | krb_pass1 | kerberos | +| krb_user2@DEMO | krb_pass2 | kerberos | + +### Testing SPNEGO auth + +In order to test the SPNEGO authentication provider, you need to: + +1. Move the `supports/tools/docker/multihost/kerberos/krb5.conf` Kerberos configuration file to the `/etc` directory + (after backing up your old config file) +2. Log in to the KDC server with one of the Kerberos principals + +```shell +kinit krb_user1 +``` + +3. Add the following lines to the `/etc/hosts` file + +``` +127.0.0.1 ssm-server.demo +127.0.0.1 kdc-server.demo +``` + +4. Try to access any SSM resource. The following query should respond with code 200 and a JSON body: + +```shell +curl --negotiate http://ssm-server.demo:8081/api/v2/audit/events +``` + # Run/Test SSM with Docker -Docker can greatly reduce the time spent installing and maintaining software on servers and developer machines. This document presents the basic workflow of running/testing SSM with Docker. [Docker Quick Start](https://docs.docker.com/get-started/) +Docker can greatly reduce the time spent installing and maintaining software on servers and developer machines. This +document presents the basic workflow of running/testing SSM with +Docker. [Docker Quick Start](https://docs.docker.com/get-started/) ## Necessary Components @@ -56,7 +93,8 @@ Docker can greately reduce boring time for installing and maintaining software o #### Launch a postgresql container -Pull the latest official postgresql image from docker store. You can use `postgres:tag` to specify the PostgreSQL version (`tag`) you want. +Pull the latest official postgresql image from docker store. You can use `postgres:tag` to specify the PostgreSQL version ( +`tag`) you want. ``` docker pull postgres ``` @@ -67,19 +105,22 @@ Launch a postgres container with a given {passowrd} on 5432, and create a test d ```bash docker run -p 5432:5432 --name {container_name} -e POSTGRES_PASSWORD={password} -e POSTGRES_DB={database_name} -d postgres:latest ``` + **Parameters:** - `container_name` name of container - `password` root password of user root for login and access. -- `database_name` Create a new database/schema with given name. +- `database_name` Create a new database/schema with given name. ### HDFS on Docker **Note that this part is not suggested on OSX (mac), because the containers' network is limited on OSX.** -Pull a well-known third-party hadoop image from docker store.
You can use `hadoop-docker:tag` to specify the Hadoop version (`tag`) you want. +Pull a well-known third-party hadoop image from docker store. You can use `hadoop-docker:tag` to specify the Hadoop +version (`tag`) you want. #### Set an HDFS Container + ```bash docker pull sequenceiq/hadoop-docker ``` @@ -89,7 +130,9 @@ Launch a Hadoop container with an exposed namenode.rpcserver. ```bash docker run -it --add-host=moby:127.0.0.1 --ulimit memlock=2024000000:2024000000 -p 9000:9000 --name=hadoop sequenceiq/hadoop-docker /etc/bootstrap.sh -bash ``` -Note that we try to launch an interactive docker container. Use the following command to check HDFS status. We also set `memlock=2024000000` for cache size. + +Note that we try to launch an interactive docker container. Use the following command to check HDFS status. We also set +`memlock=2024000000` for cache size. ``` cd $HADOOP_PREFIX @@ -97,7 +140,9 @@ bin/hdfs dfs -ls / ``` #### Configure HDFS with multiple storage types and cache -Edit `$HADOOP_PREFIX/etc/hadoop/hdfs-site.xml` and add the property below. This will turn off permission check to avoid `Access denied for user ***. Superuser privilege is required`. + +Edit `$HADOOP_PREFIX/etc/hadoop/hdfs-site.xml` and add the property below. This will turn off permission check to avoid +`Access denied for user ***. Superuser privilege is required`. ``` @@ -106,7 +151,8 @@ Edit `$HADOOP_PREFIX/etc/hadoop/hdfs-site.xml` and add the property below. This ``` -Create `/tmp/hadoop-root/dfs/data1~3` for different storage types. Delete all content in `/tmp/hadoop-root/dfs/data` and `/tmp/hadoop-root/dfs/name`, then use `bin/hdfs namenode -format` to format HDFS. +Create `/tmp/hadoop-root/dfs/data1~3` for different storage types. Delete all content in `/tmp/hadoop-root/dfs/data` and +`/tmp/hadoop-root/dfs/name`, then use `bin/hdfs namenode -format` to format HDFS. Add the following properties to `$HADOOP_PREFIX/etc/hadoop/hdfs-site.xml`. @@ -129,8 +175,8 @@ Add the following properties to `$HADOOP_PREFIX/etc/hadoop/hdfs-site.xml`. ``` - Restart HDFS. + ``` $HADOOP_PREFIX/sbin/stop-dfs.sh $HADOOP_PREFIX/sbin/start-dfs.sh @@ -151,14 +197,18 @@ Assuming you are in SSM root directory, modify `conf/druid.xml` to enable SSM to root {root_password} ``` -Wait for at least 10 seconds. Then, use `bin/start-smart.sh -format` to format (re-init) the database. Also, you can use this command to clear all data in the database during tests. -#### Stop/Remove Postgres container +Wait for at least 10 seconds. Then, use `bin/start-smart.sh -format` to format (re-init) the database. Also, you can use +this command to clear all data in the database during tests. -You can use `docker stop {container_name}` to stop the postgres container. The postgres service then cannot be accessed until you start it again with `docker start {container_name}`. Note that `stop/start` will not remove any data from your postgres container. +#### Stop/Remove Postgres container -Use `docker rm {container_name}` to remove the postgres container if it is no longer needed. If you don't remember the specific name of container, you can use `docker ps -a` to look for it. +You can use `docker stop {container_name}` to stop the postgres container. The postgres service then cannot be +accessed until you start it again with `docker start {container_name}`. Note that `stop/start` will not remove any +data from your postgres container. +Use `docker rm {container_name}` to remove the postgres container if it is no longer needed.
If you don't remember +the specific name of container, you can use `docker ps -a` to look for it. ### HDFS @@ -167,6 +217,7 @@ Use `docker rm {container_name}` to remove postgres container, if this container Configure `namenode.rpcserver` in `smart-site.xml`. ```xml + smart.dfs.namenode.rpcserver diff --git a/supports/tools/docker/multihost/Dockerfile-hadoop-base b/supports/tools/docker/multihost/Dockerfile-hadoop-base index 728ee626bb..1f89284b29 100644 --- a/supports/tools/docker/multihost/Dockerfile-hadoop-base +++ b/supports/tools/docker/multihost/Dockerfile-hadoop-base @@ -13,7 +13,7 @@ ENV SSM_HOME=/opt/ssm ENV HADOOP_URL https://archive.apache.org/dist/hadoop/core/hadoop-$HADOOP_VERSION/hadoop-$HADOOP_VERSION.tar.gz RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - net-tools curl wget netcat procps gnupg libsnappy-dev && rm -rf /var/lib/apt/lists/* + net-tools curl wget netcat procps gnupg libsnappy-dev krb5-user && rm -rf /var/lib/apt/lists/* # Install SSH server RUN apt-get update \ diff --git a/supports/tools/docker/multihost/conf/agents b/supports/tools/docker/multihost/conf/agents index ec4e98459a..972d884c20 100644 --- a/supports/tools/docker/multihost/conf/agents +++ b/supports/tools/docker/multihost/conf/agents @@ -1 +1 @@ -hadoop-datanode \ No newline at end of file +hadoop-datanode.demo \ No newline at end of file diff --git a/supports/tools/docker/multihost/conf/core-site.xml b/supports/tools/docker/multihost/conf/core-site.xml index e5cfd48c8f..0731a0321e 100644 --- a/supports/tools/docker/multihost/conf/core-site.xml +++ b/supports/tools/docker/multihost/conf/core-site.xml @@ -2,11 +2,24 @@ fs.defaultFS - hdfs://hadoop-namenode:8020 + hdfs://hadoop-namenode.demo:8020 fs.hdfs.impl org.smartdata.hadoop.filesystem.SmartFileSystem The FileSystem for hdfs URL + + hadoop.security.authorization + true + + + hadoop.security.authentication + kerberos + + + + smart.server.kerberos.principal + ssm/ssm-server.demo@DEMO + diff --git a/supports/tools/docker/multihost/conf/druid.xml b/supports/tools/docker/multihost/conf/druid.xml index 8f4c600d29..eca7096f5c 100644 --- a/supports/tools/docker/multihost/conf/druid.xml +++ b/supports/tools/docker/multihost/conf/druid.xml @@ -1,7 +1,7 @@ - jdbc:postgresql://ssm-metastore-db:5432/metastore + jdbc:postgresql://ssm-metastore-db.demo:5432/metastore ssm ssm diff --git a/supports/tools/docker/multihost/conf/hdfs-site.xml b/supports/tools/docker/multihost/conf/hdfs-site.xml index db26a98a5a..e105a90d32 100644 --- a/supports/tools/docker/multihost/conf/hdfs-site.xml +++ b/supports/tools/docker/multihost/conf/hdfs-site.xml @@ -20,11 +20,6 @@ - - - hadoop.security.authentication - simple - dfs.namenode.fs-limits.min-block-size @@ -32,18 +27,61 @@ smart.server.rpc.address - ssm-server:7042 + ssm-server.demo:7042 dfs.datanode.data.dir [RAM_DISK]file://hadoop/dfs/ram-data,[SSD]file://hadoop/dfs/ssd-data,[DISK]file://hadoop/dfs/data,[ARCHIVE]file://hadoop/dfs/archive-data - dfs.permissions.enabled - false + hadoop.user.group.static.mapping.overrides + ssm=supergroup;agent=supergroup dfs.blocksize 1048576 + + dfs.namenode.keytab.file + /etc/secrets/namenode.keytab + + + dfs.namenode.kerberos.principal + namenode/_HOST@DEMO + + + dfs.namenode.delegation.token.max-lifetime + 604800000 + The maximum lifetime in milliseconds for which a delegation token is valid. 
+ + + dfs.datanode.keytab.file + /etc/secrets/datanode.keytab + + + dfs.datanode.kerberos.principal + datanode/_HOST@DEMO + + + dfs.block.access.token.enable + true + + + + + dfs.datanode.address + 0.0.0.0:1004 + + + dfs.datanode.http.address + 0.0.0.0:1006 + + + dfs.datanode.https.address + 0.0.0.0:1007 + + + dfs.datanode.ipc.address + 0.0.0.0:1005 + diff --git a/supports/tools/docker/multihost/conf/servers b/supports/tools/docker/multihost/conf/servers index 12e46f04f1..8ffee57843 100644 --- a/supports/tools/docker/multihost/conf/servers +++ b/supports/tools/docker/multihost/conf/servers @@ -1 +1 @@ -ssm-server \ No newline at end of file +ssm-server.demo \ No newline at end of file diff --git a/supports/tools/docker/multihost/conf/smart-default.xml b/supports/tools/docker/multihost/conf/smart-default.xml index 2cca44718a..ac73f944c1 100644 --- a/supports/tools/docker/multihost/conf/smart-default.xml +++ b/supports/tools/docker/multihost/conf/smart-default.xml @@ -23,30 +23,125 @@ + + smart.server.rpc.address + 0.0.0.0:7042 + rpc server of SSM + + + + + smart.ignore.dirs + + + SSM will completely ignore files under the given HDFS directory. + For more than one directory, they should be separated by ",". + + + + + smart.cover.dirs + + + SSM will only fetch files under the given HDFS directory. + For more than one directory, they should be separated by ",". + By default, all HDFS files are covered. + + + + + smart.work.dir + /system/ssm + + This HDFS directory is used as a work directory for SSM to store tmp files. + The default path is "/system/ssm", and SSM will ignore HDFS inotify for + files under the work directory. Only one directory can be set for this property. + + + + + smart.client.concurrent.report.enabled + true + + This property is used to enable/disable concurrent report for SmartClient. + If it is enabled, SmartClient will connect to multiple configured smart + servers concurrently, which is an optimization to find active smart server. + Only active smart sever will respond to build successful connection. If + report is successfully delivered to active smart server, attempting to + connect to other smart servers will be canceled. + + + + + + smart.dfs.enabled true - For SSM, all functions disabled except rpc and RESTApi service if false. + By setting false, all SSM functions except rpc and RESTApi service will be disabled. - smart.server.rpc.address - ssm-server:7042 - rpc server of SSM + hadoop.security.credential.provider.path + + + This property specifies jceks path which can store password of metastore by + using alias 'smart.metastore.password'. An example is jceks://file/root/ssm.jceks. + No path is provided by default, then the password configured in druid.xml will be + used. 
+ + + + + smart.server.rpc.handler.count + 80 + number of handlers in rpc server - smart.agent.address - hadoop-datanode:7048 - SmartAgent address + smart.agent.port + 7048 + SmartAgent port - smart.agent.master.address - hadoop-datanode:7051 - SmartAgent master address + smart.agent.master.port + 7051 + SmartAgent master port + + + + smart.namespace.fetcher.batch + 500 + Batch size of Namespace fetcher + + + + smart.namespace.fetcher.producers.num + 3 + Number of producers in namespace fetcher + + + + smart.namespace.fetcher.consumers.num + 6 + Number of consumers in namespace fetcher @@ -54,4 +149,399 @@ 5 Max number of rules that can be executed in parallel + + + smart.cmdlet.executors + 10 + Max number of cmdlets that can be executed in parallel + + + + smart.dispatch.cmdlets.extra.num + 10 + The number of extra cmdlets dispatched by Smart Server + + + + smart.cmdlet.dispatchers + 3 + Max number of cmdlet dispatchers that work in parallel + + + + smart.cmdlet.mover.max.concurrent.blocks.per.srv.inst + 0 + + Max number of file mover cmdlets that can be executed in parallel per SSM service. + 0 means unlimited. + + + + + smart.action.move.throttle.mb + 0 + The throughput limit (MB) for SSM move overall + + + + smart.action.copy.throttle.mb + 0 + The throughput limit (MB) for SSM copy overall + + + + smart.action.ec.throttle.mb + 0 + The throughput limit (MB) for SSM EC overall + + + + smart.action.local.execution.disabled + false + + The default false value means active smart server can also execute actions, + like an agent. If it is set to true, active SSM server will NOT be able to + execute actions. This configuration has no impact on standby smart server. + + + + + smart.cmdlet.max.num.pending + 20000 + + Maximum number of pending cmdlets in SSM server. + Default value is 20000. + + + + + smart.cmdlet.hist.max.num.records + 100000 + + Maximum number of historic cmdlet records kept in SSM server. + Oldest cmdlets will be deleted if exceeds the threshold. + + + + + smart.cmdlet.hist.max.record.lifetime + 30day + + Maximum life time of historic cmdlet records kept in SSM server. + Cmdlet record will be deleted from SSM server if exceeds the threshold. + Valid time unit can be 'day', 'hour', 'min', 'sec'. The minimum update + granularity is 5sec. + + + + + smart.cmdlet.cache.batch + 600 + + Maximum batch size of cmdlet batch insert. + + + + + smart.copy.scheduler.base.sync.batch + 500 + + Maximum batch size of copyscheduler base sync batch insert. + + + + + smart.file.diff.max.num.records + 10000 + + Maximum file diff records with useless state. + + + + + smart.status.report.period + 10 + + The status report period for actions. The time unit is millisecond. + + + + + smart.status.report.period.multiplier + 50 + + The report period multiplied by this multiplier defines largest report interval. + + + + + smart.status.report.ratio + 0.2 + + If the finished actions ratio equals or exceeds this value, status report will + be triggered. + + + + + smart.namespace.fetcher.ignore.unsuccessive.inotify.event + false + + Skip fetch the entire namespace and only use available iNotify events to + update namespace if true. NOTE: This may leads to some unpredictable + consequences and should only be used for test. + + + + + smart.cmdlet.dispatcher.log.disp.result + false + + Log dispatch result for each cmdlet been dispatched. + + + + + smart.cmdlet.dispatcher.log.disp.metrics.interval + 5000 + + Time interval in millisecond to log statistic metrics of cmdlet dispatcher. 
+ If no cmdlet dispatched in the time interval, then no output for this interval. + Disable the logger if equals 0. + + + + + smart.compression.codec + Zlib + + The default compression codec for SSM compression (Zlib, Lz4, Bzip2, snappy). + User can also specify a codec in action arg, then this default setting will + be overridden. + + + + + smart.compression.max.split + 1000 + + The max number of chunks split for compression. + + + + + smart.compact.batch.size + 200 + + The max number of small files to be compacted by per compact action. + + + + + smart.compact.container.file.threshold.mb + 1024 + + The max size of a container file in MB. + + + + + smart.metastore.migration.liquibase.changelog.path + db/changelog/changelog-root.xml + + Path to liquibase changelog root file. + + + + + smart.ignore.path.templates + + + Comma-separated list of regex templates of HDFS paths to be completely ignored by SSM. + + + + + smart.internal.path.templates + .*/\..*,.*/__.*,.*_COPYING_.* + + Comma-separated list of regex templates of internal files to be completely ignored by SSM. + + + + + smart.file.access.event.fetch.interval.ms + 1000 + + The interval in milliseconds between access event fetches. + + + + + smart.cached.file.fetch.interval.ms + 5000 + + The interval in milliseconds between cached files fetches from HDFS. + + + + + smart.namespace.fetch.interval.ms + 1 + + The interval in milliseconds between namespace fetches from HDFS. + + + + + smart.mover.scheduler.storage.report.fetch.interval.ms + 120000 + + The interval in milliseconds between storage report fetches from HDFS DataNode in mover scheduler. + + + + + smart.metastore.small-file.insert.batch.size + 200 + + The max size of small file insert batch to the Metastore. + + + + + smart.agent.master.ask.timeout.ms + 5000 + + The max time in milliseconds to wait an answer from the SmartAgent master actor during action submission. + + + + + smart.file.access.count.aggregation.interval.ms + 5000 + + The interval in milliseconds that is covered by single second-granularity access count table. + + + + + smart.sync.schedule.strategy + UNORDERED + + Strategy of copying files during 'sync' rule. Possible values: + FIFO - the files created/modified first will be scheduled for transfer first + LIFO - the files created/modified last will be scheduled for transfer first + UNORDERED - no guarantees of the file scheduling order + + + + + smart.sync.file.equality.strategy + CHECKSUM + + The strategy for checking whether the files with same relative path in the source and target clusters + are equal during scheduling of the sync action. + Possible values: + FILE_LENGTH - equality check based on the file length. This strategy is fast alternative to + comparing file contents/checksums, but have some corner cases when two different files with the same + length but with different content are considered equal. + CHECKSUM - equality check based on the file checksum. This strategy is more resource-intensive, + but it doesn't return false positive results, like previous one. + + + + + smart.rest.server.port + 8081 + SSM Rest Server port + + + + smart.client.report.tasks.timeout.ms + 2000 + + Timeout in milliseconds for the successful file access report. + Has an effect only if the 'smart.client.concurrent.report.enabled' option is set to true. + + + + + smart.client.active.server.cache.path + /tmp/active_smart_server + + Local filesystem path of the active Smart Server address file-based cache. 
+ + + + + smart.rest.server.security.enabled + false + + Whether to enable SSM REST server security. + + + + + smart.rest.server.auth.spnego.enabled + false + + Whether to enable SSM REST server SPNEGO authentication method support. + + + + + smart.rest.server.auth.kerberos.enabled + false + + Whether to enable SSM REST server basic Kerberos authentication method support. + + + + + smart.rest.server.auth.predefined.enabled + false + + Whether to enable SSM REST server basic authentication with users, + predefined in the 'smart.rest.server.auth.predefined.users' option. + + + + + smart.file.access.count.aggregator.failover.retry.count + 60 + + Maximum number of attempts to save file access events + + + + + smart.file.access.count.aggregator.failover + SAVE_FAILED_WITH_RETRY + + Failover strategy for file access events aggregator. Possible values: + FAIL - throw exception, no failover. + SAVE_FAILED_WITH_RETRY - save all file access events that caused exception + for later submission with max attempts less or equals than smart.access.count.aggregator.failover.retry.count + + + + + smart.action.client.cache.ttl + 10m + + The minimum amount of time after the last access to the DFS client cache entry + that must pass in order for the entry to be evicted. + Should be in the format '[Amount][TimeUnit]', where TimeUnit is one + of 'day' or 'd', 'hour' or 'h', 'min' or 'm', 'sec' or 's'. + + + + + smart.rest.server.ssl.enabled + false + + Whether to enable SSL support for the SSM REST server. + + diff --git a/supports/tools/docker/multihost/conf/smart-site.xml b/supports/tools/docker/multihost/conf/smart-site.xml index 569d5aa508..1cad7ef24e 100644 --- a/supports/tools/docker/multihost/conf/smart-site.xml +++ b/supports/tools/docker/multihost/conf/smart-site.xml @@ -18,13 +18,77 @@ - smart.dfs.namenode.rpcserver - hdfs://hadoop-namenode:8020 - Namenode rpcserver + smart.dfs.enabled + true - + /opt/ssm/conf + + + smart.security.enable + true + + + smart.server.keytab.file + /etc/secrets/ssm.keytab + + + smart.server.kerberos.principal + ssm/ssm-server.demo@DEMO + + + smart.agent.keytab.file + /etc/secrets/agent.keytab + + + smart.agent.kerberos.principal + agent/_HOST@DEMO + + + + + smart.rest.server.security.enabled + true + + + smart.rest.server.auth.predefined.enabled + true + + + smart.rest.server.auth.predefined.users + john:1234 + + + smart.rest.server.auth.kerberos.enabled + true + + + smart.rest.server.auth.spnego.enabled + true + + + smart.rest.server.auth.spnego.keytab + /etc/secrets/http.keytab + + + smart.rest.server.auth.spnego.principal + HTTP/ssm-server.demo@DEMO + \ No newline at end of file diff --git a/supports/tools/docker/multihost/conf/yarn-site.xml b/supports/tools/docker/multihost/conf/yarn-site.xml new file mode 100644 index 0000000000..afd7ee866f --- /dev/null +++ b/supports/tools/docker/multihost/conf/yarn-site.xml @@ -0,0 +1,39 @@ + + + + + + + + + yarn.resourcemanager.principal + yarn/hadoop-namenode.demo@DEMO + + + yarn.nodemanager.principal + yarn/hadoop-namenode.demo@DEMO + + + yarn.resourcemanager.keytab + /etc/secrets/yarn.keytab + + + yarn.nodemanager.keytab + /etc/secrets/yarn.keytab + + diff --git a/supports/tools/docker/multihost/datanode/Dockerfile-hadoop-datanode b/supports/tools/docker/multihost/datanode/Dockerfile-hadoop-datanode index 0b537a758c..deefa84777 100644 --- a/supports/tools/docker/multihost/datanode/Dockerfile-hadoop-datanode +++ b/supports/tools/docker/multihost/datanode/Dockerfile-hadoop-datanode @@ -4,6 +4,9 @@ FROM 
cloud-hub.adsw.io/library/hadoop-base:${HADOOP_VERSION:-3.3.6} ENV HDFS_PARENT_DATA_DIR=/hadoop/dfs ENV HDFS_CONF_dfs_datanode_data_dir=file://$HDFS_PARENT_DATA_DIR/data +RUN apt-get update \ + && apt-get install -y jsvc + RUN mkdir -p $HDFS_PARENT_DATA_DIR/data RUN mkdir -p $HDFS_PARENT_DATA_DIR/ssd-data RUN mkdir -p $HDFS_PARENT_DATA_DIR/ram-data diff --git a/supports/tools/docker/multihost/datanode/hadoop-datanode-entrypoint.sh b/supports/tools/docker/multihost/datanode/hadoop-datanode-entrypoint.sh index af3dbfdf92..8d1baf5873 100644 --- a/supports/tools/docker/multihost/datanode/hadoop-datanode-entrypoint.sh +++ b/supports/tools/docker/multihost/datanode/hadoop-datanode-entrypoint.sh @@ -4,7 +4,7 @@ cp /etc/ssm/shared/id_rsa /root/.ssh/id_rsa cp /etc/ssm/shared/id_rsa.pub /root/.ssh/id_rsa.pub cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys service ssh start -ssh-keyscan ssm-server >> /root/.ssh/known_hosts +ssh-keyscan ssm-server.demo >> /root/.ssh/known_hosts echo "export JAVA_HOME=${JAVA_HOME}" >> /root/.bashrc datadir=`echo $HDFS_CONF_dfs_datanode_data_dir | perl -pe 's#file://##'` @@ -13,6 +13,8 @@ if [ ! -d $datadir ]; then exit 2 fi +chmod +r /etc/secrets/*.keytab + $HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR datanode tail -f /dev/null \ No newline at end of file diff --git a/supports/tools/docker/multihost/docker-compose.yaml b/supports/tools/docker/multihost/docker-compose.yaml index 7acb826fa4..b4b3a31919 100644 --- a/supports/tools/docker/multihost/docker-compose.yaml +++ b/supports/tools/docker/multihost/docker-compose.yaml @@ -3,11 +3,13 @@ services: hadoop-namenode: image: cloud-hub.adsw.io/library/hadoop-namenode:${HADOOP_VERSION:-3.3.6} - hostname: hadoop-namenode + hostname: hadoop-namenode.demo container_name: hadoop-namenode restart: unless-stopped volumes: - - hadoop-namenode:/hadoop/dfs/name + - hadoop-namenode:/hadoop/dfs/name + - secrets:/etc/secrets + - ./kerberos/krb5.conf:/etc/krb5.conf ports: - "9870:9870" - "8020:8020" @@ -18,12 +20,15 @@ services: interval: 30s timeout: 15s retries: 5 + networks: + - demo depends_on: - ssm-metastore-db + - kdc-server hadoop-datanode: image: cloud-hub.adsw.io/library/hadoop-datanode:${HADOOP_VERSION:-3.3.6} - hostname: hadoop-datanode + hostname: hadoop-datanode.demo container_name: hadoop-datanode restart: unless-stopped volumes: @@ -32,6 +37,8 @@ services: - hadoop-datanode-ram-data:/hadoop/dfs/ram-data - hadoop-datanode-ssd-data:/hadoop/dfs/ssd-data - hadoop-datanode-archive-data:/hadoop/dfs/archive-data + - secrets:/etc/secrets + - ./kerberos/krb5.conf:/etc/krb5.conf ports: - "9864:9864" - "7048:7048" @@ -39,35 +46,42 @@ services: env_file: - hadoop.env healthcheck: - test: curl http://hadoop-datanode:9864 || exit 1 + test: curl http://hadoop-datanode.demo:1006 || exit 1 interval: 30s timeout: 15s retries: 5 + networks: + - demo depends_on: - ssm-server ssm-server: image: cloud-hub.adsw.io/library/ssm-server:2.0.0-SNAPSHOT - hostname: ssm-server + hostname: ssm-server.demo restart: unless-stopped container_name: ssm-server volumes: - ssm-shared:/tmp/shared + - secrets:/etc/secrets + - ./kerberos/krb5.conf:/etc/krb5.conf ports: - "7042:7042" - "8081:8081" healthcheck: - test: curl http://ssm-server:8081 || exit 1 + test: curl http://ssm-server.demo:8081 || exit 1 interval: 30s timeout: 15s retries: 5 + networks: + - demo depends_on: - ssm-metastore-db + - kdc-server ssm-metastore-db: image: "docker.io/library/postgres:14.0" restart: unless-stopped - container_name: ssm-metastore-db + container_name: 
ssm-metastore-db.demo hostname: ssm-metastore-db environment: POSTGRES_DB: 'metastore' @@ -80,10 +94,25 @@ services: interval: 30s timeout: 15s retries: 3 + networks: + - demo + + kdc-server: + image: cloud-hub.adsw.io/library/ssm-kdc-server:${HADOOP_VERSION:-3.3.6} + restart: unless-stopped + container_name: kdc-server + hostname: kdc-server.demo + volumes: + - secrets:/tmp/secrets + ports: + - "749:749" + - "88:88/udp" + networks: + - demo networks: - default: - name: ssm-automation + demo: + name: demo volumes: ssm-shared: @@ -92,3 +121,4 @@ volumes: hadoop-datanode-ram-data: hadoop-datanode-ssd-data: hadoop-datanode-archive-data: + secrets: diff --git a/supports/tools/docker/multihost/hadoop.env b/supports/tools/docker/multihost/hadoop.env index a33aff9c4b..b555c55bb6 100644 --- a/supports/tools/docker/multihost/hadoop.env +++ b/supports/tools/docker/multihost/hadoop.env @@ -2,11 +2,12 @@ CLUSTER_NAME=ssm-hadoop ENSURE_NAMENODE_DIR="/hadoop/dfs/name" -CORE_CONF_fs_defaultFS=hdfs://hadoop-namenode:8020 CORE_CONF_hadoop_http_staticuser_user=root +HDFS_DATANODE_SECURE_USER="root" +JSVC_HOME=/usr/bin + HDFS_CONF_dfs_webhdfs_enabled=true -HDFS_CONF_dfs_permissions_enabled=false YARN_CONF_yarn_log___aggregation___enable=true YARN_CONF_yarn_resourcemanager_recovery_enabled=true diff --git a/supports/tools/docker/multihost/kerberos/Dockerfile-kdc b/supports/tools/docker/multihost/kerberos/Dockerfile-kdc new file mode 100644 index 0000000000..bdcc5922cd --- /dev/null +++ b/supports/tools/docker/multihost/kerberos/Dockerfile-kdc @@ -0,0 +1,18 @@ +FROM quay.io/centos/centos:stream9 + +EXPOSE 749 88/udp + +RUN yum install -y krb5-server + +ENV REALM ${REALM:-DEMO} +ENV MASTER_PASSWORD ${MASTER_PASSWORD:-masterpassword} +ENV KADMIN_PRINCIPAL ${KADMIN_PRINCIPAL:-kadmin/admin} +ENV KADMIN_PASSWORD ${KADMIN_PASSWORD:-adminpassword} + +COPY ./supports/tools/docker/multihost/kerberos/krb5.conf /etc/krb5.conf +COPY ./supports/tools/docker/multihost/kerberos/kdc.conf /etc/krb5kdc/kdc.conf +COPY ./supports/tools/docker/multihost/kerberos/kdc.conf /var/kerberos/krb5kdc/kdc.conf +COPY ./supports/tools/docker/multihost/kerberos/kadm5.acl /etc/krb5kdc/kadm5.acl +COPY ./supports/tools/docker/multihost/kerberos/kdc-init.sh /tmp/ + +CMD bash /tmp/kdc-init.sh diff --git a/supports/tools/docker/multihost/kerberos/kadm5.acl b/supports/tools/docker/multihost/kerberos/kadm5.acl new file mode 100644 index 0000000000..aa151aa89f --- /dev/null +++ b/supports/tools/docker/multihost/kerberos/kadm5.acl @@ -0,0 +1 @@ +kadmin/admin@DEMO * \ No newline at end of file diff --git a/supports/tools/docker/multihost/kerberos/kdc-init.sh b/supports/tools/docker/multihost/kerberos/kdc-init.sh new file mode 100644 index 0000000000..19a01a34c2 --- /dev/null +++ b/supports/tools/docker/multihost/kerberos/kdc-init.sh @@ -0,0 +1,52 @@ +#!/bin/bash + + +echo "==== Add appuser =========" +adduser appuser +echo "" + +echo "==== Kerberos KDC and Kadmin ========" +KADMIN_PRINCIPAL_FULL=$KADMIN_PRINCIPAL@$REALM +echo "REALM: $REALM" +echo "KADMIN_PRINCIPAL_FULL: $KADMIN_PRINCIPAL_FULL" +echo "KADMIN_PASSWORD: $KADMIN_PASSWORD" +echo "" + +echo "==== Creating realm ========" +/usr/sbin/kdb5_util create -s -r $REALM -P $MASTER_PASSWORD +echo "" + +echo "==== Creating default principals in the acl ===========" +echo "Adding $KADMIN_PRINCIPAL principal" +kadmin.local -q "delete_principal -force $KADMIN_PRINCIPAL_FULL" +kadmin.local -q "addprinc -pw $KADMIN_PASSWORD $KADMIN_PRINCIPAL_FULL" +echo "" + +echo "==== Creating SSM, HDFS and YARN 
principals ==========" +kadmin.local -q "add_principal -randkey namenode/hadoop-namenode.demo@DEMO" +kadmin.local -q "add_principal -randkey datanode/hadoop-datanode.demo@DEMO" +kadmin.local -q "add_principal -randkey HTTP/ssm-server.demo@DEMO" +kadmin.local -q "add_principal -randkey ssm/ssm-server.demo@DEMO" +kadmin.local -q "add_principal -randkey agent/hadoop-datanode.demo@DEMO" +kadmin.local -q "add_principal -randkey agent/ssm-server.demo@DEMO" +kadmin.local -q "add_principal -randkey yarn/hadoop-namenode.demo@DEMO" +kadmin.local -q "add_principal -pw krb_pass1 krb_user1@DEMO" +kadmin.local -q "add_principal -pw krb_pass2 krb_user2@DEMO" +echo "" + +echo "==== Remove old keytabs ==========" +rm -rf /tmp/secrets/*.keytab +echo "" + +echo "==== Export keytabs for SSM, HDFS and YARN ==========" +kadmin.local -q "xst -kt /tmp/secrets/namenode.keytab namenode/hadoop-namenode.demo@DEMO" && chown appuser:appuser /tmp/secrets/namenode.keytab +kadmin.local -q "xst -kt /tmp/secrets/datanode.keytab datanode/hadoop-datanode.demo@DEMO" && chown appuser:appuser /tmp/secrets/datanode.keytab +kadmin.local -q "xst -kt /tmp/secrets/http.keytab HTTP/ssm-server.demo@DEMO" && chown appuser:appuser /tmp/secrets/http.keytab +kadmin.local -q "xst -kt /tmp/secrets/ssm.keytab ssm/ssm-server.demo@DEMO" && chown appuser:appuser /tmp/secrets/ssm.keytab +kadmin.local -q "xst -kt /tmp/secrets/agent.keytab agent/hadoop-datanode.demo@DEMO" && chown appuser:appuser /tmp/secrets/agent.keytab +kadmin.local -q "xst -kt /tmp/secrets/agent.keytab agent/ssm-server.demo@DEMO" && chown appuser:appuser /tmp/secrets/agent.keytab +kadmin.local -q "xst -kt /tmp/secrets/yarn.keytab yarn/hadoop-namenode.demo@DEMO" && chown appuser:appuser /tmp/secrets/yarn.keytab +echo "" + +krb5kdc +kadmind -nofork \ No newline at end of file diff --git a/supports/tools/docker/multihost/kerberos/kdc.conf b/supports/tools/docker/multihost/kerberos/kdc.conf new file mode 100644 index 0000000000..c5059744bd --- /dev/null +++ b/supports/tools/docker/multihost/kerberos/kdc.conf @@ -0,0 +1,7 @@ +[realms] + DEMO = { + acl_file = /etc/krb5kdc/kadm5.acl + max_renewable_life = 7d 0h 0m 0s + supported_enctypes = aes256-cts-hmac-sha1-96:normal + default_principal_flags = +preauth + } \ No newline at end of file diff --git a/supports/tools/docker/multihost/kerberos/krb5.conf b/supports/tools/docker/multihost/kerberos/krb5.conf new file mode 100644 index 0000000000..be7ce36875 --- /dev/null +++ b/supports/tools/docker/multihost/kerberos/krb5.conf @@ -0,0 +1,19 @@ +[logging] + default = FILE:/var/log/krb5libs.log + kdc = FILE:/var/log/krb5kdc.log + admin_server = FILE:/var/log/kadmind.log +[libdefaults] + default_realm = DEMO + dns_lookup_realm = false + ticket_lifetime = 24h + forwardable = true + rdns = false + pkinit_anchors = /etc/pki/tls/certs/ca-bundle.crt +[realms] + DEMO = { + kdc = kdc-server.demo + admin_server = kdc-server.demo + } +[domain_realm] + .demo = DEMO + demo = DEMO \ No newline at end of file diff --git a/supports/tools/docker/multihost/namenode/hadoop-namenode-entrypoint.sh b/supports/tools/docker/multihost/namenode/hadoop-namenode-entrypoint.sh index f6bae1bb36..7ac0954736 100644 --- a/supports/tools/docker/multihost/namenode/hadoop-namenode-entrypoint.sh +++ b/supports/tools/docker/multihost/namenode/hadoop-namenode-entrypoint.sh @@ -3,7 +3,7 @@ . 
./common.sh service ssh start -ssh-keyscan ssm-server >> /root/.ssh/known_hosts +ssh-keyscan ssm-server.demo >> /root/.ssh/known_hosts echo "export JAVA_HOME=${JAVA_HOME}" >> /root/.bashrc namedir=`echo $HDFS_CONF_dfs_namenode_name_dir | perl -pe 's#file://##'` diff --git a/supports/tools/docker/multihost/namenode/healthcheck-hadoop-namenode.sh b/supports/tools/docker/multihost/namenode/healthcheck-hadoop-namenode.sh index 7332761d57..0142c657ac 100755 --- a/supports/tools/docker/multihost/namenode/healthcheck-hadoop-namenode.sh +++ b/supports/tools/docker/multihost/namenode/healthcheck-hadoop-namenode.sh @@ -1,10 +1,10 @@ #!/usr/bin/env bash # Check namenode -curl -f http://hadoop-namenode:9870/ || exit 1 +curl -f http://hadoop-namenode.demo:9870/ || exit 1 # Check resource manager -curl -f http://hadoop-namenode:8088/ || exit 1 +curl -f http://hadoop-namenode.demo:8088/ || exit 1 # Check node manager -curl -f http://hadoop-namenode:8042/ || exit 1 +curl -f http://hadoop-namenode.demo:8042/ || exit 1 diff --git a/supports/tools/docker/multihost/ssm/Dockerfile-ssm-server b/supports/tools/docker/multihost/ssm/Dockerfile-ssm-server index 006251ad0d..e8381ec28c 100644 --- a/supports/tools/docker/multihost/ssm/Dockerfile-ssm-server +++ b/supports/tools/docker/multihost/ssm/Dockerfile-ssm-server @@ -11,7 +11,7 @@ RUN apt-get update \ && apt-get install -y nano \ && mkdir -p /root/.ssh \ && chmod 0700 /root/.ssh \ - && ssh-keygen -t rsa -C "root@ssm-server" -N "" -f /root/.ssh/id_rsa \ + && ssh-keygen -t rsa -C "root@ssm-server.demo" -N "" -f /root/.ssh/id_rsa \ && cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys RUN mkdir /opt/ssm diff --git a/supports/tools/docker/multihost/ssm/ssm-server-entrypoint.sh b/supports/tools/docker/multihost/ssm/ssm-server-entrypoint.sh index 451f2887e4..8004047c25 100644 --- a/supports/tools/docker/multihost/ssm/ssm-server-entrypoint.sh +++ b/supports/tools/docker/multihost/ssm/ssm-server-entrypoint.sh @@ -23,6 +23,6 @@ echo "Starting SSM agents" echo "-------------------" source bin/start-agent.sh & -wait_for_it hadoop-datanode:7048 +wait_for_it hadoop-datanode.demo:7048 tail -f /var/log/ssm/* \ No newline at end of file
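Beyond the test and image changes, the additions above wire Kerberos through the whole multihost demo, and the README's SPNEGO steps can be smoke-tested end to end from the host. A minimal sketch, assuming the cluster is up (`./start-demo.sh --cluster=multihost`), the bundled `krb5.conf` has been copied to `/etc`, and `/etc/hosts` maps `ssm-server.demo` and `kdc-server.demo` to 127.0.0.1; the user and password come from `kdc-init.sh`:

```shell
# Obtain a ticket for one of the demo users created by kdc-init.sh
echo 'krb_pass1' | kinit krb_user1@DEMO
klist   # the cache should now hold a krbtgt/DEMO@DEMO ticket for krb_user1@DEMO

# Hit the SPNEGO-protected REST endpoint; 200 with a JSON body is expected.
# curl needs an (empty) user together with --negotiate to attempt SPNEGO.
curl --negotiate -u : -s -o /dev/null -w '%{http_code}\n' \
  http://ssm-server.demo:8081/api/v2/audit/events
```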
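If the SPNEGO request or the HDFS services fail to authenticate, a useful first check is whether the KDC container actually created the principals and exported the keytabs into the shared `secrets` volume. A sketch using the compose service names from `docker-compose.yaml` (run from `supports/tools/docker/multihost`); it assumes `kadmin.local` is usable inside the KDC container and that the Hadoop images have the Kerberos client tools from the `krb5-user` package added to the base Dockerfile:

```shell
# List the principals registered in the demo realm
docker compose exec kdc-server kadmin.local -q "list_principals"

# Inspect an exported keytab and authenticate with it as the namenode principal
docker compose exec hadoop-namenode klist -kt /etc/secrets/namenode.keytab
docker compose exec hadoop-namenode \
  kinit -kt /etc/secrets/namenode.keytab namenode/hadoop-namenode.demo@DEMO
```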