diff --git a/jac-cloud/.gitignore b/jac-cloud/.gitignore index 269f954165..3e9601fd5b 100644 --- a/jac-cloud/.gitignore +++ b/jac-cloud/.gitignore @@ -51,3 +51,7 @@ out.txt # Mypy files # .mypy_cache* .jac_mypy_cache + + +# Jac-Cloud # +logs/ \ No newline at end of file diff --git a/jac-cloud/README.md b/jac-cloud/README.md index dec1f2cb9b..b389f977db 100644 --- a/jac-cloud/README.md +++ b/jac-cloud/README.md @@ -11,4 +11,5 @@ pre-commit install ``` # **REFERENCES** ### [**jac-cloud**](./docs/Jaseci.md) -### [**Environment Variables**](./docs/Environment-Variables.md) \ No newline at end of file +### [**Environment Variables**](./docs/Environment-Variables.md) +### [**Logger**](./docs/Logger.md) \ No newline at end of file diff --git a/jac-cloud/docs/Environment-Variables.md b/jac-cloud/docs/Environment-Variables.md index e926bfd908..a55d6ee308 100644 --- a/jac-cloud/docs/Environment-Variables.md +++ b/jac-cloud/docs/Environment-Variables.md @@ -25,6 +25,13 @@ | RESET_CODE_TIMEOUT | Password reset code expiration in hours | 24 | | SENDGRID_HOST | Sendgrid host used for hyperlinking verification/reset code | http://localhost:8000 | | SENDGRID_API_KEY | Sendgrid api key | null | +| LOGGER_NAME | Specified logger name | app | +| LOGGER_LEVEL | Control log level (uppercase level name) | DEBUG | +| LOGGER_FILE_PATH | Log directory and name | /tmp/jac_cloud_logs/jac-cloud.log | +| LOGGER_ROLLOVER_INTERVAL | M = every minute, H = hourly, D = daily, W0-W6 = weekly (0 = Monday) | D | +| LOGGER_MAX_BACKUP | Maximum number of backup files before it deletes old files. 
Non positive value will not have maximum | -1 | +| LOGGER_ROLLOVER_MAX_FILE_SIZE | Maximum file size in bytes before it will rollover to new file | 10000000 | +| LOGGER_USE_UTC | If logger will use UTC | false | # **SSO Supported Enviroment Variable** ## Supported Platform diff --git a/jac-cloud/docs/Logger.md b/jac-cloud/docs/Logger.md new file mode 100644 index 0000000000..10c65f798d --- /dev/null +++ b/jac-cloud/docs/Logger.md @@ -0,0 +1,57 @@ +# Logging +* jac-cloud server logs incoming requests and outgoing responses by default to log files stored in `/tmp/` directory +* The log files are on a daily rotation. Meaning there will be a new log file created every day to prevent log files from getting too large. +* For production usage, we recommend connecting your jac-cloud logs to an Elastic instance. +# Quick Start: Integration with Elastic +* Assuming you have a running Elastic instance, you just need to use filebeat to ingest the log files into elastic. +* We provide a template filebeat config file to get started at `scripts/filebeat-template.yml`. If you want to adopt the default configuration, simply change the `hosts` and `api_key` field. + * Change the hosts field to point to your elastic instance. +
+> :warning: It seems that filebeat automatically appends a 9200 port to the host URL if no port is specified. If your elastic instance is behind a load balancer and simply has a URL without a custom port, you will need to add either :80 or :443 to the hosts config. For example, `hosts: ["https://my_elastic_instance.me.com:443/"]` +
+* Then simply run `filebeat -e -c scripts/filebeat-template.yml`. +
+## More Details on Configuring and Starting Filebeat +- [Download](https://www.elastic.co/downloads/beats/filebeat) and install filebeat. 
+- setup yml based on your setup +```yml +filebeat.inputs: +- type: log + enabled: true + paths: + - /tmp/jac_cloud_logs/*-jac-cloud-*.log + - /tmp/jac_cloud_logs/jac-cloud.log + json: + keys_under_root: true + overwrite_keys: true + add_error_key: true + expand_keys: true + +output.elasticsearch: + hosts: ["localhost:9200"] + protocol: https + api_key: "id:api_key" + index: "filebeat-testing" + +setup.template.name: "filebeat" +setup.template.pattern: "filebeat-*" +``` +- to run without restriction, run filebeat as root + - `sudo cp filebeat.yml /etc/filebeat/filebeat.yml` + - `sudo filebeat -e` +- normal run + - `filebeat -e -c filebeat.yml` +- for complete documentation + - https://www.elastic.co/guide/en/cloud/current/ec-getting-started-search-use-cases-python-logs.html + - https://www.elastic.co/guide/en/beats/filebeat/current/configuring-howto-filebeat.html + +## Additional Env Vars to customize logger +| **NAME** | **DESCRIPTION** | **DEFAULT** | +|-----------|-------------------|---------------| +| LOGGER_NAME | Specified logger name | app | +| LOGGER_LEVEL | Control log level | debug | +| LOGGER_FILE_PATH | Log directory and name | /tmp/jac_cloud_logs/jac-cloud.log | +| LOGGER_ROLLOVER_INTERVAL | M = every minute, H = hourly, D = daily, W = weekly | D | +| LOGGER_MAX_BACKUP | Maximum number of backup files before it will deletes old file. 
Non positive value will not have maximum | -1 | +| LOGGER_ROLLOVER_MAX_FILE_SIZE | Maximum file size in bytes before it will rollover to new file | 10000000 | +| LOGGER_USE_UTC | If logger will use UTC | false | \ No newline at end of file diff --git a/jac-cloud/jac_cloud/core/context.py b/jac-cloud/jac_cloud/core/context.py index 092975966e..da713d3d9e 100644 --- a/jac-cloud/jac_cloud/core/context.py +++ b/jac-cloud/jac_cloud/core/context.py @@ -8,7 +8,6 @@ from bson import ObjectId from fastapi import Request -from fastapi.responses import ORJSONResponse from jaclang.runtimelib.context import ExecutionContext @@ -144,7 +143,7 @@ def get_root() -> Root: # type: ignore[override] """Get current root.""" return cast(Root, JaseciContext.get().root.architype) - def response(self, returns: list[Any]) -> ORJSONResponse: + def response(self, returns: list[Any]) -> dict[str, Any]: """Return serialized version of reports.""" resp = ContextResponse[Any](status=self.status) @@ -159,7 +158,7 @@ def response(self, returns: list[Any]) -> ORJSONResponse: if SHOW_ENDPOINT_RETURNS: resp.returns = returns - return ORJSONResponse(resp.__serialize__(), status_code=self.status) + return resp.__serialize__() def clean_response( self, key: str | int, val: Any, obj: list | dict # noqa: ANN401 diff --git a/jac-cloud/jac_cloud/jaseci/__init__.py b/jac-cloud/jac_cloud/jaseci/__init__.py index 0c7bc41812..068a60e59a 100644 --- a/jac-cloud/jac_cloud/jaseci/__init__.py +++ b/jac-cloud/jac_cloud/jaseci/__init__.py @@ -2,13 +2,15 @@ from contextlib import asynccontextmanager from os import getenv +from traceback import format_exception from typing import Any, AsyncGenerator -from fastapi import FastAPI as _FaststAPI +from fastapi import FastAPI as _FaststAPI, Request +from fastapi.responses import ORJSONResponse from uvicorn import run as _run -from .utils import Emailer +from .utils import Emailer, logger class FastAPI: @@ -63,6 +65,22 @@ async def lifespan(app: _FaststAPI) -> 
AsyncGenerator[None, _FaststAPI]: for router in [healthz_router, sso_router, user_router, walker_router]: cls.__app__.include_router(router) + @cls.__app__.exception_handler(Exception) + async def uncatched_exception_handler( + request: Request, exc: Exception + ) -> ORJSONResponse: + """Catched uncatched exceptions.""" + response = {"errors": format_exception(exc)} + + log: dict[str, Any] = {"request_url": str(request.url)} + log["extra_fields"] = list(log.keys()) + logger.error( + f"Call from to {log["request_url"]} returns unexpected errors: {response["errors"]}", + extra=log, + ) + + return ORJSONResponse(response, status_code=500) + return cls.__app__ @classmethod @@ -71,7 +89,7 @@ def start( host: str | None = None, port: int | None = None, emailer: type[Emailer] | None = None, - **kwargs: Any # noqa ANN401 + **kwargs: Any, # noqa ANN401 ) -> None: """Run FastAPI Handler via Uvicorn.""" if emailer: @@ -81,5 +99,5 @@ def start( cls.get(), host=host or getenv("HOST") or "0.0.0.0", port=port or int(getenv("PORT", "8000")), - **kwargs + **kwargs, ) diff --git a/jac-cloud/jac_cloud/jaseci/models/user.py b/jac-cloud/jac_cloud/jaseci/models/user.py index 1fe40a23a0..07ebb4cea8 100644 --- a/jac-cloud/jac_cloud/jaseci/models/user.py +++ b/jac-cloud/jac_cloud/jaseci/models/user.py @@ -29,6 +29,12 @@ def obfuscate(self) -> dict: data["password"] = pbkdf2_sha512.hash(self.password).encode() return data + def printable(self) -> dict: + """Return BaseModel.model_dump where the password is hashed.""" + data = self.model_dump(exclude={"password"}) + data["password"] = "****" + return data + @dataclass(kw_only=True) class User: diff --git a/jac-cloud/jac_cloud/jaseci/routers/user.py b/jac-cloud/jac_cloud/jaseci/routers/user.py index e4a1bc25f5..570e449009 100644 --- a/jac-cloud/jac_cloud/jaseci/routers/user.py +++ b/jac-cloud/jac_cloud/jaseci/routers/user.py @@ -25,7 +25,7 @@ invalidate_token, verify_code, ) -from ..utils import Emailer, logger +from ..utils import Emailer, 
log_entry, log_exit, logger from ...core.architype import BulkWrite, NodeAnchor, Root @@ -38,6 +38,8 @@ @router.post("/register", status_code=status.HTTP_200_OK) def register(req: User.register_type()) -> ORJSONResponse: # type: ignore """Register user API.""" + log = log_entry("register", req.email, req.printable()) + with User.Collection.get_session() as session, session.start_transaction(): root = Root().__jac__ @@ -56,7 +58,9 @@ def register(req: User.register_type()) -> ORJSONResponse: # type: ignore BulkWrite.commit(session) if not is_activated: User.send_verification_code(create_code(id), req.email) - return ORJSONResponse({"message": "Successfully Registered!"}, 201) + resp = {"message": "Successfully Registered!"} + log_exit(resp, log) + return ORJSONResponse(resp, 201) except (ConnectionFailure, OperationFailure) as ex: if ex.has_error_label("TransientTransactionError"): retry += 1 @@ -72,7 +76,10 @@ def register(req: User.register_type()) -> ORJSONResponse: # type: ignore logger.exception("Error executing bulk write!") session.abort_transaction() break - return ORJSONResponse({"message": "Registration Failed!"}, 409) + + resp = {"message": "Registration Failed!"} + log_exit(resp, log) + return ORJSONResponse(resp, 409) @router.post( diff --git a/jac-cloud/jac_cloud/jaseci/utils/__init__.py b/jac-cloud/jac_cloud/jaseci/utils/__init__.py index 31f0428e00..f3c00ae8a8 100644 --- a/jac-cloud/jac_cloud/jaseci/utils/__init__.py +++ b/jac-cloud/jac_cloud/jaseci/utils/__init__.py @@ -1,10 +1,10 @@ """Jaseci Utilities.""" -import logging from datetime import datetime, timedelta, timezone from random import choice from string import ascii_letters, digits +from .logger import log_dumps, log_entry, log_exit, logger from .mail import Emailer, SendGridEmailer @@ -23,14 +23,14 @@ def utc_timestamp(**addons: int) -> int: return int(utc_datetime(**addons).timestamp()) -logger = logging.getLogger(__name__) -# logger.addHandler(logging.StreamHandler(sys.stdout)) - __all__ = 
[ "Emailer", "SendGridEmailer", "random_string", "utc_datetime", "utc_timestamp", + "log_dumps", + "log_entry", + "log_exit", "logger", ] diff --git a/jac-cloud/jac_cloud/jaseci/utils/logger.py b/jac-cloud/jac_cloud/jaseci/utils/logger.py new file mode 100644 index 0000000000..642c07e826 --- /dev/null +++ b/jac-cloud/jac_cloud/jaseci/utils/logger.py @@ -0,0 +1,306 @@ +"""Jaseci Log Handlers.""" + +from datetime import time as dtime +from enum import IntEnum +from io import text_encoding +from itertools import chain +from json import dumps +from logging import FileHandler, LogRecord, getLogger +from logging.handlers import ( + BaseRotatingHandler, + RotatingFileHandler, + TimedRotatingFileHandler, +) +from os import getenv, remove, stat +from os.path import exists, getmtime, isfile +from pathlib import Path +from re import ASCII, compile, escape +from stat import ST_MTIME +from time import gmtime, localtime, strftime, time as ttime +from typing import Any + +from ecs_logging import StdlibFormatter + +DEFAULT_PART = [0] +DEFAULT_SEPARATORS = (",", ":") + + +class MixedTimedRotatingFileHandler(TimedRotatingFileHandler, RotatingFileHandler): + """Merge TimedRotatingFileHandler and RotatingFileHandler.""" + + # mypy bug: Incompatible types in assignment + # (expression has type "int", base class "RotatingFileHandler" defined the type as "str") + # even RotatingHandler declared as int + maxBytes: int # type: ignore[assignment] # noqa: N815 + + def __init__( + self, + filename: str, + when: str = "h", + interval: int = 1, + max_bytes: int = 0, + backup_count: int = 0, + encoding: str | None = None, + delay: bool = False, + utc: bool = False, + at_time: dtime | None = None, + errors: str | None = None, + ) -> None: + """Override __init__.""" + Path(filename).parent.mkdir(parents=True, exist_ok=True) + BaseRotatingHandler.__init__( + self, + filename, + "a", + encoding=text_encoding(encoding), + delay=delay, + errors=errors, + ) + + file_path = Path(self.baseFilename) + 
self.file_folder = file_path.parent.absolute() + self.file_name = file_path.stem + self.file_ext = file_path.suffix + + self.maxBytes: int = max_bytes + self.when = when.upper() + self.backupCount = backup_count + self.utc = utc + self.atTime = at_time + + re_format = escape(self.file_name) + r"-[0-9]+" + escape(self.file_ext) + "$" + if self.when == "S": + self.interval = 1 + self.prefix = "%Y-%m-%d-%H-%M-%S" + ext_match = ( + r"^(? "6": + raise ValueError( + f"Invalid day specified for weekly rollover: {self.when}" + ) + self.dayOfWeek = int(self.when[1]) + self.prefix = "%Y-%m-%d" + ext_match = r"^(? int: + """Pull buffer.""" + if self.has_buffer: + self.has_buffer = False + return self.interval + return 0 + + def build_file_name(self, prefix: str) -> str: + """Override rotation_filename.""" + part_extractor = compile( + r"^" + + escape(f"{prefix}-{self.file_name}-") + + r"([0-9]+)" + + escape(self.file_ext) + + r"$" + ) + + part = ( + max( + chain( + ( + int(match.group(1)) + for path in self.file_folder.iterdir() + if (match := part_extractor.match(path.name)) + ), + DEFAULT_PART, + ) + ) + + 1 + ) + + name = f"{self.file_folder}/{prefix}-{self.file_name}-{part}{self.file_ext}" + + if not callable(self.namer): + return name + else: + return self.namer(name) + + def remove_old_backup(self) -> None: + """Remove old backup files.""" + if self.backupCount > 0: + backups = sorted( + ( + path + for path in self.file_folder.iterdir() + if self.extMatch.match(path.name) + ), + key=getmtime, + ) + + if self.backupCount < (backup_count := len(backups)): + for backup in backups[0 : backup_count - self.backupCount]: + remove(backup) + + def doRollover(self, record: LogRecord) -> None: # type: ignore[override] # noqa: N802 + """Override doRollover.""" + current_time = int(record.created) + t = self.rolloverAt - (self.interval + self.pull_buffer()) + + if self.utc: + time_tuple = gmtime(t) + else: + time_tuple = localtime(t) + dst_now = localtime(current_time)[-1] + 
dst_then = time_tuple[-1] + if dst_now != dst_then: + if dst_now: + addend = 3600 + else: + addend = -3600 + time_tuple = localtime(t + addend) + prefix = strftime(self.prefix, time_tuple) + + if self.stream: + self.stream.close() + self.stream = None # type: ignore[assignment] + + self.rotate(self.baseFilename, self.build_file_name(prefix)) + self.remove_old_backup() + + if not self.delay: + self.stream = self._open() + + def shouldRollover(self, record: LogRecord) -> bool: # noqa: N802 + """Override shouldRollover.""" + t = int(record.created) + if exists(self.baseFilename) and not isfile(self.baseFilename): + self.rolloverAt = self.computeRollover(t) + return False + if self.stream is None: + self.stream = self._open() + if self.maxBytes > 0: + msg = "%s\n" % self.format(record) + self.stream.seek(0, 2) + if self.stream.tell() + len(msg) >= self.maxBytes: + return True + if t >= self.rolloverAt: + self.has_buffer = True + self.rolloverAt = self.computeRollover(t) + return True + return False + + def computeRollover(self, currentTime: int) -> int: # noqa: N802, N803 + """Override compute roll over to adjust buffer.""" + return super().computeRollover(currentTime) - int(currentTime % self.interval) + + def emit(self, record: LogRecord) -> None: + """ + Emit a record. + + Output the record to the file, catering for rollover as described + in doRollover(). 
+ """ + try: + if self.shouldRollover(record): + self.doRollover(record) + FileHandler.emit(self, record) + except Exception: + self.handleError(record) + + +class Level(IntEnum): + """Log Level Enum.""" + + CRITICAL = 50 + FATAL = CRITICAL + ERROR = 40 + WARNING = 30 + WARN = WARNING + INFO = 20 + DEBUG = 10 + NOTSET = 0 + + +logger = getLogger(getenv("LOGGER_NAME", "app")) +logger.setLevel(Level[getenv("LOGGER_LEVEL", "DEBUG")].value) + +handler = MixedTimedRotatingFileHandler( + getenv("LOGGER_FILE_PATH", "/tmp/jac_cloud_logs/jac-cloud.log"), + when=getenv("LOGGER_ROLLOVER_INTERVAL", "d"), + backup_count=int(getenv("LOGGER_MAX_BACKUP", "-1")), + max_bytes=int(getenv("LOGGER_ROLLOVER_MAX_FILE_SIZE", "10000000")), + utc=getenv("LOGGER_USE_UTC") == "true", +) +handler.setFormatter(StdlibFormatter()) +logger.addHandler(handler) + + +def log_dumps(payload: dict[str, Any] | list[Any]) -> str: + """Dump dictionary log.""" + return dumps(payload, separators=DEFAULT_SEPARATORS) + + +def log_entry( + api: str, caller: str | None, payload: dict[str, Any], node: str | None = None +) -> dict[str, Any]: + """Log metadata on entry.""" + log: dict[str, Any] = { + "api_name": api, + "caller_name": caller, + "payload": log_dumps(payload), + "entry_node": node, + } + msg = str( + f"Incoming call from {log["caller_name"]}" + f" to {log["api_name"]}" + f" with payload: {log["payload"]}" + f" at entry node: {log["entry_node"]}" + ) + log["extra_fields"] = list(log.keys()) + logger.info(msg, extra=log) + + return log + + +def log_exit(response: dict[str, Any], log: dict[str, Any] | None = None) -> None: + """Log metadata on exit.""" + log = log or {} + log["api_response"] = log_dumps(response) + log["extra_fields"] = list(log.keys()) + log_msg = str( + f"Returning call from {log["caller_name"]}" + f" to {log["api_name"]}" + f" with payload: {log["payload"]}" + f" at entry node: {log["entry_node"]}" + f" with response: {log["api_response"]}" + ) + logger.info( + log_msg, + extra=log, + 
) diff --git a/jac-cloud/jac_cloud/plugin/jaseci.py b/jac-cloud/jac_cloud/plugin/jaseci.py index d199ffb2e5..41b32defce 100644 --- a/jac-cloud/jac_cloud/plugin/jaseci.py +++ b/jac-cloud/jac_cloud/plugin/jaseci.py @@ -48,6 +48,7 @@ from ..core.context import ContextResponse, ExecutionContext, JaseciContext from ..jaseci import FastAPI from ..jaseci.security import authenticator +from ..jaseci.utils import log_entry, log_exit T = TypeVar("T") @@ -151,6 +152,13 @@ def api_entry( pl = cast(BaseModel, payload).model_dump() body = pl.get("body", {}) + log = log_entry( + cls.__name__, + user.email if (user := getattr(request, "_user", None)) else None, + pl, + node, + ) + if isinstance(body, BaseUploadFile) and body_model: body = loads(syncify(body.read)()) try: @@ -164,13 +172,19 @@ def api_entry( if jctx.validate_access(): wlk.spawn_call(jctx.entry_node) jctx.close() - return jctx.response(wlk.returns) + + resp = jctx.response(wlk.returns) + log_exit(resp, log) + + return ORJSONResponse(resp, jctx.status) else: + error = { + "error": f"You don't have access on target entry{cast(Anchor, jctx.entry_node).ref_id}!" 
+ } jctx.close() - raise HTTPException( - 403, - f"You don't have access on target entry{cast(Anchor, jctx.entry_node).ref_id}!", - ) + + log_exit(error, log) + raise HTTPException(403, error) def api_root( request: Request, diff --git a/jac-cloud/scripts/filebeat-template.yml b/jac-cloud/scripts/filebeat-template.yml new file mode 100644 index 0000000000..bfcec61443 --- /dev/null +++ b/jac-cloud/scripts/filebeat-template.yml @@ -0,0 +1,20 @@ +filebeat.inputs: +- type: log + enabled: true + paths: + - /tmp/jac_cloud_logs/*-jac-cloud-*.log + - /tmp/jac_cloud_logs/jac-cloud.log + json: + keys_under_root: true + overwrite_keys: true + add_error_key: true + expand_keys: true + +output.elasticsearch: + hosts: ["localhost:9200"] + protocol: https + api_key: "id:api_key" + index: "filebeat-testing" + +setup.template.name: "filebeat" +setup.template.pattern: "filebeat-*" \ No newline at end of file diff --git a/jac-cloud/setup.py b/jac-cloud/setup.py index d913b23e6a..a087823435 100644 --- a/jac-cloud/setup.py +++ b/jac-cloud/setup.py @@ -31,6 +31,7 @@ "google-auth~=2.32.0", "asyncer~=0.0.8", "fakeredis~=2.24.1", + "ecs-logging~=2.2.0", ], package_data={}, entry_points={