Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix/memory issue test 1 #31

Merged
merged 12 commits into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions .Dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
.git
.gitignore

# Python
*/.venv/

# CI
.codeclimate.yml
.travis.yml
Expand All @@ -12,7 +15,10 @@
docker-compose.yml
.docker
.dockerignore
Dockerfile?
Dockerfile
Dockerfile.prod
Dockerfile.stag
Dockerfile.dev

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down Expand Up @@ -78,6 +84,7 @@ target/
# Virtual environment
.venv/
venv/
.vscode/

# PyCharm
.idea
Expand All @@ -101,7 +108,6 @@ venv/
README.md

# Library dependency metadata
poetry.lock

# github workflows
.github/
Expand All @@ -112,4 +118,23 @@ poetry.lock
volumes/

# Task
tasks/
tasks/

# Example
app/example/

# Gitpod
scripts/gitpod*
scripts/codespaces*

# Github workflows
.github
.devcontainer

# Gitpod
scripts/gitpod*
scripts/codespaces*
.gitpod*

# Env Files
.env*
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
; ignore = E266, E501, W503, E203, C901
ignore = E501 app/models/
ignore = E501
exclude = .eggs,*.egg-info,.git,.hg,.tox, __pycache__,.vscode,.venv,__init__.py,.mypy_cache,.pytest_cache
max-line-length = 79
max-complexity = 18
Expand Down
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ cython_debug/
!.vscode/launch.json
!.vscode/extensions.json
!.vscode/*.code-snippets
.vscode

# Local History for Visual Studio Code
.history/
Expand All @@ -175,4 +176,7 @@ cython_debug/
.ionide

# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode
n
n

# Example
app/example/
5 changes: 3 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
files: app/
repos:
- repo: https://github.com/psf/black
rev: 22.3.0
hooks:
- id: black
language_version: python3
- repo: https://github.com/pycqa/flake8
rev: 4.0.1
rev: 7.0.0
hooks:
- id: flake8
- repo: https://github.com/timothycrosley/isort
rev: 5.9.3
rev: 5.12.0
hooks:
- id: isort
11 changes: 4 additions & 7 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,22 +1,19 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9
FROM python:3.9.16-buster

WORKDIR /app

ENV POETRY_VERSION=1.2.0

# Install Poetry
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=/opt/poetry python && \
RUN curl -sSL https://install.python-poetry.org/ | POETRY_HOME=/opt/poetry python && \
cd /usr/local/bin && \
ln -s /opt/poetry/bin/poetry && \
poetry config experimental.new-installer false && \
poetry config virtualenvs.create false

# Copy poetry.lock* in case it doesn't exist in the repo
COPY ./pyproject.toml ./poetry.lock* /
COPY ./pyproject.toml ./poetry.lock* /

# Allow installing dev dependencies to run tests
ARG INSTALL_DEV=false
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --no-dev ; fi"
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --only main ; fi"

COPY . .
ENV PYTHONPATH=/app
10 changes: 4 additions & 6 deletions Dockerfile.dev
Original file line number Diff line number Diff line change
@@ -1,22 +1,20 @@
FROM tiangolo/uvicorn-gunicorn-fastapi:python3.9
FROM python:3.9.16-buster

WORKDIR /app

ENV POETRY_VERSION=1.2.0
ENV POETRY_VERSION=1.5.1

# Install Poetry
RUN curl -sSL https://install.python-poetry.org | POETRY_HOME=/opt/poetry python && \
cd /usr/local/bin && \
ln -s /opt/poetry/bin/poetry && \
poetry config experimental.new-installer false && \
export PATH="/opt/poetry/bin:$PATH" && \
poetry config virtualenvs.create false

# Copy poetry.lock* in case it doesn't exist in the repo
COPY ./pyproject.toml ./poetry.lock* /

# Allow installing dev dependencies to run tests
ARG INSTALL_DEV=false
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then poetry install --no-root ; else poetry install --no-root --no-dev ; fi"
RUN bash -c "if [ $INSTALL_DEV == 'true' ] ; then /opt/poetry/bin/poetry install --no-root ; else /opt/poetry/bin/poetry install --no-root --no-dev ; fi"

COPY . .
ENV PYTHONPATH=/app
20 changes: 20 additions & 0 deletions Dockerfile.prod
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
FROM python:3.10-slim-buster as requirements-stage

WORKDIR /tmp
RUN pip install poetry
COPY ./pyproject.toml ./poetry.lock* /tmp/

RUN mkdir -p /tmp/app
COPY ./app /tmp/app

RUN poetry export -f requirements.txt --output requirements.txt --without-hashes


FROM python:3.10-slim-buster

WORKDIR /code

COPY --from=requirements-stage /tmp/requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

COPY --from=requirements-stage /tmp/app /code/app
13 changes: 10 additions & 3 deletions app/api/api_v1/routers/prefetch.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from fastapi import APIRouter

from app.models.prefetch import Prefetch
from app.models.prefetch import Prefetch, PrefetchResponse
from app.utils.tasks import prefetch_profiles

prefetch_router = router = APIRouter()
Expand All @@ -24,10 +24,17 @@ async def prefetch_profiles_background(prefetch: Prefetch):
urls = prefetch.urls
minimal = prefetch.minimal
samples_to_fetch = prefetch.samples_to_fetch
trigger_id = prefetch.trigger_id

# Prefetch Profiles as a background job
result = prefetch_profiles.delay(
urls=urls, minimal=minimal, samples_to_fetch=samples_to_fetch
urls=urls,
minimal=minimal,
samples_to_fetch=samples_to_fetch,
trigger_id=trigger_id,
)

return result.id
return PrefetchResponse(
task_id=result.id,
trigger_id=trigger_id,
)
2 changes: 1 addition & 1 deletion app/api/api_v1/routers/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List

from fastapi import APIRouter, Depends
from pandas_profiling import ProfileReport
from ydata_profiling import ProfileReport

from app.core.config import Settings
from app.models.alerts import Alerts
Expand Down
15 changes: 13 additions & 2 deletions app/core/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List

from pydantic import BaseSettings
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
Expand Down Expand Up @@ -41,10 +41,21 @@ class Settings(BaseSettings):

# MODEL PARAMS
# Constraint for Column names
COLUMN_NAME_REGEX_PATTERN = r"[\w\s]*"
COLUMN_NAME_REGEX_PATTERN: str = r"[\w\s]*"

# PROFILE SEGMENTS
SAMPLE_DATA_RENDERER: List[str] = ["head"]

# LOGGING SETTINGS
LOG_LEVEL: str = "DEBUG"
LOG_FILE_PATH: str = "logs/app.log"
LOG_FILE_SIZE: int = 100_000_000 # 100MB
LOG_FILE_BACKUP_COUNT: int = 5
LOG_FORMAT: str = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"

# PROFILING SETTINGS
PROGRESS_BAR: bool = True

class Config:
env_file = ".env"
extra = "ignore"
56 changes: 56 additions & 0 deletions app/core/logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import logging
import logging.config
import os

from app.core.config import Settings

settings = Settings()

# Create the logs directory if it doesn't exist.
# os.path.dirname() returns "" when LOG_FILE_PATH has no directory component
# (e.g. "app.log"); os.makedirs("") would raise FileNotFoundError, so only
# create a directory when there is one to create. exist_ok=True removes the
# check-then-create race when several processes import this module at once.
log_directory = os.path.dirname(settings.LOG_FILE_PATH)
if log_directory:
    os.makedirs(log_directory, exist_ok=True)

# Configuration dictionary for logging (logging.config dictConfig schema).
LOGGING_CONFIG = {
    "version": 1,
    # Keep loggers created before this module was imported enabled.
    "disable_existing_loggers": False,
    "formatters": {
        # NOTE: this formatter is defined but no handler below references it.
        "default": {
            "format": "%(asctime)s [%(levelname)s] [%(name)s:%(lineno)d] - %(message)s",  # noqa: E501
            "datefmt": "%Y-%m-%d %H:%M:%S",
        },
    },
    "handlers": {
        "console": {
            "class": "rich.logging.RichHandler",
            "level": settings.LOG_LEVEL,
        },
    },
    "loggers": {
        # Root logger.
        "": {
            "level": settings.LOG_LEVEL,
            "handlers": ["console"],
            "propagate": True,
        },
        # NOTE(review): "celery" attaches the same console handler as the
        # root logger and also propagates, so celery records may be emitted
        # twice - confirm whether that is intended.
        "celery": {
            "level": settings.LOG_LEVEL,
            "handlers": ["console"],
            "propagate": True,
        },
    },
}

# Load the logging configuration
logging.config.dictConfig(LOGGING_CONFIG)


def get_logger(name: str) -> logging.Logger:
    """
    Look up a logger by name in the standard ``logging`` registry.

    Args:
        name (str): The name of the logger to fetch.

    Returns:
        logging.Logger: The logger registered under ``name``.
    """
    named_logger = logging.getLogger(name)
    return named_logger
8 changes: 4 additions & 4 deletions app/models/alerts.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

from typing import List
from typing import List, Optional

from pydantic import BaseModel
from pydantic import RootModel


class Alerts(BaseModel):
__root__: List[str]
class Alerts(RootModel[Optional[List[str]]]):
pass
10 changes: 5 additions & 5 deletions app/models/analysis.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from datetime import datetime, timedelta
from datetime import datetime
from typing import Optional

from pydantic.main import BaseModel


class Analysis(BaseModel):
title: str
date_start: datetime
date_end: datetime
duration: timedelta
title: Optional[str]
date_start: Optional[datetime]
date_end: Optional[datetime]

class Config:
underscore_attrs_are_private = True
1 change: 1 addition & 0 deletions app/models/correlations.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ class Correlations(BaseModel):
kendall: Optional[Union[Json, Dict]]
cramers: Optional[Union[Json, Dict]]
phi_k: Optional[Union[Json, Dict]]
# auto: Optional[Union[Json, Dict, Any]]

class Config:
underscore_attrs_are_private = True
8 changes: 4 additions & 4 deletions app/models/duplicates.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import Union
from typing import Any

from pydantic import BaseModel, Json
from pydantic import RootModel


class Duplicates(BaseModel):
__root__: Union[Json, str]
class Duplicates(RootModel[Any]):
pass
Loading
Loading