Skip to content

Commit

Permalink
init
Browse files Browse the repository at this point in the history
Signed-off-by: George Lemon <georgelemon@protonmail.com>
  • Loading branch information
georgelemon committed Oct 30, 2024
1 parent 4bba76d commit a2f9c83
Show file tree
Hide file tree
Showing 5 changed files with 380 additions and 11 deletions.
27 changes: 16 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
<p align="center">
<img src="https://github.com/openpeeps/PKG/blob/main/.github/logo.png" width="90px"><br>
OpenPeeps repository template for developing libraries,<br>projects and other cool things. 👑 Written in Nim language
<img src="https://github.com/openpeeps/multipart/blob/main/.github/logo.png" width="90px"><br>
A simple, fault tolerant multipart parser 👑 Written in Nim language
</p>

<p align="center">
<code>nimble install {PKG}</code>
<code>nimble install multipart</code>
</p>

<p align="center">
<a href="https://github.com/">API reference</a><br>
<img src="https://github.com/openpeeps/pistachio/workflows/test/badge.svg" alt="Github Actions"> <img src="https://github.com/openpeeps/pistachio/workflows/docs/badge.svg" alt="Github Actions">
<img src="https://github.com/openpeeps/multipart/workflows/test/badge.svg" alt="Github Actions"> <img src="https://github.com/openpeeps/multipart/workflows/docs/badge.svg" alt="Github Actions">
</p>

## 😍 Key Features
- [x] Open Source | `MIT` License
- [x] Written in Nim language
- [x] Framework agnostic
- [x] On-the-Fly Content validation via Callbacks
- [x] Fault tolerant. Skips invalid boundaries

## Examples
...
todo. See `/tests`

### Need a input validator?
If you're looking for a full input validator you can use `openpeeps/bag` package to validate input data, forms,
including `multipart/form-data`. Give a try https://github.com/openpeeps/bag

### ❤ Contributions & Support
- 🐛 Found a bug? [Create a new Issue](/issues)
- 👋 Wanna help? [Fork it!](/fork)
- 🐛 Found a bug? [Create a new Issue](https://github.com/openpeeps/multipart/issues)
- 👋 Wanna help? [Fork it!](https://github.com/openpeeps/multipart/fork)
- 😎 [Get €20 in cloud credits from Hetzner](https://hetzner.cloud/?ref=Hm0mYGM9NxZ4)
- 🥰 [Donate via PayPal address](https://www.paypal.com/donate/?hosted_button_id=RJK3ZTDWPL55C)

### 🎩 License
{PKG} | MIT license. [Made by Humans from OpenPeeps](https://github.com/openpeeps).<br>
Copyright &copy; 2023 OpenPeeps & Contributors &mdash; All rights reserved.
`multipart` | MIT license. [Made by Humans from OpenPeeps](https://github.com/openpeeps).<br>
Copyright &copy; 2024 OpenPeeps & Contributors &mdash; All rights reserved.
14 changes: 14 additions & 0 deletions multipart.nimble
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Package

version = "0.1.0"
author = "George Lemon"
description = "A simple multipart parser"
license = "MIT"
srcDir = "src"
# bin = @["multipart"]
# binDir = "bin"

# Dependencies

requires "nim >= 2.0.0"
requires "checksums"
303 changes: 303 additions & 0 deletions src/multipart.nim
Original file line number Diff line number Diff line change
@@ -0,0 +1,303 @@
# A simple, framework agnostic parser for
# handling multipart/form-data content-type in Nim
#
# (c) 2024 George Lemon | MIT License
# Made by Humans from OpenPeeps
# https://github.com/supranim/multipart

import std/[os, streams, strutils,
parseutils, options, oids, sequtils]

import pkg/checksums/md5

type
MultipartHeader* = enum
contentDisposition = "content-disposition"
contentType = "content-type"

MultipartDataType* = enum
MultipartFile
MultipartText

MultipartFileSigantureState* = enum
stateInvalidMagic
stateMoreMagic
stateValidMagic

MultipartHeaderTuple* = tuple[key: MultipartHeader, value: seq[(string, string)]]

MultipartFileCallback* =
proc(boundary: ptr Boundary, pos: int, c: ptr char): bool {.closure.}
## A callback that runs while parsing a `MultipartFile` boundary

MultipartFileCallbackSignature* = proc(boundary: ptr Boundary, pos: int, c: ptr char): MultipartFileSigantureState {.closure.}
## A callback to collect magic numbers signature
## while writing the temporary file

MultipartTextCallback* = proc(boundary: ptr Boundary, data: ptr string): bool {.closure.}
## A callback that returns data of a `MultipartText`.
##
## This callback can be used for on-the-fly validation of
## string-based data from input fields

# BoundaryEndCallback* = proc(boundary: Boundary): bool {.nimcall.}
# A callback that runs after parsing a boundary
BoundaryState* = enum
boundaryInit
boundaryAdded
## marks Boundary as added to `boundaries` sequenece
boundaryRemoved
## can be set by external validators via `MultipartFileCallback`
## once invalidated, the `Boundary` will be moved to `invalidBoundaries`

Boundary* = object
state: BoundaryState
fieldName*: string
case dataType*: MultipartDataType
of MultipartFile:
fileId*, fileName*, fileType*, filePath*: string
fileContent*: File
else:
value*: string

Multipart* = object
tmpDirectory: string
# Will use a temporary path to store files at
# `getTempDir() / getMD5(getAppDir())`
boundaryLine: string
# Holds the boundary line retrieved from a
# `Content-type` header
fileCallback*: ptr MultipartFileCallback
## A `MultipartFileCallback` that runs while in
## `MultipartFile` boundary
fileSignatureCallback*: ptr MultipartFileCallbackSignature
## Collects magic numbers that required for verifying
## the file type. The callback must return one of
## `MultipartFileSigantureState` states. Use `stateMoreMagic`
## to run `fileSignatureCallback` again for colelcting more bytes.
##
## If the magic numbers are correct use `stateValidMagic`
## to stop the callback and continue writing the file.
##
## `stateInvalidMagic` will mark the boundary as invalid,
## skip it to `invalidBoundaries` and stops the callback.
##
## `stateValidMagic` will continue writing the file on disk
## and stops the signature callback
boundaries: seq[Boundary]
# A sequence of Boundary objects
invalidBoundaries*: seq[Boundary]
# A sequence of removed boundaries

MultipartInvalidHeader* = object of CatchableError

proc parseHeader(line: string): MultipartHeaderTuple =
result.value = @[]
var key: string
var i = 0
i = line.parseUntil(key, ':')
inc(i) # skip :
result.key = parseEnum[MultipartHeader](key.toLowerAscii)
if result.key == contentType:
i += line.skipWhitespace(i)
result.value.add (line.substr(i), newStringOfCap(0))
else:
var v: string
while i < line.len and line[i] notin Newlines:
i += line.skipWhitespace(i)
i += line.parseUntil(v, {';'}, i)
if v == "form-data":
setLen(v, 0) # skip form-data
else:
let kv = v.split('=', 1)
add result.value, (kv[0], kv[1].unescape)
inc(i)

template skipWhitespaces =
while true:
case curr
of Whitespace:
curr = body.readChar()
else: break

template skipNewlines =
while true:
case curr
of '\r', '\n':
curr = body.readChar
else: break

const
contentDispositionLen = len($contentDisposition)
contentTypeLen = len($contentType)

template runFileCallback(someBoundary) {.dirty.} =
if mp.fileCallback != nil:
if mp.fileCallback[](someBoundary,
someBoundary.fileContent.getFilePos(), curr.addr):
discard
else:
someBoundary.fileContent.close()
skipUntilNextBoundary = true
break

template parseBoundary {.dirty.} =
var currBoundary: string
add currBoundary, curr
curr = body.readChar()
let len = len(boundary)
add currBoundary, curr
case curr:
of '-':
if body.peekStr(len).startsWith(boundary):
add currBoundary, body.readStr(len)
curr = body.readChar()
skipWhitespaces()
if body.peekStr(2) == "--":
while not body.atEnd:
discard body.readChar() # consume remaining chars
break
else:
var headers: seq[MultipartHeaderTuple]
while true:
if "c" & body.peekStr(contentDispositionLen - 1).toLowerAscii == $contentDisposition:
var heading: string
add heading, curr
add heading, body.readStr(contentDispositionLen)
curr = body.readChar()
while curr notin Newlines:
add heading, curr
curr = body.readChar()
add headers, parseHeader(heading)
# curr = body.readChar() # new line
skipNewlines()
elif "c" & body.peekStr(contentTypeLen - 1).toLowerAscii == $contentType:
var heading: string
add heading, curr
add heading, body.readStr(contentTypeLen)
curr = body.readChar()
while curr notin Newlines:
add heading, curr
curr = body.readChar()
add headers, parseheader(heading)
skipNewlines()
else: break
skipNewlines()
if prevStreamBoundary.isSome:
prevStreamBoundary.get[].fileContent.close()
prevStreamBoundary = none(ptr Boundary)
if headers.len == 2:
let fileId = $genOid()
let filepath = mp.tmpDirectory / fileId
add mp.boundaries,
Boundary(
dataType: MultipartFile,
fileId: fileId,
fieldName: headers[0].value[0][1],
fileName: headers[0].value[1][1],
fileType: headers[1].value[0][0],
filePath: filepath,
fileContent: open(filepath, fmWrite)
)
prevStreamBoundary = some(mp.boundaries[^1].addr)
write(prevStreamBoundary.get[].fileContent, curr)
runFileCallback(prevStreamBoundary.get)
elif headers.len == 1:
var inputBoundary =
Boundary(
dataType: MultipartText,
fieldName: headers[0].value[0][1]
)
add inputBoundary.value, curr
add mp.boundaries, inputBoundary
setLen(currBoundary, 0)
else:
if prevStreamBoundary.isSome:
write(prevStreamBoundary.get[].fileContent, currBoundary)
else:
add mp.boundaries[^1].value, currBoundary
setLen(currBoundary, 0)
else: discard
if prevStreamBoundary.isSome:
write(prevStreamBoundary.get[].fileContent, currBoundary)
setLen(currBoundary, 0)

#
# Public API
#
proc initMultipart*(contentType: string,
fileCallback: ptr MultipartFileCallback = nil,
tmpDir = ""
): Multipart =
## Initializes an instance of `Multipart`
result.tmpDirectory =
if tmpDir.len > 0: tmpDir
else: getTempDir() / getMD5(getAppDir())
result.boundaryLine = contentType
if fileCallback != nil:
result.fileCallback = fileCallback

proc parse*(mp: var Multipart, body: sink string, tmpDir = "") =
## Parse and return a `Multipart` instance
var
i = 0
prevStreamBoundary: Option[ptr Boundary]
multipartType: string
multipartBoundary: string
i += mp.boundaryLine.parseUntil(multipartType, {';'}, i)
i += mp.boundaryLine.skipWhitespace(i)
i += mp.boundaryLine.parseUntil(multipartBoundary, {'\c', '\l'}, i)
let boundary = multipartBoundary.split("boundary=")[1]
discard existsOrCreateDir(mp.tmpDirectory)
var
body = newStringStream(body)
skipUntilNextBoundary: bool
curr: char
while not atEnd(body):
if skipUntilNextBoundary:
while curr != '-' and (body.atEnd == false):
curr = body.readChar()
parseBoundary()
skipUntilNextBoundary = false
else:
curr = body.readChar()
case curr
of Newlines:
if prevStreamBoundary.isSome:
write(prevStreamBoundary.get[].fileContent, curr)
runFileCallback(prevStreamBoundary.get)
of '-':
parseBoundary()
else:
let currBoundary: ptr Boundary = addr(mp.boundaries[^1])
if currBoundary != nil:
case currBoundary[].dataType
of MultipartFile:
write(currBoundary[].fileContent, curr)
runFileCallback(currBoundary)
of MultipartText:
add currBoundary[].value, curr
if prevStreamBoundary.isSome:
prevStreamBoundary.get[].fileContent.close()
body.close()

proc getTempDir*(mp: Multipart): string =
## Returns the temporary directory path
mp.tmpDirectory

proc getPath*(boundary: Boundary): string =
## Return the file path of a `Boundary` object
## if the boundary data type is `MultipartDataType`
## Check type using `getType`
result = boundary.filePath

proc getMagicNumbers*(boundary: Boundary): seq[byte] =
## Returns the magic numbers collected while parsing the `boundary`

proc len*(mp: Multipart): int = mp.boundaries.len

iterator items*(mp: Multipart): Boundary =
## Iterate over available boundaries in
## the `Multipart` instance
for b in mp.boundaries:
yield b
1 change: 1 addition & 0 deletions tests/config.nims
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
switch("path", "$projectDir/../src")
Loading

0 comments on commit a2f9c83

Please sign in to comment.