summaryrefslogtreecommitdiff
path: root/lib/uRI.ml
blob: 3e9963f95e27a337ba5addca45ab3bc7bc92f251 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
(*─────────────────────────────────────────────────────────────────────────────┐
│ SPDX-FileCopyrightText: 2025 toastal <https://toast.al/contact/>             │
│ SPDX-License-Identifier: LGPL-2.1-or-later WITH OCaml-LGPL-linking-exception │
└─────────────────────────────────────────────────────────────────────────────*)
(* extend & fix naming for ocaml-uri *)
include Uri

let jsont : t Jsont.t =
	Jsont.string
	|> Jsont.map ~kind: "URI" ~dec: of_string ~enc: to_string

(* Validate URI for security concerns *)
let acceptable_schemes = ["http"; "https"; "ftp"; "sftp"; "file"; "ssh"; "git"; "darcs"; "pijul"; "fossil"]

let is_acceptable_scheme scheme =
	List.mem (String.lowercase_ascii scheme) acceptable_schemes

(* URL-decode a string - handles %XX hex sequences *)
let url_decode s =
	let buf = Buffer.create (String.length s) in
	let i = ref 0 in
	while !i < String.length s do
		if s.[!i] = '%' && !i + 2 < String.length s then
			try
				let hex = String.sub s (!i + 1) 2 in
				let code = int_of_string ("0x" ^ hex) in
				Buffer.add_char buf (Char.chr code);
				i := !i + 3
			with _ -> 
				Buffer.add_char buf s.[!i];
				incr i
		else
			Buffer.add_char buf s.[!i];
			incr i
	done;
	Buffer.contents buf

let contains_substring s substr =
	let re = Str.regexp_string substr in
	try
		ignore (Str.search_forward re s 0);
		true
	with Not_found -> false

let has_path_traversal uri =
	let raw_path = path uri in
	let decoded_path = url_decode raw_path in
	(* Check both raw and decoded paths for traversal sequences *)
	let check_path path_str =
		contains_substring path_str ".." && (
			contains_substring path_str "/../" ||
			contains_substring path_str "\\..\\" ||
			String.starts_with ~prefix:"../" path_str ||
			String.ends_with ~suffix:"/.." path_str ||
			String.ends_with ~suffix:"/.." (if String.length path_str > 0 && path_str.[0] = '/' then path_str else "/" ^ path_str)
		)
	in
	check_path raw_path || check_path decoded_path

let validate uri =
	match scheme uri with
	| Some scheme when is_acceptable_scheme scheme ->
		if has_path_traversal uri then
			Error (`Path_traversal (path uri))
		else
			Ok ()
	| Some scheme ->
		Error (`Invalid_scheme scheme)
	| None ->
		Error (`Invalid_scheme "missing")

(* good enough URI generation for now for this *)
let gen =
	let open QCheck.Gen in
	let a_to_z = (char_range 'a' 'z') in
	let* scheme = QCheck.Gen.oneof_list ["http"; "https"; "ftp"; "sftp"] in
	let* host = string_size ~gen: a_to_z (int_bound 20) in
	let* tld = string_size ~gen: a_to_z (int_bound 5) in
	let* path_opt = option (string_size ~gen: a_to_z (int_bound 10)) in
	let uri =
		of_string @@
			Fmt.str "%s://%s.%s/%s" scheme host tld (Option.value ~default: "" path_opt)
	in
	return uri