diff --git a/.env b/.env
new file mode 100644
index 0000000..efa4180
--- /dev/null
+++ b/.env
@@ -0,0 +1 @@
+TIMETABLE_API_URL=https://timetable.unsw.edu.au/year/
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 65dec4c..3cef513 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -240,7 +240,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -275,7 +275,7 @@ checksum = "531b97fb4cd3dfdce92c35dedbfdc1f0b9d8091c8ca943d6dae340ef5012d514"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -646,7 +646,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331"
 dependencies = [
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -865,7 +865,7 @@ checksum = "f95e2801cd355d4a1a3e3953ce6ee5ae9603a5c833455343a8bfe3f44d418246"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -2008,7 +2008,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -2334,9 +2334,9 @@ checksum = "dc375e1527247fe1a97d8b7156678dfe7c1af2fc075c9a4db3690ecd2a148068"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.72"
+version = "1.0.84"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a293318316cf6478ec1ad2a21c49390a8d5b5eae9fab736467d93fbc0edc29c5"
+checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6"
 dependencies = [
  "unicode-ident",
 ]
@@ -2352,9 +2352,9 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.33"
+version = "1.0.36"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
+checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
 dependencies = [
  "proc-macro2",
 ]
@@ -2801,22 +2801,22 @@ dependencies = [
 
 [[package]]
 name = "serde"
-version = "1.0.193"
+version = "1.0.203"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25dd9975e68d0cb5aa1120c288333fc98731bd1dd12f561e468ea4728c042b89"
+checksum = "7253ab4de971e72fb7be983802300c30b5a7f0c2e56fab8abfc6a214307c0094"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.193"
+version = "1.0.203"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43576ca501357b9b071ac53cdc7da8ef0cbd9493d8df094cd821777ea6e894d3"
+checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -2838,7 +2838,7 @@ checksum = "3081f5ffbb02284dda55132aa26daecedd7372a42417bbbab6f14ab7d6bb9145"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -2950,6 +2950,7 @@ dependencies = [
  "reqwest",
  "scraper",
  "select",
+ "serde",
  "tokio",
 ]
 
@@ -3029,9 +3030,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.43"
+version = "2.0.66"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ee659fb5f3d355364e1f3e5bc10fb82068efbf824a1e9d1c9504244a6469ad53"
+checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -3147,7 +3148,7 @@ checksum = "3dcf4a824cce0aeacd6f38ae6f24234c8e80d68632338ebaa1443b5df9e29e19"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -3231,7 +3232,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -3300,7 +3301,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
 ]
 
 [[package]]
@@ -3523,7 +3524,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
  "wasm-bindgen-shared",
 ]
 
@@ -3557,7 +3558,7 @@ checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.43",
+ "syn 2.0.66",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index b797216..372c483 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,9 +11,10 @@ chrono = "0.4.31"
 tokio = { version = "1", features = ["full"] }
 reqwest = "0.11"
 select = "0.5"
-scraper = "0.12"
+scraper = "0.12"
 dotenv = "0.15.0"
 regex = "1.10.4"
 logger = "0.4.0"
 log = "0.4.21"
 env_logger = "0.11.3"
+serde = "1.0.203"
+["T1", "8861", "F09C", "Open", "21/24", "Fri 09:00 - 10:00 (Weeks:1-5,8-10), Fri 10:00 - 12:00 (Weeks:1-5,8-10)"] +["T1", "8862", "F11A", "Open", "23/24", "Fri 11:00 - 12:00 (Weeks:1-5,8-10), Fri 12:00 - 14:00 (Weeks:1-5,8-10)"] +["T1", "8863", "F11B", "Open", "23/24", "Fri 11:00 - 12:00 (Weeks:1-5,8-10), Fri 12:00 - 14:00 (Weeks:1-5,8-10)"] +["T1", "8864", "F11C", "Open", "23/24", "Fri 11:00 - 12:00 (Weeks:1-5,8-10), Fri 12:00 - 14:00 (Weeks:1-5,8-10)"] +["T1", "8865", "F13A", "Open", "22/24", "Fri 13:00 - 14:00 (Weeks:1-5,8-10), Fri 14:00 - 16:00 (Weeks:1-5,8-10)"] +["T1", "8866", "F13B", "Open", "21/24", "Fri 13:00 - 14:00 (Weeks:1-5,8-10), Fri 14:00 - 16:00 (Weeks:1-5,8-10)"] +["T1", "8867", "F13C", "Open", "23/24", "Fri 13:00 - 14:00 (Weeks:1-5,8-10), Fri 14:00 - 16:00 (Weeks:1-5,8-10)"] +["T1", "8868", "F15A", "Open", "21/24", "Fri 15:00 - 16:00 (Weeks:1-5,8-10), Fri 16:00 - 18:00 (Weeks:1-5,8-10)"] +["T1", "8869", "F15B", "Open", "21/24", "Fri 15:00 - 16:00 (Weeks:1-5,8-10), Fri 16:00 - 18:00 (Weeks:1-5,8-10)"] +["T1", "8870", "F15C", "Open", "20/24", "Fri 15:00 - 16:00 (Weeks:1-5,8-10), Fri 16:00 - 18:00 (Weeks:1-5,8-10)"] +["T1", "8871", "F15D", "Open", "23/24", "Fri 15:00 - 16:00 (Weeks:1-5,8-10), Fri 16:00 - 18:00 (Weeks:1-5,8-10)"] +["T1", "8872", "F17A", "Open", "20/24", "Fri 17:00 - 18:00 (Weeks:1-5,8-10), Fri 18:00 - 20:00 (Weeks:1-5,8-10)"] +["T1", "8873", "H13A", "Full", "24/24", "Thu 13:00 - 14:00 (Weeks:1-5,7-10), Thu 14:00 - 16:00 (Weeks:1-5,7-10)"] +["T1", "8874", "H15A", "Full", "24/24", "Thu 15:00 - 16:00 (Weeks:1-5,7-10), Thu 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T1", "8875", "H15B", "Open", "23/24", "Thu 15:00 - 16:00 (Weeks:1-5,7-10), Thu 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T1", "8876", "H15C", "Open", "22/24", "Thu 15:00 - 16:00 (Weeks:1-5,7-10), Thu 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T1", "8877", "H17A", "Open", "22/24", "Thu 17:00 - 18:00 (Weeks:1-5,7-10), Thu 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T1", "12547", "H18A", "Full", "24/24", "Thu 18:00 - 19:00 (Weeks:1-5,7-10), Thu 19:00 - 21:00 (Weeks:1-5,7-10)"] +["T1", "8878", "T11A", "Open", "23/24", "Tue 11:00 - 12:00 (Weeks:1-5,7-10), Tue 12:00 - 14:00 (Weeks:1-5,7-10)"] +["T1", "8879", "T11B", "Full", "24/24", "Tue 11:00 - 12:00 (Weeks:1-5,7-10), Tue 12:00 - 14:00 (Weeks:1-5,7-10)"] +["T1", "8880", "T11C", "Open", "22/24", "Tue 11:00 - 12:00 (Weeks:1-5,7-10), Tue 12:00 - 14:00 (Weeks:1-5,7-10)"] +["T1", "8881", "T15A", "Open", "23/24", "Tue 15:00 - 16:00 (Weeks:1-5,7-10), Tue 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T1", "8882", "T17A", "Open", "23/24", "Tue 17:00 - 18:00 (Weeks:1-5,7-10), Tue 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T1", "12529", "T17B", "Open", "19/24", "Tue 17:00 - 18:00 (Weeks:1-5,7-10), Tue 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T1", "8883", "W11A", "Open", "23/24", "Wed 11:00 - 12:00 (Weeks:1-5,7-10), Wed 12:00 - 14:00 (Weeks:1-5,7-10)"] +["T1", "8884", "W13A", "Full", "24/24", "Wed 13:00 - 14:00 (Weeks:1-5,7-10), Wed 14:00 - 16:00 (Weeks:1-5,7-10)"] +["T1", "8885", "W15A", "Open", "23/24", "Wed 15:00 - 16:00 (Weeks:1-5,7-10), Wed 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T1", "8886", "W15B", "Open", "23/24", "Wed 15:00 - 16:00 (Weeks:1-5,7-10), Wed 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T1", "8887", "W15C", "Open", "20/24", "Wed 15:00 - 16:00 (Weeks:1-5,7-10), Wed 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T1", "8888", "W18A", "Open", "22/24", "Wed 18:00 - 19:00 (Weeks:1-5,7-10), Wed 19:00 - 21:00 (Weeks:1-5,7-10)"] +["T1", "11937", "W18B", "Open", "19/24", "Wed 18:00 - 19:00 (Weeks:1-5,7-10), Wed 19:00 - 21:00 
(Weeks:1-5,7-10)"] +["T2", "2147", "CR01", "Open", "0/10"] +["T2", "8918", "A", "Open", "348/350", "Mon 11:00 - 13:00 (Weeks:1-2,4-5,7-10)"] +["T2", "8919", "B", "Open", "542/550", "Mon 11:00 - 13:00 (Weeks:1-2,4-5,7-10)"] +["T2", "8920", "C", "Open", "890/900", "Thu 11:00 - 13:00 (Weeks:1-5,7-10)"] +["T2", "8921", "F09A", "Full", "24/24", "Fri 09:00 - 10:00 (Weeks:1-5,7-10), Fri 10:00 - 12:00 (Weeks:1-5,7-10)"] +["T2", "8922", "F09B", "Open", "23/24", "Fri 09:00 - 10:00 (Weeks:1-5,7-10), Fri 10:00 - 12:00 (Weeks:1-5,7-10)"] +["T2", "8923", "F12A", "Full", "24/24", "Fri 12:00 - 13:00 (Weeks:1-5,7-10), Fri 13:00 - 15:00 (Weeks:1-5,7-10)"] +["T2", "8924", "F13A", "Open", "23/24", "Fri 13:00 - 14:00 (Weeks:1-5,7-10), Fri 14:00 - 16:00 (Weeks:1-5,7-10)"] +["T2", "8925", "F15A", "Full", "24/24", "Fri 15:00 - 16:00 (Weeks:1-5,7-10), Fri 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "8926", "F15B", "Full", "24/24", "Fri 15:00 - 16:00 (Weeks:1-5,7-10), Fri 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "12487", "F15C", "Open", "23/24", "Fri 15:00 - 16:00 (Weeks:1-5,7-10), Fri 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "8927", "F17A", "Full", "24/24", "Fri 17:00 - 18:00 (Weeks:1-5,7-10), Fri 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T2", "8928", "H13A", "Full", "24/24", "Thu 13:00 - 14:00 (Weeks:1-5,7-10), Thu 14:00 - 16:00 (Weeks:1-5,7-10)"] +["T2", "8929", "H13B", "Full", "24/24", "Thu 13:00 - 14:00 (Weeks:1-5,7-10), Thu 14:00 - 16:00 (Weeks:1-5,7-10)"] +["T2", "8930", "H13C", "Full", "24/24", "Thu 13:00 - 14:00 (Weeks:1-5,7-10), Thu 14:00 - 16:00 (Weeks:1-5,7-10)"] +["T2", "12384", "H13D", "Full", "24/24", "Thu 13:00 - 14:00 (Weeks:1-5,7-10), Thu 14:00 - 16:00 (Weeks:1-5,7-10)"] +["T2", "8931", "H15A", "Open", "23/24", "Thu 15:00 - 16:00 (Weeks:1-5,7-10), Thu 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "8932", "H17A", "Open", "23/24", "Thu 17:00 - 18:00 (Weeks:1-5,7-10), Thu 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T2", "8933", "H17B", "Open", "23/24", "Thu 17:00 - 18:00 (Weeks:1-5,7-10), Thu 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T2", "8934", "H18A", "Open", "13/24", "Thu 18:00 - 19:00 (Weeks:1-5,7-10), Thu 19:00 - 21:00 (Weeks:1-5,7-10)"] +["T2", "8935", "H18B", "Open", "20/24", "Thu 18:00 - 19:00 (Weeks:1-5,7-10), Thu 19:00 - 21:00 (Weeks:1-5,7-10)"] +["T2", "8936", "M13A", "Open", "22/24", "Mon 13:00 - 14:00 (Weeks:1-2,4-5,7-10), Mon 14:00 - 16:00 (Weeks:1-2,4-5,7-10)"] +["T2", "12687", "M15A", "Full", "21/21", "Mon 15:00 - 16:00 (Weeks:1-2,4-5,7-10), Mon 16:00 - 18:00 (Weeks:1-2,4-5,7-10)"] +["T2", "12688", "M17A", "Open", "12/21", "Mon 17:00 - 18:00 (Weeks:1-2,4-5,7-10), Mon 18:00 - 20:00 (Weeks:1-2,4-5,7-10)"] +["T2", "12486", "T09A", "Full", "24/24", "Tue 09:00 - 10:00 (Weeks:1-5,7-10), Tue 10:00 - 12:00 (Weeks:1-5,7-10)"] +["T2", "12689", "T11A", "Open", "20/21", "Tue 11:00 - 12:00 (Weeks:1-5,7-10), Tue 12:00 - 14:00 (Weeks:1-5,7-10)"] +["T2", "12690", "T15A", "Full", "21/21", "Tue 15:00 - 16:00 (Weeks:1-5,7-10), Tue 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "8937", "T18A", "Full", "24/24", "Tue 18:00 - 19:00 (Weeks:1-5,7-10), Tue 19:00 - 21:00 (Weeks:1-5,7-10)"] +["T2", "8939", "W09A", "Full", "24/24", "Wed 09:00 - 10:00 (Weeks:1-5,7-10), Wed 10:00 - 12:00 (Weeks:1-5,7-10)"] +["T2", "8940", "W09B", "Full", "24/24", "Wed 09:00 - 10:00 (Weeks:1-5,7-10), Wed 10:00 - 12:00 (Weeks:1-5,7-10)"] +["T2", "8941", "W09C", "Full", "24/24", "Wed 09:00 - 10:00 (Weeks:1-5,7-10), Wed 10:00 - 12:00 (Weeks:1-5,7-10)"] +["T2", "8942", "W11A", "Full", "24/24", "Wed 11:00 - 12:00 (Weeks:1-5,7-10), Wed 12:00 - 14:00 (Weeks:1-5,7-10)"] 
+["T2", "8943", "W11B", "Full", "24/24", "Wed 11:00 - 12:00 (Weeks:1-5,7-10), Wed 12:00 - 14:00 (Weeks:1-5,7-10)"] +["T2", "8944", "W11C", "Full", "24/24", "Wed 11:00 - 12:00 (Weeks:1-5,7-10), Wed 12:00 - 14:00 (Weeks:1-5,7-10)"] +["T2", "8945", "W14A", "Full", "24/24", "Wed 14:00 - 15:00 (Weeks:1-5,7-10), Wed 15:00 - 17:00 (Weeks:1-5,7-10)"] +["T2", "8946", "W15A", "Full", "24/24", "Wed 15:00 - 16:00 (Weeks:1-5,7-10), Wed 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "8947", "W15B", "Full", "24/24", "Wed 15:00 - 16:00 (Weeks:1-5,7-10), Wed 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "8948", "W15C", "Full", "24/24", "Wed 15:00 - 16:00 (Weeks:1-5,7-10), Wed 16:00 - 18:00 (Weeks:1-5,7-10)"] +["T2", "8949", "W17A", "Full", "24/24", "Wed 17:00 - 18:00 (Weeks:1-5,7-10), Wed 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T2", "8950", "W17B", "Open", "23/24", "Wed 17:00 - 18:00 (Weeks:1-5,7-10), Wed 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T2", "8951", "W17C", "Full", "24/24", "Wed 17:00 - 18:00 (Weeks:1-5,7-10), Wed 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T2", "8952", "W17D", "Full", "24/24", "Wed 17:00 - 18:00 (Weeks:1-5,7-10), Wed 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T2", "8953", "W17E", "Full", "24/24", "Wed 17:00 - 18:00 (Weeks:1-5,7-10), Wed 18:00 - 20:00 (Weeks:1-5,7-10)"] +["T3", "1405", "CR01", "Open", "680/1000"] diff --git a/src/class_scraper.rs b/src/class_scraper.rs index 5bd5cca..7b83e29 100644 --- a/src/class_scraper.rs +++ b/src/class_scraper.rs @@ -1,43 +1,173 @@ -// pub async fn run_scraper(&mut self) -> Result<(), Box> { -// match &self.url { -// Some(url) => { -// let html = self.fetch_url(url).await?; -// println!("{}", html); -// let row_selector = Selector::parse("tr.rowLowlight, tr.rowHighlight").unwrap(); -// let code_selector = Selector::parse("td.data").unwrap(); -// let name_selector = Selector::parse("td.data a").unwrap(); -// let link_selector = Selector::parse("td.data a").unwrap(); -// let school_selector = Selector::parse("td.data:nth-child(3)").unwrap(); -// let document = scraper::Html::parse_document(&html); -// for row_node in document.select(&row_selector) { -// // Extract data from each row -// let subject_area_course_code = -// extract_text(row_node.select(&code_selector).next().unwrap()); -// let subject_area_course_name = -// extract_text(row_node.select(&name_selector).next().unwrap()); -// let url = get_html_link_to_page( -// row_node -// .select(&link_selector) -// .next() -// .map_or("", |node| node.value().attr("href").unwrap_or("")), -// ); -// let school = extract_text(row_node.select(&school_selector).next().unwrap()); -// // Create a Course struct and push it to the vector -// let page = Page { -// subject_area_course_code, -// subject_area_course_name, -// url, -// school, -// courses: Vec::new(), -// }; - -// self.add_page(page); - -// } - -// println!("{:?}", self.pages); -// Ok(()) -// } -// None => Err(Box::new(UrlInvalidError)), -// } +use log::info; +use scraper::{Html, Selector}; +use select::document; + +use crate::{ + scraper::{fetch_url}, + text_manipulators::{extract_text, get_html_link_to_page}, + Scraper, UrlInvalidError, +}; + +#[derive(Debug)] +pub enum Career { + UG, + PG, + RESEARCH, +} + +#[derive(Debug)] +pub enum Term { + T1, + T2, + T3, + Summer, +} + +#[derive(Debug)] +pub struct Course { + subject_area_course_code: String, + subject_area_course_name: String, + uoc: i32, + faculty: Option, + school: Option, + campus: Option, + career: Option, + terms: Vec, + census_dates: Vec, + classes: Vec, + notes: Option, +} + + +#[derive(Debug)] +enum Status { + 
diff --git a/src/lib.rs b/src/lib.rs
index 149222a..d9c1a25 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,10 +1,15 @@
 mod scraper;
 mod url_invalid_error;
+mod school_area_scraper;
 mod class_scraper;
 mod subject_area_scraper;
 mod text_manipulators;
 
 pub use scraper::Scraper;
+pub use scraper::fetch_url;
 pub use url_invalid_error::UrlInvalidError;
+// pub use subject_area_scraper::SubjectAreaScraper;
+pub use school_area_scraper::SchoolAreaScraper;
 pub use subject_area_scraper::SubjectAreaScraper;
+pub use class_scraper::ClassScraper;
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index ace541a..d643981 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,7 +1,8 @@
-use spooderman::{Scraper, SubjectAreaScraper};
+use spooderman::{ClassScraper, SchoolAreaScraper, Scraper, SubjectAreaScraper};
 
 use dotenv::dotenv;
 use regex::Regex;
 use chrono::{Datelike, Utc};
+use std::collections::HashMap;
 
 extern crate log;
 extern crate env_logger;
@@ -10,9 +11,13 @@ use log::LevelFilter;
 use log::{info, warn, error};
 
+type CourseCode = String;
+type FacultyCode = String;
 
-fn mutate_string_to_include_curr_year(curr_base_url: &mut String) -> String {
+pub fn mutate_string_to_include_curr_year(curr_base_url: &mut String) -> String {
     let pattern = Regex::new("year").unwrap();
+    // let course_map: HashMap<CourseCode, FacultyCode> = HashMap::new();
+
     pattern.replace(&curr_base_url, Utc::now().year().to_string()).to_string()
 }
 
@@ -22,24 +27,38 @@ async fn main() {
     env_logger::Builder::new()
         .filter_level(LevelFilter::Info)
         .init();
+
+    // The base API URL carries the placeholder segment "year", which is
+    // swapped for the current year at runtime.
     // ie. https://timetable.unsw.edu.au/year/subjectSearch.html
     match std::env::var("TIMETABLE_API_URL") {
         Ok(url) => {
             info!("Timetable URL has been parsed from environment file: {url}!");
             let url_to_scrape = mutate_string_to_include_curr_year(&mut url.to_string());
-            let mut scraper = SubjectAreaScraper::new().set_url(url_to_scrape);
-            match scraper.run_scraper_on_url().await {
-                Ok(_) => info!("Scraping successful!\n"),
-                Err(e) => error!("Error: {}", e),
-            }
+
+            let mut scraper = ClassScraper {
+                subject_area_course_code: "COMP1511".to_string(),
+                subject_area_course_name: "COMP1511".to_string(),
+                uoc: 6,
+                url: "https://timetable.unsw.edu.au/2024/COMP2521.html".to_string(),
+                // url: "https://timetable.unsw.edu.au/2024/COMP1511.html".to_string(),
+            };
+            let _ = scraper.scrape().await;
+            // let mut scraper = SchoolAreaScraper::new(url_to_scrape);
+
+            // // let mut scraper = SubjectAreaScraper::new("https://timetable.unsw.edu.au/2024/COMPKENS.html".to_string());
+            // match scraper.scrape().await {
+            //     Ok(_) => info!("Scraping successful!\n"),
+            //     Err(e) => error!("Error: {}", e),
+            // }
+            // for school_area_page in &mut scraper.pages {
+            //     let _ = school_area_page.subject_area_scraper.scrape().await;
+            // }
+            // println!("{:?}", scraper);
         }
         Err(e) => {
             warn!("Timetable URL has NOT been parsed properly from env file and error report: {e}");
         }
     };
-
-    // let mut scraper = SubjectAreaScraper::new()
-    //     .set_url(base_api_url.to_string());
-    // }
 }
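`mutate_string_to_include_curr_year` relies on `Regex::replace`, which substitutes only the first occurrence of `"year"`. A hedged unit-test sketch, assuming the test module sits in `main.rs` beside the (now `pub`) function:

```rust
// Sketch of a unit test for the year substitution; the expected URL is
// built the same way the function builds it, via chrono's current year.
#[cfg(test)]
mod tests {
    use super::*;
    use chrono::{Datelike, Utc};

    #[test]
    fn replaces_year_placeholder_with_current_year() {
        let mut url = "https://timetable.unsw.edu.au/year/subjectSearch.html".to_string();
        let expected = format!(
            "https://timetable.unsw.edu.au/{}/subjectSearch.html",
            Utc::now().year()
        );
        assert_eq!(mutate_string_to_include_curr_year(&mut url), expected);
    }
}
```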
diff --git a/src/school_area_scraper.rs b/src/school_area_scraper.rs
new file mode 100644
index 0000000..db6534b
--- /dev/null
+++ b/src/school_area_scraper.rs
@@ -0,0 +1,74 @@
+use log::info;
+use scraper::Selector;
+
+use crate::{
+    class_scraper::Course, scraper::fetch_url,
+    subject_area_scraper::{self, SubjectAreaScraper},
+    text_manipulators::{extract_text, get_html_link_to_page},
+    Scraper, UrlInvalidError,
+};
+
+#[derive(Debug)]
+pub struct SchoolAreaPage {
+    pub subject_area_course_code: String,
+    pub subject_area_course_name: String,
+    pub school: String,
+    pub subject_area_scraper: SubjectAreaScraper,
+}
+
+#[derive(Debug)]
+pub struct SchoolAreaScraper {
+    pub url: Option<String>,
+    pub pages: Vec<SchoolAreaPage>,
+}
+
+impl Scraper for SchoolAreaScraper {
+    async fn scrape(&mut self) -> Result<(), Box<dyn std::error::Error>> {
+        match &self.url {
+            Some(url) => {
+                let html = fetch_url(url).await?;
+                let row_selector = Selector::parse("tr.rowLowlight, tr.rowHighlight").unwrap();
+                let code_selector = Selector::parse("td.data").unwrap();
+                let name_selector = Selector::parse("td.data a").unwrap();
+                let link_selector = Selector::parse("td.data a").unwrap();
+                let school_selector = Selector::parse("td.data:nth-child(3)").unwrap();
+                let document = scraper::Html::parse_document(&html);
+                for row_node in document.select(&row_selector) {
+                    // Extract data from each row
+                    let subject_area_course_code =
+                        extract_text(row_node.select(&code_selector).next().unwrap());
+                    let subject_area_course_name =
+                        extract_text(row_node.select(&name_selector).next().unwrap());
+                    let url = get_html_link_to_page(
+                        row_node
+                            .select(&link_selector)
+                            .next()
+                            .map_or("", |node| node.value().attr("href").unwrap_or("")),
+                    );
+                    let school = extract_text(row_node.select(&school_selector).next().unwrap());
+                    let page = SchoolAreaPage {
+                        subject_area_course_code,
+                        subject_area_course_name,
+                        school,
+                        subject_area_scraper: SubjectAreaScraper::new(url),
+                    };
+
+                    self.pages.push(page);
+                }
+
+                Ok(())
+            }
+            None => Err(Box::new(UrlInvalidError)),
+        }
+    }
+}
+
+impl SchoolAreaScraper {
+    pub fn new(url: String) -> Self {
+        SchoolAreaScraper {
+            url: Some(url),
+            pages: Vec::new(),
+        }
+    }
+}
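The scrapers now form a three-level hierarchy: `SchoolAreaScraper` collects `SchoolAreaPage`s, each of which owns a `SubjectAreaScraper`, which in turn collects `ClassScraper`s. A driver walking the whole tree might look like the sketch below, which mirrors the commented-out loop in `main.rs` and is not part of this diff:

```rust
// Hypothetical driver, not part of this diff: walks the hierarchy
// school area -> subject area -> class.
use spooderman::{SchoolAreaScraper, Scraper};

async fn scrape_all(url: String) -> Result<(), Box<dyn std::error::Error>> {
    let mut school_scraper = SchoolAreaScraper::new(url);
    school_scraper.scrape().await?;
    for page in &mut school_scraper.pages {
        // Each page owns a SubjectAreaScraper that fills `class_scrapers`.
        page.subject_area_scraper.scrape().await?;
        for class_scraper in &mut page.subject_area_scraper.class_scrapers {
            class_scraper.scrape().await?;
        }
    }
    Ok(())
}
```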
diff --git a/src/scraper.rs b/src/scraper.rs
index 74edc06..e7ceafc 100644
--- a/src/scraper.rs
+++ b/src/scraper.rs
@@ -1,120 +1,7 @@
-use chrono::{DateTime, Utc};
 use reqwest::ClientBuilder;
-use scraper::{html, ElementRef, Selector};
-use std::ops::Add;
-use crate::{UrlInvalidError, subject_area_scraper::SubjectAreaPage};
-
-#[derive(Debug)]
-enum Term {
-    T1,
-    T2,
-    T3,
-    Summer,
-}
-
-#[derive(Debug)]
-enum Status {
-    Open,
-    Closed,
-}
-
-#[derive(Debug)]
-struct Enrolment {
-    enrolled: u32,
-    capacity: u32,
-}
-
-#[derive(Debug)]
-struct TimeBlock {
-    start: (u32, u32),
-    end: (u32, u32),
-}
-
-impl Add for TimeBlock {
-    type Output = TimeBlock;
-
-    fn add(self, another: TimeBlock) -> Self {
-        let add_hours = |a, b| (a + b) % 24;
-        let add_minutes = |a, b| (a + b) % 60;
-        Self {
-            start: (
-                add_hours(self.start.0, another.start.0),
-                add_minutes(self.start.1, another.start.1),
-            ),
-            end: (
-                add_hours(self.end.0, another.end.0),
-                add_minutes(self.end.1, another.end.1),
-            ),
-        }
-    }
-}
-
-#[derive(Debug)]
-struct DateBlock {
-    start: DateTime<Utc>,
-    end: DateTime<Utc>,
-}
-
-#[derive(Debug)]
-enum Day {
-    Sunday,
-    Monday,
-    Tuesday,
-    Wednesday,
-    Thursday,
-    Friday,
-    Saturday,
-}
-
-#[derive(Debug)]
-pub struct ClassTimeBlock {
-    day: Day,
-    weeks: String,
-    time: TimeBlock,
-    location: String,
-}
-
-#[derive(Debug)]
-pub struct Class {
-    class_id: u32,
-    section: String,
-    term: Term,
-    activity: String,
-    status: Status,
-    course_enrolment: Enrolment,
-    term_date: DateBlock,
-    mode: String,
-    times: Vec<ClassTimeBlock>,
-}
-
-#[derive(Debug)]
-enum Career {
-    UG,
-    PG,
-    RESEARCH,
-}
-
-#[derive(Debug)]
-pub struct Course {
-    code: String,
-    name: String,
-    campus: Career,
-    career: String,
-    terms: Vec<Term>,
-    census_dates: Vec<DateBlock>,
-    classes: Vec<Class>,
-    notes: String,
-}
-
-
-pub trait Page {
-    fn view_page_details(&self);
-}
 
 pub trait Scraper {
-    fn new() -> Self;
-    fn set_url(&mut self, url: String) -> Self;
-    fn add_page(&mut self, page: Box<dyn Page>);
+    fn scrape(&mut self) -> impl std::future::Future<Output = Result<(), Box<dyn std::error::Error>>> + Send;
 }
 
 pub async fn fetch_url(url: &str) -> Result<String, Box<dyn std::error::Error>> {
@@ -125,29 +12,4 @@ pub async fn fetch_url(url: &str) -> Result<String, Box<dyn std::error::Error>>
     let body = response.text().await?;
     Ok(body)
 }
-// impl Scraper {
-
-
-
-
-//     pub fn add_page(&mut self, page: impl Page) {
-//         self.pages.push(Box::new(page));
-//     }
-
-//     // pub async fn run_scraper(&mut self) -> Result<(), Box<dyn std::error::Error>> {
-//     //     self.subject_area_scrape().await
-//     // }
-// }
-
-// impl Scraper {
-//     pub fn view_scraper(&self) {
-//         println!("{:?}", self);
-//     }
-// }
-
-// impl Default for Scraper {
-//     fn default() -> Self {
-//         Self::new()
-//     }
-// }
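The `Scraper` trait now spells its async method out as `fn scrape(&mut self) -> impl Future<Output = …> + Send` rather than `async fn`, so the returned futures carry a `Send` bound and stay usable on multi-threaded executors such as tokio. Implementors can still write a plain `async fn` (Rust 1.75+), as the scrapers in this diff do; a minimal illustration with a hypothetical `NoopScraper`:

```rust
// Minimal illustration, assuming the crate's Scraper trait as declared
// above: the `impl Future + Send` return type is satisfied by an
// ordinary `async fn` in the impl.
use spooderman::Scraper;

struct NoopScraper;

impl Scraper for NoopScraper {
    async fn scrape(&mut self) -> Result<(), Box<dyn std::error::Error>> {
        Ok(()) // trivially Send: the future captures nothing non-Send
    }
}
```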
diff --git a/src/server.rs b/src/server.rs
deleted file mode 100644
index e69de29..0000000
diff --git a/src/subject_area_scraper.rs b/src/subject_area_scraper.rs
index 0565dde..a1f9db6 100644
--- a/src/subject_area_scraper.rs
+++ b/src/subject_area_scraper.rs
@@ -1,53 +1,26 @@
+use log::info;
 use scraper::Selector;
 
 use crate::{
-    scraper::{Course, Page, fetch_url},
-    text_manipulators::{extract_text, get_html_link_to_page},
-    Scraper, UrlInvalidError,
+    class_scraper::{ClassScraper, Course}, scraper::fetch_url,
+    text_manipulators::{extract_text, get_html_link_to_page}, Scraper, UrlInvalidError,
 };
 
 #[derive(Debug)]
-pub struct SubjectAreaPage {
-    url: String,
-    subject_area_course_code: String,
-    subject_area_course_name: String,
-    school: String,
-    courses: Vec<Course>,
-}
-
-
-impl Page for SubjectAreaPage {
-    fn view_page_details(&self) {
-        println!("{:?}", self)
-    }
-}
-
-
-
-#[derive(Debug)]
-
 pub struct SubjectAreaScraper {
     pub url: Option<String>,
-    pub pages: Vec<Box<dyn Page>>,
-}
+    pub class_scrapers: Vec<ClassScraper>,
+}
 
-impl std::fmt::Debug for dyn Page {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{:?}", self.view_page_details())
-    }
-}
-
-impl SubjectAreaScraper {
-    pub async fn run_scraper_on_url(&mut self) -> Result<(), Box<dyn std::error::Error>> {
+impl Scraper for SubjectAreaScraper {
+    async fn scrape(&mut self) -> Result<(), Box<dyn std::error::Error>> {
         match &self.url {
             Some(url) => {
                 let html = fetch_url(url).await?;
-                println!("{}", html);
                 let row_selector = Selector::parse("tr.rowLowlight, tr.rowHighlight").unwrap();
                 let code_selector = Selector::parse("td.data").unwrap();
                 let name_selector = Selector::parse("td.data a").unwrap();
                 let link_selector = Selector::parse("td.data a").unwrap();
-                let school_selector = Selector::parse("td.data:nth-child(3)").unwrap();
+                let uoc_selector = Selector::parse("td.data:nth-child(3)").unwrap();
                 let document = scraper::Html::parse_document(&html);
                 for row_node in document.select(&row_selector) {
                     // Extract data from each row
@@ -61,42 +34,22 @@ impl SubjectAreaScraper {
                             .next()
                             .map_or("", |node| node.value().attr("href").unwrap_or("")),
                     );
-                    let school = extract_text(row_node.select(&school_selector).next().unwrap());
-                    // Create a Course struct and push it to the vector
-                    let page = SubjectAreaPage {
-                        subject_area_course_code,
-                        subject_area_course_name,
-                        url,
-                        school,
-                        courses: Vec::new(),
-                    };
-
-                    self.add_page(Box::new(page));
+                    let uoc = extract_text(row_node.select(&uoc_selector).next().unwrap()).parse().expect("Could not parse UOC!");
+                    self.class_scrapers.push(ClassScraper { subject_area_course_code, subject_area_course_name, uoc, url });
                 }
 
-                println!("{:?}", self.pages);
                 Ok(())
             }
             None => Err(Box::new(UrlInvalidError)),
         }
     }
 }
 
-impl Scraper for SubjectAreaScraper {
-    fn new() -> Self {
-        SubjectAreaScraper {
-            url: None,
-            pages: Vec::new(),
-        }
-    }
-
-    fn set_url(&mut self, url: String) -> Self {
-        SubjectAreaScraper {
+impl SubjectAreaScraper {
+    pub fn new(url: String) -> Self {
+        Self {
             url: Some(url),
-            pages: Vec::new(),
+            class_scrapers: vec![],
         }
     }
-
-    fn add_page(&mut self, page: Box<dyn Page>) {
-        self.pages.push(page);
-    }
 }
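One caveat in the reworked `SubjectAreaScraper`: `.parse().expect("Could not parse UOC!")` panics if the third `td.data` cell is not numeric, so a single malformed row aborts the whole scrape. Usage is otherwise straightforward; a hypothetical example binary (the COMPKENS URL is taken from the commented-out code in `main.rs`):

```rust
// Hypothetical usage, not part of this diff: scrape one subject-area
// page and report how many course rows were collected.
use spooderman::{Scraper, SubjectAreaScraper};

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    let mut scraper = SubjectAreaScraper::new(
        "https://timetable.unsw.edu.au/2024/COMPKENS.html".to_string(),
    );
    scraper.scrape().await?;
    println!("found {} courses", scraper.class_scrapers.len());
    Ok(())
}
```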