1+ #include <stdlib.h>
2+ #include <stdio.h>
3+ #include <string.h>
4+
5+ #include <myhtml/myhtml.h>
6+ #include <myhtml/serialization.h>
7+ #include <mycss/selectors/serialization.h>
8+ #include <modest/finder/finder.h>
9+
/*
 * Read everything available on stdin into one heap buffer.
 *
 * Returns a malloc()ed, NUL-terminated buffer that the caller must free().
 * Bytes are copied verbatim; a caller that measures the result with strlen()
 * only sees the data up to the first NUL byte of the input.
 *
 * On allocation or read failure a diagnostic is printed to stderr and the
 * process exits with EXIT_FAILURE, so the function never returns NULL.
 */
char *readeof(void)
{
    enum { INITIAL_CAPACITY = 1024 };

    size_t capacity = INITIAL_CAPACITY;
    size_t length = 0; /* bytes stored so far, terminator excluded */
    char *content = malloc(capacity);
    if (content == NULL) {
        perror("Failed to allocate");
        exit(EXIT_FAILURE);
    }

    for (;;) {
        /* Always keep one byte in reserve for the terminating NUL. */
        const size_t room = capacity - length - 1;
        const size_t got = fread(content + length, 1, room, stdin);
        length += got;
        if (got < room) {
            break; /* short read: end of input or a read error */
        }

        /* Buffer is full: double it so the total copying work stays linear. */
        if (capacity > (size_t)-1 / 2) {
            fprintf(stderr, "Failed to allocate: input too large\n");
            free(content);
            exit(EXIT_FAILURE);
        }
        char *grown = realloc(content, capacity * 2);
        if (grown == NULL) {
            perror("Failed to allocate");
            free(content);
            exit(EXIT_FAILURE);
        }
        content = grown;
        capacity *= 2;
    }

    if (ferror(stdin)) {
        perror("Failed to read");
        free(content);
        exit(EXIT_FAILURE);
    }

    content[length] = '\0';
    return content;
}
33+
/*
 * Serialization callback: write one chunk of serialized output to stdout.
 *
 * data - chunk to write; it is not required to be NUL-terminated
 * len  - number of bytes in data
 * ctx  - opaque user pointer supplied by the caller; unused here
 *
 * Always returns 0. The chunk is written with fwrite() so that exactly len
 * bytes are emitted: there is no size_t -> int narrowing and an embedded NUL
 * byte does not cut the output short.
 */
unsigned int serializer_log(const char *data, size_t len, void *ctx)
{
    (void)ctx;
    if (data != NULL && len > 0) {
        fwrite(data, 1, len, stdout);
    }
    return 0;
}
38+
/*
 * Handle a single command-line option whose leading dashes have already been
 * stripped.
 *
 * arg      - option name, e.g. "help" or "h"
 * progname - program name shown in the usage line
 *
 * For "help" / "h" the usage text is written to stderr and the process exits
 * with EXIT_SUCCESS. Any other option name is ignored and the function
 * returns normally.
 */
void opthandler(const char *arg, const char *progname)
{
    static const char *const help_tail[] = {
        "Options:\n",
        "-h, --help\tshow this text\n",
        "\n",
        "<selector>\tselector to match\n",
        "<mode>\t\tprocessing mode\n",
        "\t\tmay be one of { data, text, attr }:\n",
        "\t\tdata - return raw html of matching elements\n",
        "\t\ttext - return inner text of matching elements\n",
        "\t\tattr - return attribute value X of matching elements\n",
        "\t\t\t[mode argument] - attribute to return\n",
    };

    const int wants_help = strcmp(arg, "help") == 0 || strcmp(arg, "h") == 0;
    if (!wants_help) {
        return;
    }

    fputs("hq (html query) - commandline HTML processor © Robin Broda, 2018\n", stderr);
    fprintf(stderr, "Usage: %s [options] <selector> <mode> [mode argument]\n\n", progname);
    for (size_t line = 0; line < sizeof help_tail / sizeof help_tail[0]; line++) {
        fputs(help_tail[line], stderr);
    }
    exit(EXIT_SUCCESS);
}
56+
57+ int main (int argc , const char * argv []){
58+ if (argc == 1 ) opthandler ("help" , argv [0 ]);
59+
60+ size_t shifts = 0 ; // offset of new argv
61+ while (argc > 1 ){
62+ if (argv [1 ][0 ] == '-' ){
63+ const char * arg = argv [1 ];
64+ if (arg [1 ] == '-' ){
65+ const char * longarg = arg + 2 ;
66+ opthandler (longarg , 0 [argv - shifts ]);
67+ }else {
68+ for (size_t i = 1 ; i < strlen (arg ); i ++ ){
69+ const char shortarg [2 ] = { arg [i ], '\0' };
70+ opthandler (shortarg , 0 [argv - shifts ]);
71+ }
72+ }
73+ shifts ++ ;
74+ argv ++ ;
75+ argc -- ;
76+ }else {
77+ argv [0 ] = 0 [argv - shifts ]; // restore argv[0]
78+ break ;
79+ }
80+ }
81+
82+ const char * selector ;
83+ if (argc > 1 ){
84+ selector = argv [1 ];
85+ }else {
86+ fprintf (stderr , "No selector given\n" );
87+ exit (EXIT_FAILURE );
88+ }
89+
90+ const char * mode ;
91+ if (argc > 2 ){
92+ mode = argv [2 ];
93+ }else {
94+ fprintf (stderr , "No mode given\n" );
95+ exit (EXIT_FAILURE );
96+ }
97+
98+ char * input = readeof ();
99+
100+ myhtml_t * myhtml = myhtml_create ();
101+ mystatus_t mystatus = myhtml_init (myhtml , MyHTML_OPTIONS_DEFAULT , 1 , 0 );
102+ if (mystatus ){
103+ fprintf (stderr , "Failed to init MyHTML\n" );
104+ exit (EXIT_FAILURE );
105+ }
106+
107+ myhtml_tree_t * html_tree = myhtml_tree_create ();
108+ mystatus = myhtml_tree_init (html_tree , myhtml );
109+ if (mystatus ){
110+ fprintf (stderr , "Failed to init MyHTML tree\n" );
111+ exit (EXIT_FAILURE );
112+ }
113+
114+ mystatus = myhtml_parse (html_tree , MyENCODING_UTF_8 , input , strlen (input ));
115+ if (mystatus ){
116+ fprintf (stderr , "Failed to parse HTML\n" );
117+ exit (EXIT_FAILURE );
118+ }
119+
120+ mycss_t * mycss = mycss_create ();
121+ mystatus = mycss_init (mycss );
122+ if (mystatus ){
123+ fprintf (stderr , "Failed to init MyCSS\n" );
124+ exit (EXIT_FAILURE );
125+ }
126+
127+ mycss_entry_t * css_entry = mycss_entry_create ();
128+ mystatus = mycss_entry_init (mycss , css_entry );
129+ if (mystatus ){
130+ fprintf (stderr , "Failed to init MyCSS entry\n" );
131+ exit (EXIT_FAILURE );
132+ }
133+
134+ modest_finder_t * finder = modest_finder_create_simple ();
135+
136+ mycss_selectors_list_t * selectors_list = mycss_selectors_parse (
137+ mycss_entry_selectors (css_entry ),
138+ MyENCODING_UTF_8 ,
139+ selector , strlen (selector ), & mystatus
140+ );
141+
142+ if (selectors_list == NULL || (selectors_list -> flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD )){
143+ fprintf (stderr , "Bad selector\n" );
144+ exit (EXIT_FAILURE );
145+ }
146+
147+ myhtml_collection_t * collection = NULL ;
148+ modest_finder_by_selectors_list (finder , html_tree -> node_html , selectors_list , & collection );
149+
150+ if (collection ){
151+ for (size_t i = 0 ; i < collection -> length ; i ++ ){
152+ if (!strcmp (mode , "text" )){
153+ myhtml_serialization_tree_callback (collection -> list [i ]-> child , serializer_log , NULL );
154+ printf ("\n" );
155+ }else if (!strcmp (mode , "data" )){
156+ myhtml_serialization_tree_callback (collection -> list [i ], serializer_log , NULL );
157+ printf ("\n" );
158+ }else if (!strcmp (mode , "attr" )){
159+ const char * attr_name ;
160+ if (argc > 3 ){
161+ attr_name = argv [3 ];
162+ }else {
163+ fprintf (stderr , "No attr name given" );
164+ exit (EXIT_FAILURE );
165+ }
166+ myhtml_tree_node_t * node = collection -> list [i ];
167+ myhtml_token_node_t * token = node -> token ;
168+ if (token == NULL ) continue ;
169+ myhtml_token_attr_t * attr = token -> attr_first ;
170+ if (attr == NULL ) continue ;
171+
172+ do {
173+ if (!strcmp (attr_name , mycore_string_data (& attr -> key ))){
174+ printf ("%s\n" , mycore_string_data (& attr -> value ));
175+ }
176+ if (attr != token -> attr_last ) attr = attr -> next ;
177+ }while (attr != token -> attr_last );
178+ }else {
179+ fprintf (stderr , "invalid mode: '%s'\n" , mode );
180+ exit (EXIT_FAILURE );
181+ }
182+ }
183+ }
184+
185+ // cleanup
186+ myhtml_collection_destroy (collection );
187+ mycss_selectors_list_destroy (mycss_entry_selectors (css_entry ), selectors_list , true);
188+ modest_finder_destroy (finder , true);
189+ mycss_destroy (css_entry -> mycss , true);
190+ mycss_entry_destroy (css_entry , true);
191+ myhtml_destroy (html_tree -> myhtml );
192+ myhtml_tree_destroy (html_tree );
193+ free (input );
194+ return 0 ;
195+ }
0 commit comments