Edinburgh Speech Tools  2.1-release
io.cc
Go to the documentation of this file.
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /************************************************************************/
33  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
34  /* Date: Tue Jun 10 1997 */
35  /************************************************************************/
36  /* */
37  /* Functions to open file descriptors for various kinds of data */
38  /* sources and sinks. */
39  /* */
40  /************************************************************************/
41 
42 #include <cstdlib>
43 #include <cstdio>
44 #include <fcntl.h>
45 #include "EST_unix.h"
46 #include "EST_socket.h"
47 
48 #include <sys/types.h>
49 
50 #include "EST_String.h"
51 #include "EST_bool.h"
52 #include "siod.h"
53 #include "siodp.h"
54 #include "io.h"
55 
56 using std::cout;
57 
// RxURL matches "protocol://host[:port]rest" (the second '/' is optional).
// parse_url() reads sub-expression 1 as the protocol, 2 as the host,
// 4 as the port digits and 5 as the remainder of the URL.
58 EST_Regex RxURL("\\([a-z]+\\)://?\\([^/:]+\\)\\(:\\([0-9]+\\)\\)?\\(.*\\)");
// RxFILEURL matches any string beginning with "file:"; parse_url() checks
// it before RxURL.
59 EST_Regex RxFILEURL("file:.*");
// Dotted-quad numeric address; connect_to_server() uses it to decide
// between inet_addr() and a gethostbyname() lookup.
60 static EST_Regex ipnum("[0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+");
61 
// Port used by fd_open_http() when the caller passes a negative port.
62 const int default_http_port = 80;
// Default FTP port; fd_open_ftp() is a stub in this file and does not use it.
63 const int default_ftp_port = 21;
64 
// Sizes the static line buffer in server_get_line(), which is declared
// with MAX_LINE_LENGTH+1 bytes.
65 #define MAX_LINE_LENGTH (256)
66 
/*
 * Convert a port specification into a port number in host byte order.
 *
 * `port` may be a TCP service name known to getservbyname() or a
 * decimal number in the range 0..65535.
 *
 * Returns -1 if `port` is NULL, empty, or neither a known service name
 * nor a valid decimal port number.
 */
static int port_to_int(const char *port)
{
    struct servent *serv;
    char *end;
    long value;

    if (!port || *port == '\0')
        return -1;

    // s_port is stored in network byte order, but callers hand the
    // result to htons(), so it must be converted back to host order here.
    if ((serv = getservbyname(port, "tcp")))
        return ntohs((unsigned short)serv->s_port);

    // Out-of-range input makes strtol() saturate, which the range test
    // below rejects, so errno does not need to be inspected.
    value = strtol(port, &end, 10);
    if (end == port || *end != '\0' || value < 0 || value > 65535)
        return -1;

    return (int)value;
}
79 
80 int parse_url(const EST_String &url,
81  EST_String &protocol,
82  EST_String &host,
83  EST_String &port,
84  EST_String &path)
85 {
86  EST_String bitpath;
87  size_t start_of_bracket[EST_Regex_max_subexpressions];
88  size_t end_of_bracket[EST_Regex_max_subexpressions];
89 
90  if (url.matches(RxFILEURL,0,start_of_bracket, end_of_bracket))
91  {
92  protocol = "file";
93  host = "";
94  port = "";
95  path = url.after("file:");
96  return TRUE;
97  }
98  else if (!url.matches(RxURL, 0, start_of_bracket, end_of_bracket))
99  return FALSE;
100 
101  protocol = url.at(start_of_bracket[1], end_of_bracket[1]-start_of_bracket[1]);
102  host = url.at(start_of_bracket[2], end_of_bracket[2]-start_of_bracket[2]);
103  port = url.at(start_of_bracket[4], end_of_bracket[4]-start_of_bracket[4]);
104  bitpath = url.at(start_of_bracket[5], end_of_bracket[5]-start_of_bracket[5]);
105 
106  if (protocol == "http")
107  path = protocol + "://" + host + bitpath;
108  else
109  path = bitpath;
110 
111  return TRUE;
112 }
113 
114 static int connect_to_server(const char *host, int port)
115 {
116  struct sockaddr_in address;
117  struct hostent *hostentp;
118  EST_String shost=host;
119  int s;
120 
121  memset(&address, 0, sizeof(address));
122 
123  if (shost.matches(ipnum))
124  {
125  address.sin_addr.s_addr = inet_addr(host);
126  address.sin_family = AF_INET;
127  }
128  else if ((hostentp=gethostbyname(host))==NULL)
129  err("can't find host", host);
130  else
131  {
132  memset(&(address.sin_addr),0,sizeof(struct in_addr));
133  address.sin_family=hostentp->h_addrtype;
134  memmove(&address.sin_addr,
135  (hostentp->h_addr_list)[0],
136  hostentp->h_length);
137  }
138  address.sin_port=htons(port);
139 
140  if ((s=socket(PF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
141  err("can't create socket", NIL);
142 
143  if (connect(s, (struct sockaddr *)&address, sizeof(address)) < 0)
144  {
145  close(s);
146  err("can't connect to host",
147  inet_ntoa(address.sin_addr));
148  }
149 
150  return s;
151 }
152 
153 static void server_send(int s, const char *text)
154 {
155  ssize_t n=strlen(text);
156  ssize_t sent;
157 
158  while (n>0)
159  if ((sent = write(s, text, n))<0)
160  err("error talking to server", NIL);
161  else
162  n -= sent;
163 }
164 
165 static const char *server_get_line(int s)
166 {
167  static char buffer[MAX_LINE_LENGTH+1];
168  char *p=buffer;
169  ssize_t n;
170 
171  *p='\0';
172 
173  while(1==1)
174  if ((n=read(s, p, 1)) == 0)
175  break;
176  else if (n < 0)
177  err("error while reading from server", NIL);
178  else if (*(p++) == '\n')
179  break;
180 
181  *p = '\0';
182 
183  return buffer;
184 }
185 
186 
187 /*
188  * Open stdin or stdout. Should this do a dup?
189  */
190 
191 int fd_open_stdinout(const char *r_or_w)
192 {
193  int fd = -1;
194 
195  if (r_or_w[0] == 'r')
196  fd = fileno(stdin);
197  else if (r_or_w[0] == 'w')
198  fd = fileno(stdout);
199  else
200  err("mode not understood for -", r_or_w);
201  return fd;
202 }
203 
204 /*
205  * Duplicates the fopen interpretation of the type
206  * parameter plus "rw" being a synonym for "r+" to preserve
207  * some scheme semantics.
208  */
209 int fd_open_file(const char *name, const char *r_or_w)
210 {
211  int fd;
212  int mode=0;
213  int go_to_end=0;
214 
215  if (strcmp(name, "-")==0)
216  return fd_open_stdinout(r_or_w);
217 
218  if (r_or_w[0] == 'r')
219  if (r_or_w[1] == '+' || r_or_w[1] == 'w')
220  mode = O_RDWR|O_CREAT;
221  else
222  mode = O_RDONLY;
223  else if (r_or_w[0] == 'w')
224  if (r_or_w[1] == '+')
225  mode = O_RDWR|O_CREAT|O_TRUNC;
226  else
227  mode = O_WRONLY|O_CREAT|O_TRUNC;
228  else if (r_or_w[0] == 'a')
229  if (r_or_w[1] == '+')
230  go_to_end = mode = O_RDWR;
231  else
232  go_to_end = mode = O_WRONLY|O_CREAT;
233  else
234  err("mode not understood", r_or_w);
235 
236  /* Should deal with `b' here for binary files.
237  */
238 
239  fd= open(name, mode, 0666);
240 
241  if (fd >=0 && go_to_end)
242  lseek(fd, 0, SEEK_END);
243 
244  return fd;
245 }
246 
247 int fd_open_http(const char *host,
248  int port,
249  const char *path,
250  const char *r_or_w)
251 {
252  int s;
253 
254  if (port <0)
255  port=default_http_port;
256 
257  if ((s=connect_to_server(host, port)) < 0)
258  return s;
259 
260  if (*r_or_w == 'r')
261  {
262  const char *line;
263  float http_version;
264  int code;
265  char location[1024] = "";
266 
267  server_send(s, "GET ");
268  server_send(s, path);
269  server_send(s, " HTTP/1.0\n\n");
270  shutdown(s, 1);
271 
272  line= server_get_line(s);
273 
274  if (sscanf(line, "HTTP/%f %d", &http_version, &code) != 2)
275  {
276  close(s);
277  err("HTTP error", line);
278  }
279 
280  // Skip rest of header.
281  while((line = server_get_line(s)))
282  {
283  if (*line=='\r' || *line == '\n' || *line == '\0')
284  break;
285  else if (sscanf(line, "Location: %s", location) == 1)
286  {
287  cout << "redirect to '" << location << "'\n";
288  }
289  }
290 
291  if (code == 301 || code == 302)
292  {
293  close(s);
294 
295  if (*location == '\0')
296  err("Redirection to no loction", NIL);
297 
298 
299  EST_String sprotocol, shost, sport, spath;
300 
301  if (!parse_url(location, sprotocol, shost, sport, spath))
302  err("redirection to bad URL", location);
303 
304  s = fd_open_url(sprotocol, shost, sport, spath, "rb");
305  }
306 
307  }
308  else if (*r_or_w == 'w')
309  err("Write to HTTP url not yet implemented", NIL);
310 
311  return s;
312 }
313 
/*
 * FTP access is not implemented: every argument is ignored and the
 * call always fails with -1.
 */
int fd_open_ftp(const char *host,
                int port,
                const char *path,
                const char *r_or_w)
{
    const int not_supported = -1;

    // Mark the parameters as deliberately unused.
    (void)host, (void)port, (void)path, (void)r_or_w;

    return not_supported;
}
326 
327 int fd_open_tcp(const char *host,
328  int port,
329  const char *text,
330  const char *r_or_w)
331 {
332  int s;
333 
334  if (port <0)
335  return -1;
336 
337  if ((s=connect_to_server(host, port)) < 0)
338  return s;
339 
340  server_send(s, text);
341 
342  if (*r_or_w == 'r')
343  shutdown(s, 1);
344  else if (*r_or_w == 'w')
345  shutdown(s, 0);
346 
347  return s;
348 }
349 
350 /*
351  * Open a stream to a URL.
352  */
353 
354 int fd_open_url(const char *protocol,
355  const char *host,
356  const char *port,
357  const char *path,
358  const char *r_or_w)
359 {
360  // special case for local file URLs
361  if (strcmp(protocol, "file") == 0
362  && (!host || *host == '\0')
363  && (!port || *port == '\0'))
364  return fd_open_file(path, r_or_w);
365  else if (strcmp(protocol, "file") == 0 || strcmp(protocol, "ftp") == 0)
366  return fd_open_ftp(host, port_to_int(port), path, r_or_w);
367  else if (strcmp(protocol, "http") == 0)
368  return fd_open_http(host, port_to_int(port), path, r_or_w);
369  else if (strcmp(protocol, "tcp") == 0)
370  return fd_open_tcp(host, port_to_int(port), path, r_or_w);
371  else
372  return -1;
373 }
EST_Regex RxFILEURL("file:.*")
const int default_http_port
Definition: io.cc:62
A Regular expression class to go with the CSTR EST_String class.
Definition: EST_Regex.h:56
int fd_open_tcp(const char *host, int port, const char *text, const char *r_or_w)
Definition: io.cc:327
#define NIL
Definition: siod_defs.h:92
#define EST_Regex_max_subexpressions
Definition: EST_Regex.h:150
int fd_open_url(const char *protocol, const char *host, const char *port, const char *path, const char *r_or_w)
Definition: io.cc:354
#define MAX_LINE_LENGTH
Definition: io.cc:65
int ssize_t
void err(const char *message, LISP x) EST_NORETURN
Definition: slib.cc:608
int fd_open_file(const char *name, const char *r_or_w)
Definition: io.cc:209
#define SEEK_END
Definition: system.h:28
const int default_ftp_port
Definition: io.cc:63
EST_Regex RxURL("\\([a-z]+\\)://?\\([^/:]+\\)\\(:\\([0-9]+\\)\\)?\\(.*\\)")
#define FALSE
Definition: EST_bool.h:119
NULL
Definition: EST_WFST.cc:55
int matches(const char *e, ssize_t pos=0) const
Exactly match this string?
Definition: EST_String.cc:651
FILE16 *(* open)(const char *, const char *, int, const char *, const char *)
Definition: url.c:107
int fd_open_stdinout(const char *r_or_w)
Definition: io.cc:191
EST_String after(int pos, int len=1) const
Part after pos+len.
Definition: EST_String.h:308
int fd_open_ftp(const char *host, int port, const char *path, const char *r_or_w)
Definition: io.cc:314
EST_String at(int from, int len=0) const
Return part at position.
Definition: EST_String.h:292
#define TRUE
Definition: EST_bool.h:118
int fd_open_http(const char *host, int port, const char *path, const char *r_or_w)
Definition: io.cc:247
int parse_url(const EST_String &url, EST_String &protocol, EST_String &host, EST_String &port, EST_String &path)
Definition: io.cc:80