The SWORD Project  1.9.0.svnversion
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
url.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  *
3  * url.cpp - code for an URL parser utility class
4  *
5  * $Id: url.cpp 3439 2016-10-23 08:32:02Z scribe $
6  *
7  * Copyright 2004-2013 CrossWire Bible Society (http://www.crosswire.org)
8  * CrossWire Bible Society
9  * P. O. Box 2528
10  * Tempe, AZ 85280-2528
11  *
12  * This program is free software; you can redistribute it and/or modify it
13  * under the terms of the GNU General Public License as published by the
14  * Free Software Foundation version 2.
15  *
16  * This program is distributed in the hope that it will be useful, but
17  * WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * General Public License for more details.
20  *
21  */
22 
23 #include <url.h>
24 #include <swlog.h>
25 
26 //system includes
27 #include <ctype.h>
28 #include <map>
29 #include <stdio.h>
30 #include <iostream>
31 
32 
34 
35 
36 namespace {
37  typedef std::map<unsigned char, SWBuf> DataMap;
39  static class __init {
40  public:
41  __init() {
42  for (unsigned short int c = 32; c <= 255; ++c) { //first set all encoding chars
43  if ( (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || strchr("-_.!~*'()", c)) {
44  continue; //we don't need an encoding for this char
45  }
46 
47  SWBuf buf;
48  buf.setFormatted("%%%-.2X", c);
49  m[c] = buf;
50  }
51  //the special encodings for certain chars
52  m[' '] = '+';
53  }
54  } ___init;
55 }
56 
57 
61 URL::URL(const char *url)
62  : url(""),
63  protocol(""),
64  hostname(""),
65  path("")
66 {
67  if (url && strlen(url)) {
68  this->url = url;
69  parse();
70  }
71 }
72 
73 
74 const char *URL::getProtocol() const {
75  return protocol.c_str();
76 }
77 
78 
79 const char *URL::getHostName () const {
80  return hostname.c_str();
81 }
82 
83 
84 const char *URL::getPath() const {
85  return path.c_str();
86 }
87 
88 
90  return parameterMap;
91 }
92 
93 
98 const char *URL::getParameterValue(const char *name) const {
99  static SWBuf emptyStr("");
100 
101  ParameterMap::const_iterator it = parameterMap.find(name);
102  static SWBuf retVal;
103 
104  if (it != parameterMap.end())
105  retVal = it->second.c_str();
106  else
107  retVal = emptyStr.c_str();
108 
109  return retVal.c_str();
110 }
111 
112 
117 void URL::parse() {
118  /* format example protocol://hostname/path/path/path.pl?param1=value1&amp;param2=value2
119  * we include the script name in the path, so the path would be /path/path/path.pl in this example
120  * &amp; could also be &
121  */
122 
123  //1. Init
124  const char *urlPtr = url.c_str();
125 
126  protocol = "";
127  hostname = "";
128  path = "";
129  parameterMap.clear();
130 
131  // 2. Get the protocol, which is from the begining to the first ://
132  const char *end = strchr( urlPtr, ':' );
133  if (end) { //protocol was found
134  protocol.append(urlPtr, end-urlPtr);
135  urlPtr = end + 1;
136 
137  //find the end of the protocol separator (e.g. "://")
138  for (; (*urlPtr == ':') || (*urlPtr == '/'); urlPtr++);
139  }
140 
141  //3.Get the hostname part. This is the part from pos up to the first slash
142  bool checkPath = true;
143  bool checkParams = true;
144  bool checkAnchor = true;
145 
146  end = strchr(urlPtr, '/');
147  if (!end) {
148  checkPath = false;
149  end = strchr(urlPtr, '?');
150  }
151  if (!end) {
152  checkParams = false;
153  end = strchr(urlPtr, '#');
154  }
155  if (!end) {
156  checkAnchor = false;
157  end = urlPtr+strlen(urlPtr);
158  }
159 
160  hostname.append(urlPtr, end-urlPtr);
161 
162  urlPtr = end + ((*end)? 1 : 0);
163 
164  if (checkPath) {
165  end = strchr(urlPtr, '?');
166  if (!end) {
167  checkParams = false;
168  end = strchr(urlPtr, '#');
169  }
170  if (!end) {
171  checkAnchor = false;
172  end = urlPtr+strlen(urlPtr);
173  }
174 
175  path.append(urlPtr, end-urlPtr);
176 
177  urlPtr = end + ((*end)? 1 : 0);
178  }
179 
180  if (checkParams) {
181  //5. Fill the map with the parameters and their values
182  SWBuf paramName;
183  SWBuf paramValue;
184 
185  if (checkAnchor) checkAnchor = false;
186 /*
187  end = strchr(urlPtr, '#');
188  if (!end) {
189  checkAnchor = false;
190  end = urlPtr+strlen(urlPtr);
191  }
192 */
193  //end = (start && strchr(start, '?')) ? strchr(start, '?')+1 :0;
194  end = urlPtr;
195  while (end) {
196  paramName = "";
197  paramValue = "";
198 
199  //search for the equal sign to find the value part
200  const char *valueStart = strchr(end, '=');
201  if (valueStart) {
202  const char* valueEnd = strstr(valueStart, "&amp;") ? strstr(valueStart, "&amp;") : strstr(valueStart, "&"); //try to find a new paramter part
203 
204  if (valueEnd) {
205  paramName.append(end, valueStart-end);
206  paramValue.append(valueStart+1, valueEnd-(valueStart+1));
207  }
208  else { //this is the last paramter of the URL
209  paramName.append(end, valueStart-end);
210  paramValue.append(valueStart+1);
211  }
212 
213  if (paramName.length() && paramValue.length()) {//insert the param into the map if it's valid
214  paramName = decode(paramName.c_str());
215  paramValue = decode(paramValue.c_str());
216 
217  parameterMap[ paramName ] = paramValue;
218  }
219  }
220  else {
221  break; //no valid parameter in the url
222  }
223 
224  const char *start = end+1;
225  end = strstr(start, "&amp;") ? strstr(start, "&amp;")+5 : (strstr(start, "&") ? strstr(start, "&")+1 : 0); //try to find a new paramter part
226  }
227  }
228 }
229 
230 
231 const SWBuf URL::encode(const char *urlText) {
232  /*static*/ SWBuf url;
233  url = urlText;
234 
235  SWBuf buf;
236  const long length = url.length();
237  for (long i = 0; i < length; i++) { //fill "buf"
238  const char& c = url[i];
239  buf.append( ((m[c].length()) ? m[c] : SWBuf(c)) );
240  }
241 
242  url = buf;
243  return url;
244 }
245 
246 
247 const SWBuf URL::decode(const char *encoded) {
248  /*static*/ SWBuf text;
249  text = encoded;
250 
251  SWBuf decoded;
252  const long length = text.length();
253  int i = 0;
254 
255  while (i < length) {
256  char a = text[i];
257 
258  if ( a == '+' ) { //handle special cases
259  decoded.append(' ');
260  }
261  else if ( (a == '%') && (i+2 < length)) { //decode the %ab hex encoded char
262  const char b = toupper( text[i+1] );
263  const char c = toupper( text[i+2] );
264 
265  if (isxdigit(b) && isxdigit(c)) { //valid %ab part
266  unsigned int dec = 16 * ((b >= 'A' && b <= 'F') ? (b - 'A' + 10) : (b - '0')); //dec value of the most left digit (b)
267  dec += (c >= 'A' && c <= 'F') ? (c - 'A' + 10) : (c - '0'); //dec value of the right digit (c)
268 
269  decoded.append((char)dec); //append the decoded char
270 
271  i += 2; //we jump over the %ab part; we have to leave out three, but the while loop adds one, too
272  }
273  }
274  else { //just append the char
275  decoded.append(a);
276  }
277 
278  i++;
279  }
280 
281  if (decoded.length()) {
282  text = decoded;
283  }
284  return text;
285 }
286 
287 
289 
#define SWORD_NAMESPACE_START
Definition: defs.h:39
Definition: swbuf.h:47
unsigned long length() const
Definition: swbuf.h:197
SWBuf hostname
Definition: url.h:87
static class SWORD_NAMESPACE_START::__init ___init
std::map< SWBuf, SWBuf > ParameterMap
Definition: url.h:36
const char * getParameterValue(const char *name) const
Definition: url.cpp:98
SWBuf url
Definition: url.h:85
size_t length
Definition: regex.c:7928
const char * c_str() const
Definition: swbuf.h:158
SWBuf & append(const char *str, long max=-1)
Definition: swbuf.h:274
const char * getPath() const
Definition: url.cpp:84
SWBuf protocol
Definition: url.h:86
void parse()
Definition: url.cpp:117
URL(const char *url)
Definition: url.cpp:61
const char * getProtocol() const
Definition: url.cpp:74
std::map< unsigned char, SWBuf > DataMap
Definition: url.cpp:37
const ParameterMap & getParameters() const
Definition: url.cpp:89
#define SWORD_NAMESPACE_END
Definition: defs.h:40
const char * getHostName() const
Definition: url.cpp:79
SWBuf path
Definition: url.h:88
SWBuf & setFormatted(const char *format,...)
Definition: swbuf.cpp:50
ParameterMap parameterMap
Definition: url.h:89
static const SWBuf decode(const char *encodedText)
Definition: url.cpp:247
static const SWBuf encode(const char *urlText)
Definition: url.cpp:231