1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
|
package org.jsoup;
import java.io.IOException;
import java.net.URL;
import java.util.Collection;
import java.util.Map;
import org.jsoup.nodes.Document;
import org.jsoup.parser.Parser;
/**
* A Connection provides a convenient interface to fetch content from the web,
* and parse it into Documents.
* <p>
* To get a new Connection, use {@link org.jsoup.Jsoup#connect(String)}.
* Connections contain {@link Connection.Request} and
* {@link Connection.Response} objects. The request objects are reusable as
* prototype requests.
* <p>
* Request configuration can be made using either the shortcut methods in
* Connection (e.g. {@link #userAgent(String)}), or by methods in the
* Connection.Request object directly. All request configuration must be made
* before the request is executed.
* <p>
* The Connection interface is <b>currently in beta</b> and subject to change.
* Comments, suggestions, and bug reports are welcome.
*/
public interface Connection {
/**
 * The HTTP request methods a Connection can use: GET and POST.
 */
public enum Method {
    /** HTTP GET. */
    GET,

    /** HTTP POST. */
    POST
}
/**
 * Sets the URL that this request will fetch. The protocol must be HTTP or
 * HTTPS.
 *
 * @param url
 *            the URL to connect to
 * @return this Connection, for chaining
 */
Connection url(URL url);

/**
 * Sets the URL that this request will fetch, supplied as a string. The
 * protocol must be HTTP or HTTPS.
 *
 * @param url
 *            the URL to connect to
 * @return this Connection, for chaining
 */
Connection url(String url);

/**
 * Sets the user-agent header sent with the request.
 *
 * @param userAgent
 *            the user-agent value to send
 * @return this Connection, for chaining
 */
Connection userAgent(String userAgent);

/**
 * Sets the request timeouts (both connect and read). When a timeout
 * occurs, an IOException is thrown. The default timeout is 3 seconds (3000
 * millis). A timeout of zero is treated as an infinite timeout.
 *
 * @param millis
 *            number of milliseconds (thousandths of a second) to wait
 *            before a connect or a read times out
 * @return this Connection, for chaining
 */
Connection timeout(int millis);

/**
 * Sets the referrer (aka "referer") header sent with the request.
 *
 * @param referrer
 *            the referrer value to send
 * @return this Connection, for chaining
 */
Connection referrer(String referrer);

/**
 * Configures whether the connection follows server redirects. By default
 * this is <b>true</b>.
 *
 * @param followRedirects
 *            true if server redirects should be followed; false if they
 *            should not
 * @return this Connection, for chaining
 */
Connection followRedirects(boolean followRedirects);

/**
 * Sets the request method to use, GET or POST. The default is GET.
 *
 * @param method
 *            the HTTP request method
 * @return this Connection, for chaining
 */
Connection method(Method method);

/**
 * Configures whether an HTTP error response (4xx - 5xx, e.g. 404 or 500)
 * causes an exception. By default this is <b>false</b>: an IOException is
 * thrown if an error is encountered. If set to <b>true</b>, no exception
 * is thrown for the error; instead the response is populated with the
 * error body, and the status message will reflect the error.
 *
 * @param ignoreHttpErrors
 *            true if HTTP errors should be ignored; false (default) if an
 *            HTTP error should cause an IOException to be thrown
 * @return this Connection, for chaining
 */
Connection ignoreHttpErrors(boolean ignoreHttpErrors);

/**
 * Configures whether the document's Content-Type is ignored when parsing
 * the response. By default this is <b>false</b>: an unrecognised
 * content-type will cause an IOException to be thrown. (This prevents
 * producing garbage by attempting to parse a JPEG binary image, for
 * example.) Set to true to force a parse attempt regardless of content
 * type.
 *
 * @param ignoreContentType
 *            true if the content type should be ignored when parsing the
 *            response into a Document
 * @return this Connection, for chaining
 */
Connection ignoreContentType(boolean ignoreContentType);
/**
 * Adds a single request data parameter. Request parameters are sent in the
 * request query string for GETs, and in the request body for POSTs. A
 * request may carry several values under the same name.
 *
 * @param key
 *            data key
 * @param value
 *            data value
 * @return this Connection, for chaining
 */
Connection data(String key, String value);

/**
 * Adds every entry of the supplied map to the request data parameters.
 *
 * @param data
 *            map of data parameters
 * @return this Connection, for chaining
 */
Connection data(Map<String, String> data);

/**
 * Adds a number of request data parameters in one call. Keys and values
 * are supplied alternately, e.g.:
 * {@code .data("name", "jsoup", "language", "Java", "language", "English");}
 * creates a query string like:
 * {@code ?name=jsoup&language=Java&language=English}
 *
 * @param keyvals
 *            a set of key value pairs
 * @return this Connection, for chaining
 */
Connection data(String... keyvals);

/**
 * Sets a request header.
 *
 * @param name
 *            header name
 * @param value
 *            header value
 * @return this Connection, for chaining
 * @see org.jsoup.Connection.Request#headers()
 */
Connection header(String name, String value);

/**
 * Sets a cookie to be sent with the request.
 *
 * @param name
 *            name of the cookie
 * @param value
 *            value of the cookie
 * @return this Connection, for chaining
 */
Connection cookie(String name, String value);

/**
 * Adds each of the supplied cookies to the request.
 *
 * @param cookies
 *            map of cookie names to cookie values
 * @return this Connection, for chaining
 */
Connection cookies(Map<String, String> cookies);

/**
 * Supplies an alternate parser to use when the response is parsed into a
 * Document.
 *
 * @param parser
 *            the alternate parser
 * @return this Connection, for chaining
 */
Connection parser(Parser parser);
/**
 * Executes the request as a GET and parses the result.
 *
 * @return the parsed Document
 * @throws IOException
 *             on error
 */
Document get() throws IOException;

/**
 * Executes the request as a POST and parses the result.
 *
 * @return the parsed Document
 * @throws IOException
 *             on error
 */
Document post() throws IOException;

/**
 * Executes the request.
 *
 * @return a response object
 * @throws IOException
 *             on error
 */
Response execute() throws IOException;

/**
 * Returns the request object associated with this connection.
 *
 * @return the request
 */
Request request();

/**
 * Replaces this connection's request.
 *
 * @param request
 *            the new request object
 * @return this Connection, for chaining
 */
Connection request(Request request);

/**
 * Returns the response, once the request has been executed.
 *
 * @return the response
 */
Response response();

/**
 * Replaces this connection's response.
 *
 * @param response
 *            the new response
 * @return this Connection, for chaining
 */
Connection response(Response response);
/**
 * Methods shared by Requests and Responses.
 *
 * @param <T>
 *            the concrete Base type, either Request or Response
 */
interface Base<T extends Base> {
    // NOTE(review): the bound above is the raw type Base. Tightening it to
    // Base<T> would be more type-safe but may break existing implementers
    // declared elsewhere -- confirm before changing.

    /**
     * Returns the URL.
     *
     * @return the URL
     */
    URL url();

    /**
     * Sets the URL.
     *
     * @param url
     *            the new URL
     * @return this, for chaining
     */
    T url(URL url);

    /**
     * Returns the request method.
     *
     * @return the method
     */
    Method method();

    /**
     * Sets the request method.
     *
     * @param method
     *            the new method
     * @return this, for chaining
     */
    T method(Method method);

    /**
     * Returns the value of a header. The header model is simplified: a
     * header may only have one value.
     * <p>
     * Header names are case insensitive.
     *
     * @param name
     *            name of the header (case insensitive)
     * @return the header's value, or null if it is not set
     * @see #hasHeader(String)
     * @see #cookie(String)
     */
    String header(String name);

    /**
     * Sets a header, overwriting any existing header that has the same
     * case insensitive name.
     *
     * @param name
     *            name of the header
     * @param value
     *            value of the header
     * @return this, for chaining
     */
    T header(String name, String value);

    /**
     * Checks whether a header is present.
     *
     * @param name
     *            name of the header (case insensitive)
     * @return true if the header is present in this request/response
     */
    boolean hasHeader(String name);

    /**
     * Removes a header by name.
     *
     * @param name
     *            name of the header to remove (case insensitive)
     * @return this, for chaining
     */
    T removeHeader(String name);

    /**
     * Returns all of the request/response headers as a map.
     *
     * @return the headers
     */
    Map<String, String> headers();

    /**
     * Returns the value of a named cookie from this request/response.
     * <p>
     * Response objects use a simplified cookie model: each cookie set in
     * the response is added to the response object's cookie key=value map,
     * and the cookie's path, domain, and expiry date are ignored.
     *
     * @param name
     *            name of the cookie to retrieve
     * @return the cookie's value, or null if it is not set
     */
    String cookie(String name);

    /**
     * Sets a cookie in this request/response.
     *
     * @param name
     *            name of the cookie
     * @param value
     *            value of the cookie
     * @return this, for chaining
     */
    T cookie(String name, String value);

    /**
     * Checks whether a cookie is present.
     *
     * @param name
     *            name of the cookie
     * @return true if the cookie is present in this request/response
     */
    boolean hasCookie(String name);

    /**
     * Removes a cookie by name.
     *
     * @param name
     *            name of the cookie to remove
     * @return this, for chaining
     */
    T removeCookie(String name);

    /**
     * Returns all of the request/response cookies as a map.
     *
     * @return the cookies
     */
    Map<String, String> cookies();
}
/**
 * An HTTP request.
 */
public interface Request extends Base<Request> {

    /**
     * Returns the request timeout.
     *
     * @return the timeout, in milliseconds
     */
    int timeout();

    /**
     * Changes the request timeout.
     *
     * @param millis
     *            the timeout, in milliseconds
     * @return this Request, for chaining
     */
    Request timeout(int millis);

    /**
     * Returns the current followRedirects setting.
     *
     * @return true if followRedirects is enabled
     */
    boolean followRedirects();

    /**
     * Configures whether the request follows server redirects. By default
     * this is <b>true</b>.
     *
     * @param followRedirects
     *            true if server redirects should be followed
     * @return this Request, for chaining
     */
    Request followRedirects(boolean followRedirects);

    /**
     * Returns the current ignoreHttpErrors setting.
     *
     * @return true if errors will be ignored; false (default) if HTTP
     *         errors will cause an IOException to be thrown
     */
    boolean ignoreHttpErrors();

    /**
     * Configures whether the request ignores HTTP errors in the response.
     *
     * @param ignoreHttpErrors
     *            set to true to ignore HTTP errors
     * @return this Request, for chaining
     */
    Request ignoreHttpErrors(boolean ignoreHttpErrors);

    /**
     * Returns the current ignoreContentType setting.
     *
     * @return true if invalid content-types will be ignored; false
     *         (default) if they will cause an IOException to be thrown
     */
    boolean ignoreContentType();

    /**
     * Configures whether the request ignores the Content-Type of the
     * response.
     *
     * @param ignoreContentType
     *            set to true to ignore the content type
     * @return this Request, for chaining
     */
    Request ignoreContentType(boolean ignoreContentType);

    /**
     * Adds a data parameter to the request.
     *
     * @param keyval
     *            the data to add
     * @return this Request, for chaining
     */
    Request data(KeyVal keyval);

    /**
     * Returns all of the request's data parameters.
     *
     * @return collection of keyvals
     */
    Collection<KeyVal> data();

    /**
     * Sets the parser to use when parsing the document.
     *
     * @param parser
     *            the parser to use
     * @return this Request, for chaining
     */
    Request parser(Parser parser);

    /**
     * Returns the parser currently set for parsing the document.
     *
     * @return the current Parser
     */
    Parser parser();
}
/**
 * An HTTP response.
 */
public interface Response extends Base<Response> {

    /**
     * Returns the status code of the response.
     *
     * @return the status code
     */
    int statusCode();

    /**
     * Returns the status message of the response.
     *
     * @return the status message
     */
    String statusMessage();

    /**
     * Returns the character set name of the response.
     *
     * @return the character set name
     */
    String charset();

    /**
     * Returns the response content type (e.g. "text/html").
     *
     * @return the response content type
     */
    String contentType();

    /**
     * Parses the body of the response as a Document.
     *
     * @return a parsed Document
     * @throws IOException
     *             on error
     */
    Document parse() throws IOException;

    /**
     * Returns the body of the response as a plain string.
     *
     * @return the body
     */
    String body();

    /**
     * Returns the body of the response as an array of bytes.
     *
     * @return the body bytes
     */
    byte[] bodyAsBytes();
}
/**
 * A key/value tuple.
 */
public interface KeyVal {

    /**
     * Changes the key of this keyval.
     *
     * @param key
     *            the new key
     * @return this KeyVal, for chaining
     */
    KeyVal key(String key);

    /**
     * Returns the key of this keyval.
     *
     * @return the key
     */
    String key();

    /**
     * Changes the value of this keyval.
     *
     * @param value
     *            the new value
     * @return this KeyVal, for chaining
     */
    KeyVal value(String value);

    /**
     * Returns the value of this keyval.
     *
     * @return the value
     */
    String value();
}
}
|