public class HtmlFileListParser
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
private static java.util.regex.Pattern |
APACHE_INDEX_SKIP |
private static java.util.regex.Pattern |
MAILTO_URLS |
private static java.util.regex.Pattern[] |
SKIPS |
private static java.util.regex.Pattern |
URLS_TO_PARENT |
private static java.util.regex.Pattern |
URLS_WITH_PATHS |
Constructor and Description |
---|
HtmlFileListParser() |
Modifier and Type | Method and Description |
---|---|
private static java.lang.String |
cleanLink(java.net.URI baseURI,
java.lang.String link) |
private static boolean |
isAcceptableLink(java.lang.String link) |
static java.util.List<java.lang.String> |
parseFileList(java.lang.String baseurl,
java.io.InputStream stream)
Fetches raw HTML from the provided InputStream, parses it, and returns the file list.
|
private static final java.util.regex.Pattern APACHE_INDEX_SKIP
private static final java.util.regex.Pattern URLS_WITH_PATHS
private static final java.util.regex.Pattern URLS_TO_PARENT
private static final java.util.regex.Pattern MAILTO_URLS
private static final java.util.regex.Pattern[] SKIPS
public static java.util.List<java.lang.String> parseFileList(java.lang.String baseurl, java.io.InputStream stream) throws TransferFailedException
Parameters:
stream - the input stream.
Throws:
TransferFailedException - if there was a problem fetching the raw HTML.
private static java.lang.String cleanLink(java.net.URI baseURI, java.lang.String link)
private static boolean isAcceptableLink(java.lang.String link)