/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.fileupload2.core;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;

import org.apache.commons.fileupload2.core.FileItemInput.ItemSkippedException;
import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractOrigin;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.output.NullOutputStream;

/**
 * Low-level API for processing file uploads.
 *
 * <p>
 * This class can be used to process data streams conforming to MIME 'multipart' format as defined in <a href="https://www.ietf.org/rfc/rfc1867.txt">RFC
 * 1867</a>. Arbitrarily large amounts of data in the stream can be processed under constant memory usage.
 * </p>
 * <p>
 * The format of the stream is defined in the following way:
 * </p>
 * <pre>
 *   multipart-body := preamble 1*encapsulation close-delimiter epilogue
 *   encapsulation := delimiter body CRLF
 *   delimiter := "--" boundary CRLF
 *   close-delimiter := "--" boundary "--"
 *   preamble := &lt;ignore&gt;
 *   epilogue := &lt;ignore&gt;
 *   body := header-part CRLF body-data
 *   header-part := 1*header CRLF
 *   header := header-name ":" header-value
 *   header-name := &lt;printable ASCII characters except ":"&gt;
 *   header-value := &lt;any ASCII characters except CR &amp; LF&gt;
 *   body-data := &lt;arbitrary data&gt;
 * </pre>
 *
 * <p>
 * Note that body-data can contain another multipart entity. There is limited support for single pass processing of such nested streams. The nested stream is
 * <strong>required</strong> to have a boundary token of the same length as the parent stream (see {@link #setBoundary(byte[])}).
 * </p>
 * <p>
 * Here is an example of usage of this class:
 * </p>
 *
 * <pre>
 * try {
 *     MultipartInput multipartStream = MultipartInput.builder()
 *             .setBoundary(boundary)
 *             .setInputStream(input)
 *             .get();
 *     boolean nextPart = multipartStream.skipPreamble();
 *     OutputStream output;
 *     while (nextPart) {
 *         String header = multipartStream.readHeaders();
 *         // process headers
 *         // create some output stream
 *         multipartStream.readBodyData(output);
 *         nextPart = multipartStream.readBoundary();
 *     }
 * } catch (MultipartInput.MalformedStreamException e) {
 *     // the stream failed to follow required syntax
 * } catch (IOException e) {
 *     // a read or write error occurred
 * }
 * </pre>
 */
public final class MultipartInput {

    /**
     * Builds a new {@link MultipartInput} instance.
     * <p>
     * For example:
     * </p>
     *
     * <pre>{@code
     * MultipartInput multipartInput = MultipartInput.builder()
     *     .setBoundary(boundary)
     *     .setInputStream(inputStream)
     *     .setBufferSize(bufferSize)
     *     .get();
     * }
     * </pre>
     */
    public static class Builder extends AbstractStreamBuilder<MultipartInput, Builder> {

        /**
         * Boundary.
         */
        private byte[] boundary;

        /**
         * Progress notifier.
         */
        private ProgressNotifier progressNotifier;

        /**
         * The per part size limit for headers.
         */
        private int maxPartHeaderSize = DEFAULT_PART_HEADER_SIZE_MAX;

        /**
         * Constructs a new instance whose default buffer size is {@link MultipartInput#DEFAULT_BUFSIZE}.
         */
        public Builder() {
            setBufferSizeDefault(DEFAULT_BUFSIZE);
        }

        /**
         * Constructs a new {@link MultipartInput}.
         * <p>
         * This builder uses the InputStream, buffer size, boundary, per part header size limit and progress notifier aspects.
         * </p>
         * <p>
         * You must provide an {@link AbstractOrigin origin} that can be converted to an {@link InputStream} by this builder.
         * </p>
         *
         * @return a new instance.
         * @throws IOException              if an I/O error occurs.
         * @throws IllegalArgumentException if no boundary has been set, or if the buffer size is too small to hold the boundary.
         */
        @Override
        public MultipartInput get() throws IOException {
            return new MultipartInput(this);
        }

        /**
         * Gets the per part size limit for headers.
         *
         * @return The maximum size of the headers in bytes.
         * @since 2.0.0-M5
         */
        public int getMaxPartHeaderSize() {
            return maxPartHeaderSize;
        }

        /**
         * Sets the boundary.
         *
         * @param boundary the boundary.
         * @return {@code this} instance.
         */
        public Builder setBoundary(final byte[] boundary) {
            this.boundary = boundary;
            return this;
        }

        /**
         * Sets the per part size limit for headers.
         *
         * @param partHeaderSizeMax The maximum size of the headers in bytes; {@code -1} disables the limit.
         * @return {@code this} instance.
         * @since 2.0.0-M5
         */
        public Builder setMaxPartHeaderSize(final int partHeaderSizeMax) {
            this.maxPartHeaderSize = partHeaderSizeMax;
            return this;
        }

        /**
         * Sets the progress notifier.
         *
         * @param progressNotifier progress notifier.
         * @return {@code this} instance.
         */
        public Builder setProgressNotifier(final ProgressNotifier progressNotifier) {
            this.progressNotifier = progressNotifier;
            return this;
        }
    }

    /**
     * Signals an attempt to set an invalid boundary token.
     */
    public static class FileUploadBoundaryException extends FileUploadException {

        /**
         * The UID to use when serializing this instance.
         */
        private static final long serialVersionUID = 2;

        /**
         * Constructs an instance with the specified detail message.
         *
         * @param message The detail message (which is saved for later retrieval by the {@link #getMessage()} method)
         */
        public FileUploadBoundaryException(final String message) {
            super(message);
        }

    }

    /**
     * An {@link InputStream} for reading an item's contents.
     * <p>
     * The stream ends where the next occurrence of the boundary begins in the enclosing {@link MultipartInput}'s buffer.
     * </p>
     */
    public class ItemInputStream extends InputStream {

        /**
         * The number of bytes, which have been read so far.
         */
        private long total;

        /**
         * The number of bytes, which must be held back, because they might be a part of the boundary.
         */
        private int pad;

        /**
         * The buffer offset of the boundary, or {@code -1} if no boundary is currently in the buffer.
         */
        private int pos;

        /**
         * Whether the stream is already closed.
         */
        private boolean closed;

        /**
         * Creates a new instance.
         */
        ItemInputStream() {
            findSeparator();
        }

        /**
         * Returns the number of bytes, which are currently available, without blocking.
         *
         * @throws IOException An I/O error occurs.
         * @return Number of bytes in the buffer.
         */
        @Override
        public int available() throws IOException {
            if (pos == -1) {
                // No boundary in sight: everything except the held-back tail may be handed out.
                return tail - head - pad;
            }
            return pos - head;
        }

        /**
         * Verifies that this stream has not been closed.
         *
         * @throws ItemSkippedException if this stream is closed.
         */
        private void checkOpen() throws ItemSkippedException {
            if (closed) {
                throw new FileItemInput.ItemSkippedException("checkOpen()");
            }
        }

        /**
         * Closes the input stream by consuming the remaining item data; the underlying stream stays open.
         *
         * @throws IOException An I/O error occurred.
         */
        @Override
        public void close() throws IOException {
            close(false);
        }

        /**
         * Closes the input stream.
         *
         * @param closeUnderlying Whether to close the underlying stream (hard close). If {@code false}, the remaining data of this item is read and
         *                        discarded so that the enclosing parser is positioned at the next boundary.
         * @throws IOException An I/O error occurred.
         */
        public void close(final boolean closeUnderlying) throws IOException {
            if (closed) {
                return;
            }
            if (closeUnderlying) {
                // Mark as closed first, so the stream stays closed even if the underlying close fails.
                closed = true;
                input.close();
                return;
            }
            for (;;) {
                var avail = available();
                if (avail == 0) {
                    avail = makeAvailable();
                    if (avail == 0) {
                        break;
                    }
                }
                // skip() derives its limit from the same buffer state, so all avail bytes are always skipped.
                skip(avail);
            }
            closed = true;
        }

        /**
         * Locates the boundary in the current buffer contents and, if there is none, computes how many trailing bytes must be held back.
         */
        private void findSeparator() {
            pos = MultipartInput.this.findSeparator();
            if (pos == -1) {
                pad = Math.min(tail - head, keepRegion);
            }
        }

        /**
         * Gets the number of bytes, which have been read by the stream.
         *
         * @return Number of bytes, which have been read so far.
         */
        public long getBytesRead() {
            return total;
        }

        /**
         * Tests whether this instance is closed.
         *
         * @return whether this instance is closed.
         */
        public boolean isClosed() {
            return closed;
        }

        /**
         * Attempts to read more data.
         *
         * @return Number of available bytes; {@code 0} if the boundary has been reached.
         * @throws IOException An I/O error occurred, or the underlying stream ended before a boundary was found.
         */
        private int makeAvailable() throws IOException {
            if (pos != -1) {
                return 0;
            }

            // Move the held-back bytes to the beginning of the buffer.
            total += tail - head - pad;
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);

            // Refill buffer with new data.
            head = 0;
            tail = pad;

            for (;;) {
                final var bytesRead = input.read(buffer, tail, bufSize - tail);
                if (bytesRead == -1) {
                    // The last pad amount is left in the buffer.
                    // Boundary can't be in there so signal an error condition.
                    throw new MalformedStreamException("Stream ended unexpectedly");
                }
                if (notifier != null) {
                    notifier.noteBytesRead(bytesRead);
                }
                tail += bytesRead;

                findSeparator();
                final var av = available();

                if (av > 0 || pos != -1) {
                    return av;
                }
            }
        }

        /**
         * Reads the next byte in the stream.
         *
         * @return The next byte in the stream, as a non-negative integer, or -1 for EOF.
         * @throws IOException An I/O error occurred.
         */
        @Override
        public int read() throws IOException {
            checkOpen();
            if (available() == 0 && makeAvailable() == 0) {
                return -1;
            }
            ++total;
            return Byte.toUnsignedInt(buffer[head++]);
        }

        /**
         * Reads bytes into the given buffer.
         *
         * @param b   The destination buffer, where to write to.
         * @param off Offset of the first byte in the buffer.
         * @param len Maximum number of bytes to read.
         * @return Number of bytes, which have been actually read, or -1 for EOF.
         * @throws IOException An I/O error occurred.
         */
        @Override
        public int read(final byte[] b, final int off, final int len) throws IOException {
            checkOpen();
            if (len == 0) {
                return 0;
            }
            var res = available();
            if (res == 0) {
                res = makeAvailable();
                if (res == 0) {
                    return -1;
                }
            }
            res = Math.min(res, len);
            System.arraycopy(buffer, head, b, off, res);
            head += res;
            total += res;
            return res;
        }

        /**
         * Skips the given number of bytes.
         *
         * @param bytes Number of bytes to skip. If zero or negative, no bytes are skipped.
         * @return The number of bytes, which have actually been skipped.
         * @throws IOException An I/O error occurred.
         */
        @Override
        public long skip(final long bytes) throws IOException {
            checkOpen();
            if (bytes <= 0) {
                // Per the InputStream contract; a negative count must never move the read position backwards.
                return 0;
            }
            var available = available();
            if (available == 0) {
                available = makeAvailable();
                if (available == 0) {
                    return 0;
                }
            }
            // The minimum is bounded by available, which is an int, so the narrowing is exact.
            final var res = Math.toIntExact(Math.min(available, bytes));
            head += res;
            return res;
        }

    }

    /**
     * Signals that the input stream fails to follow the required syntax.
     */
    public static class MalformedStreamException extends FileUploadException {

        /**
         * The UID to use when serializing this instance.
         */
        private static final long serialVersionUID = 2;

        /**
         * Constructs an {@code MalformedStreamException} with the specified detail message.
         *
         * @param message The detail message.
         */
        public MalformedStreamException(final String message) {
            super(message);
        }

        /**
         * Constructs an {@code MalformedStreamException} with the specified detail message and cause.
         *
         * @param message The detail message.
         * @param cause   The cause (which is saved for later retrieval by the {@link #getCause()} method). (A null value is permitted, and indicates that the
         *                cause is nonexistent or unknown.)
         */
        public MalformedStreamException(final String message, final Throwable cause) {
            super(message, cause);
        }

    }

    /**
     * Internal class, which is used to invoke the {@link ProgressListener}.
     */
    public static class ProgressNotifier {

        /**
         * The listener to invoke.
         */
        private final ProgressListener progressListener;

        /**
         * Number of expected bytes, if known, or -1.
         */
        private final long contentLength;

        /**
         * Number of bytes, which have been read so far.
         */
        private long bytesRead;

        /**
         * Number of items, which have been read so far.
         */
        private int items;

        /**
         * Creates a new instance with the given listener and content length.
         *
         * @param progressListener The listener to invoke; {@code null} is replaced by {@link ProgressListener#NOP}.
         * @param contentLength    The expected content length.
         */
        public ProgressNotifier(final ProgressListener progressListener, final long contentLength) {
            this.progressListener = progressListener != null ? progressListener : ProgressListener.NOP;
            this.contentLength = contentLength;
        }

        /**
         * Called to indicate that bytes have been read from the input stream.
         *
         * @param byteCount Number of bytes, which have been read.
         */
        void noteBytesRead(final int byteCount) {
            bytesRead += byteCount;
            notifyListener();
        }

        /**
         * Called to indicate, that a new file item has been detected.
         */
        public void noteItem() {
            ++items;
            notifyListener();
        }

        /**
         * Called for notifying the listener.
         */
        private void notifyListener() {
            progressListener.update(bytesRead, contentLength, items);
        }

    }

    /**
     * The Carriage Return ASCII character value.
     */
    public static final byte CR = 0x0D;

    /**
     * The Line Feed ASCII character value.
     */
    public static final byte LF = 0x0A;

    /**
     * The dash (-) ASCII character value.
     */
    public static final byte DASH = 0x2D;

    /**
     * The default length of the buffer used for processing a request.
     */
    static final int DEFAULT_BUFSIZE = 4096;

    /**
     * Default per part header size limit in bytes.
     *
     * @since 2.0.0-M4
     */
    public static final int DEFAULT_PART_HEADER_SIZE_MAX = 512;

    /**
     * A byte sequence that marks the end of {@code header-part} ({@code CRLFCRLF}).
     */
    static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };

    /**
     * A byte sequence that follows a delimiter that will be followed by an encapsulation ({@code CRLF}).
     */
    static final byte[] FIELD_SEPARATOR = { CR, LF };

    /**
     * A byte sequence that follows a delimiter of the last encapsulation in the stream ({@code --}).
     */
    static final byte[] STREAM_TERMINATOR = { DASH, DASH };

    /**
     * A byte sequence that precedes a boundary ({@code CRLF--}).
     */
    static final byte[] BOUNDARY_PREFIX = { CR, LF, DASH, DASH };

    /**
     * Compares {@code count} first bytes in the arrays {@code a} and {@code b}.
     *
     * @param a     The first array to compare.
     * @param b     The second array to compare.
     * @param count How many bytes should be compared.
     * @return {@code true} if {@code count} first bytes in arrays {@code a} and {@code b} are equal.
     */
    static boolean arrayEquals(final byte[] a, final byte[] b, final int count) {
        for (var i = 0; i < count; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * The input stream from which data is read.
     */
    private final InputStream input;

    /**
     * The length of the boundary token plus the leading {@code CRLF--}.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order to detect delimiters reliably.
     */
    private final int keepRegion;

    /**
     * The byte sequence that partitions the stream.
     */
    private final byte[] boundary;

    /**
     * The table for Knuth-Morris-Pratt search algorithm.
     */
    private final int[] boundaryTable;

    /**
     * The length of the buffer used for processing the request.
     */
    private final int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private final byte[] buffer;

    /**
     * The index of first valid character in the buffer. <br>
     * {@code 0 <= head < bufSize}
     */
    private int head;

    /**
     * The index of last valid character in the buffer + 1. <br>
     * {@code 0 <= tail <= bufSize}
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private Charset headerCharset;

    /**
     * The progress notifier, if any, or null.
     */
    private final ProgressNotifier notifier;

    /**
     * The maximum size of the headers in bytes.
     */
    private final int maxPartHeaderSize;

    /**
     * Constructs a {@code MultipartInput} from the given builder.
     * <p>
     * Note that the buffer must be at least big enough to contain the boundary string, plus 4 characters for CR/LF and double dash, plus at least one byte of
     * data. Too small a buffer size setting will degrade performance. The buffer actually allocated is never smaller than twice the length of the prefixed
     * boundary.
     * </p>
     *
     * @param builder The builder supplying the input stream, the boundary, the buffer size, the per part header size limit and the progress notifier.
     * @throws IOException              Thrown if an I/O error occurs.
     * @throws IllegalArgumentException If the boundary is {@code null} or the buffer size is too small.
     */
    private MultipartInput(final Builder builder) throws IOException {
        if (builder.boundary == null) {
            throw new IllegalArgumentException("boundary may not be null");
        }
        // We prepend CR/LF to the boundary to chop trailing CR/LF from
        // body-data tokens.
        this.boundaryLength = builder.boundary.length + BOUNDARY_PREFIX.length;
        if (builder.getBufferSize() < this.boundaryLength + 1) {
            throw new IllegalArgumentException("The buffer size specified for the MultipartInput is too small");
        }
        this.input = builder.getInputStream();
        this.bufSize = Math.max(builder.getBufferSize(), boundaryLength * 2);
        this.buffer = new byte[this.bufSize];
        this.notifier = builder.progressNotifier;
        this.maxPartHeaderSize = builder.getMaxPartHeaderSize();
        this.boundary = new byte[this.boundaryLength];
        this.boundaryTable = new int[this.boundaryLength + 1];
        this.keepRegion = this.boundary.length;
        System.arraycopy(BOUNDARY_PREFIX, 0, this.boundary, 0, BOUNDARY_PREFIX.length);
        System.arraycopy(builder.boundary, 0, this.boundary, BOUNDARY_PREFIX.length, builder.boundary.length);
        computeBoundaryTable();
        head = 0;
        tail = 0;
    }

    /**
     * Computes the table used for Knuth-Morris-Pratt search algorithm, covering the first {@code boundaryLength} bytes of {@code boundary}.
     */
    private void computeBoundaryTable() {
        var position = 2;
        var candidate = 0;

        boundaryTable[0] = -1;
        boundaryTable[1] = 0;

        while (position <= boundaryLength) {
            if (boundary[position - 1] == boundary[candidate]) {
                boundaryTable[position] = candidate + 1;
                candidate++;
                position++;
            } else if (candidate > 0) {
                candidate = boundaryTable[candidate];
            } else {
                boundaryTable[position] = 0;
                position++;
            }
        }
    }

    /**
     * Reads {@code body-data} from the current {@code encapsulation} and discards it.
     * <p>
     * Use this method to skip encapsulations you don't need or don't understand.
     * </p>
     *
     * @return The amount of data discarded.
     * @throws MalformedStreamException if the stream ends unexpectedly.
     * @throws IOException              if an i/o error occurs.
     */
    public long discardBodyData() throws MalformedStreamException, IOException {
        return readBodyData(NullOutputStream.INSTANCE);
    }

    /**
     * Searches for a byte of specified value in the {@code buffer}, starting at the specified {@code position}.
     *
     * @param value The value to find.
     * @param pos   The starting position for searching.
     * @return The position of byte found, counting from beginning of the {@code buffer}, or {@code -1} if not found.
     */
    protected int findByte(final byte value, final int pos) {
        for (var i = pos; i < tail; i++) {
            if (buffer[i] == value) {
                return i;
            }
        }

        return -1;
    }

    /**
     * Searches for the {@code boundary} in the {@code buffer} region delimited by {@code head} and {@code tail}.
     *
     * @return The position of the boundary found, counting from the beginning of the {@code buffer}, or {@code -1} if not found.
     */
    protected int findSeparator() {
        var bufferPos = this.head;
        var tablePos = 0;
        while (bufferPos < this.tail) {
            // On mismatch, fall back through the KMP table; -1 means "restart with the next buffer byte".
            while (tablePos >= 0 && buffer[bufferPos] != boundary[tablePos]) {
                tablePos = boundaryTable[tablePos];
            }
            bufferPos++;
            tablePos++;
            if (tablePos == boundaryLength) {
                return bufferPos - boundaryLength;
            }
        }
        return -1;
    }

    /**
     * Gets the character encoding used when reading the headers of an individual part. When not specified, or {@code null}, the platform default encoding is
     * used.
     *
     * @return The encoding used to read part headers.
     */
    public Charset getHeaderCharset() {
        return headerCharset;
    }

    /**
     * Returns the per part size limit for headers.
     *
     * @return The maximum size of the headers in bytes.
     * @since 2.0.0-M5
     */
    public int getMaxPartHeaderSize() {
        return maxPartHeaderSize;
    }

    /**
     * Creates a new {@link ItemInputStream}.
     *
     * @return A new instance of {@link ItemInputStream}.
     */
    public ItemInputStream newInputStream() {
        return new ItemInputStream();
    }

    /**
     * Reads {@code body-data} from the current {@code encapsulation} and writes its contents into the output {@code Stream}.
     * <p>
     * Arbitrary large amounts of data can be processed by this method using a constant size buffer. (see {@link MultipartInput#builder()}).
     * </p>
     *
     * @param output The {@code Stream} to write data into. May be null, in which case this method is equivalent to {@link #discardBodyData()}.
     * @return the amount of data written.
     * @throws MalformedStreamException if the stream ends unexpectedly.
     * @throws IOException              if an i/o error occurs.
     */
    public long readBodyData(final OutputStream output) throws MalformedStreamException, IOException {
        // Honor the documented contract: a null sink means "discard".
        final OutputStream sink = output != null ? output : NullOutputStream.INSTANCE;
        try (var inputStream = newInputStream()) {
            return IOUtils.copyLarge(inputStream, sink);
        }
    }

    /**
     * Skips a {@code boundary} token, and checks whether more {@code encapsulations} are contained in the stream.
     *
     * @return {@code true} if there are more encapsulations in this stream; {@code false} otherwise.
     * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits
     * @throws MalformedStreamException if the stream ends unexpectedly or fails to follow required syntax.
     */
    public boolean readBoundary() throws FileUploadSizeException, MalformedStreamException {
        final var marker = new byte[2];
        final boolean nextChunk;
        head += boundaryLength;
        try {
            marker[0] = readByte();
            if (marker[0] == LF) {
                // Work around IE5 Mac bug with input type=image.
                // Because the boundary delimiter, not including the trailing
                // CRLF, must not appear within any file (RFC 2046, section
                // 5.1.1), we know the missing CR is due to a buggy browser
                // rather than a file containing something similar to a
                // boundary.
                return true;
            }

            marker[1] = readByte();
            if (arrayEquals(marker, STREAM_TERMINATOR, 2)) {
                nextChunk = false;
            } else if (arrayEquals(marker, FIELD_SEPARATOR, 2)) {
                nextChunk = true;
            } else {
                throw new MalformedStreamException("Unexpected characters follow a boundary");
            }
        } catch (final FileUploadSizeException e) {
            throw e;
        } catch (final IOException e) {
            throw new MalformedStreamException("Stream ended unexpectedly", e);
        }
        return nextChunk;
    }

    /**
     * Reads a byte from the {@code buffer}, and refills it as necessary.
     *
     * @return The next byte from the input stream.
     * @throws IOException if there is no more data available.
     */
    public byte readByte() throws IOException {
        // Buffer depleted ?
        if (head == tail) {
            // Mark the buffer as empty before refilling, so that a failed or exhausted read leaves a consistent
            // state and a repeated call fails again instead of returning stale bytes.
            head = 0;
            tail = 0;
            final var bytesRead = input.read(buffer, 0, bufSize);
            if (bytesRead == -1) {
                // No more data available.
                throw new IOException("No more data is available");
            }
            tail = bytesRead;
            if (notifier != null) {
                notifier.noteBytesRead(bytesRead);
            }
        }
        return buffer[head++];
    }

    /**
     * Reads the {@code header-part} of the current {@code encapsulation}.
     * <p>
     * Headers are returned verbatim to the input stream, including the trailing {@code CRLF} marker. Parsing is left to the application.
     * </p>
     * <p>
     * The number of bytes consumed is limited by {@link #getMaxPartHeaderSize()}, unless that limit is {@code -1}.
     * </p>
     *
     * @return The {@code header-part} of the current encapsulation.
     * @throws FileUploadSizeException  if the bytes read from the stream exceeded the size limits.
     * @throws MalformedStreamException if the stream ends unexpectedly.
     */
    public String readHeaders() throws FileUploadSizeException, MalformedStreamException {
        var i = 0;
        byte b;
        // to support multi-byte characters
        final var baos = new ByteArrayOutputStream();
        final int phsm = getMaxPartHeaderSize();
        var size = 0;
        while (i < HEADER_SEPARATOR.length) {
            try {
                b = readByte();
            } catch (final FileUploadSizeException e) {
                // wraps a FileUploadSizeException, re-throw as it will be unwrapped later
                throw e;
            } catch (final IOException e) {
                throw new MalformedStreamException("Stream ended unexpectedly", e);
            }
            if (phsm != -1 && ++size > phsm) {
                throw new FileUploadSizeException(
                        String.format("Header section has more than %s bytes (maybe it is not properly terminated)", Integer.valueOf(phsm)), phsm, size);
            }
            if (b == HEADER_SEPARATOR[i]) {
                i++;
            } else {
                // Restart the match. The byte that broke the sequence may itself be the first byte of the
                // separator (for example CR CR LF CR LF), in which case it must not be dropped.
                i = b == HEADER_SEPARATOR[0] ? 1 : 0;
            }
            baos.write(b);
        }
        try {
            return baos.toString(Charsets.toCharset(headerCharset, Charset.defaultCharset()).name());
        } catch (final UnsupportedEncodingException e) {
            // not possible
            throw new IllegalStateException(e);
        }
    }

    /**
     * Changes the boundary token used for partitioning the stream.
     * <p>
     * This method allows single pass processing of nested multipart streams.
     * </p>
     * <p>
     * The boundary token of the nested stream is {@code required} to be of the same length as the boundary token in parent stream.
     * </p>
     * <p>
     * Restoring the parent stream boundary token after processing of a nested stream is left to the application.
     * </p>
     *
     * @param boundary The boundary to be used for parsing of the nested stream.
     * @throws FileUploadBoundaryException if the {@code boundary} has a different length than the one being currently parsed.
     */
    public void setBoundary(final byte[] boundary) throws FileUploadBoundaryException {
        if (boundary.length != boundaryLength - BOUNDARY_PREFIX.length) {
            throw new FileUploadBoundaryException("The length of a boundary token cannot be changed");
        }
        System.arraycopy(boundary, 0, this.boundary, BOUNDARY_PREFIX.length, boundary.length);
        computeBoundaryTable();
    }

    /**
     * Sets the character encoding to be used when reading the headers of individual parts. When not specified, or {@code null}, the platform default encoding
     * is used.
     *
     * @param headerCharset The encoding used to read part headers.
     */
    public void setHeaderCharset(final Charset headerCharset) {
        this.headerCharset = headerCharset;
    }

    /**
     * Finds the beginning of the first {@code encapsulation}.
     *
     * @return {@code true} if an {@code encapsulation} was found in the stream.
     * @throws IOException if an i/o error occurs.
     */
    public boolean skipPreamble() throws IOException {
        // First delimiter may be not preceded with a CRLF, so temporarily search for the boundary without it.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        computeBoundaryTable();
        try {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        } catch (final MalformedStreamException e) {
            return false;
        } finally {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = CR;
            boundary[1] = LF;
            computeBoundaryTable();
        }
    }

}