001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.filter;
019
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Objects;
import java.util.TreeSet;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;
import org.apache.hbase.thirdparty.com.google.protobuf.UnsafeByteOperations;

import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;
038
039/**
040 * This filter is used for selecting only those keys with columns that matches a particular prefix.
041 * For example, if prefix is 'an', it will pass keys will columns like 'and', 'anti' but not keys
042 * with columns like 'ball', 'act'.
043 */
044@InterfaceAudience.Public
045public class MultipleColumnPrefixFilter extends FilterBase {
046  private static final Logger LOG = LoggerFactory.getLogger(MultipleColumnPrefixFilter.class);
047  protected byte[] hint = null;
048  protected TreeSet<byte[]> sortedPrefixes = createTreeSet();
049  private final static int MAX_LOG_PREFIXES = 5;
050
051  public MultipleColumnPrefixFilter(final byte[][] prefixes) {
052    if (prefixes != null) {
053      for (byte[] prefix : prefixes) {
054        if (!sortedPrefixes.add(prefix)) {
055          LOG.error("prefix {} is repeated", Bytes.toString(prefix));
056          throw new IllegalArgumentException("prefixes must be distinct");
057        }
058      }
059    }
060  }
061
062  public byte[][] getPrefix() {
063    int count = 0;
064    byte[][] temp = new byte[sortedPrefixes.size()][];
065    for (byte[] prefixes : sortedPrefixes) {
066      temp[count++] = prefixes;
067    }
068    return temp;
069  }
070
071  @Override
072  public boolean filterRowKey(Cell cell) throws IOException {
073    // Impl in FilterBase might do unnecessary copy for Off heap backed Cells.
074    return false;
075  }
076
077  @Deprecated
078  @Override
079  public ReturnCode filterKeyValue(final Cell c) {
080    return filterCell(c);
081  }
082
083  @Override
084  public ReturnCode filterCell(final Cell c) {
085    if (sortedPrefixes.isEmpty()) {
086      return ReturnCode.INCLUDE;
087    } else {
088      return filterColumn(c);
089    }
090  }
091
092  public ReturnCode filterColumn(Cell cell) {
093    byte[] qualifier = CellUtil.cloneQualifier(cell);
094    TreeSet<byte[]> lesserOrEqualPrefixes =
095      (TreeSet<byte[]>) sortedPrefixes.headSet(qualifier, true);
096
097    if (lesserOrEqualPrefixes.size() != 0) {
098      byte[] largestPrefixSmallerThanQualifier = lesserOrEqualPrefixes.last();
099
100      if (Bytes.startsWith(qualifier, largestPrefixSmallerThanQualifier)) {
101        return ReturnCode.INCLUDE;
102      }
103
104      if (lesserOrEqualPrefixes.size() == sortedPrefixes.size()) {
105        return ReturnCode.NEXT_ROW;
106      } else {
107        hint = sortedPrefixes.higher(largestPrefixSmallerThanQualifier);
108        return ReturnCode.SEEK_NEXT_USING_HINT;
109      }
110    } else {
111      hint = sortedPrefixes.first();
112      return ReturnCode.SEEK_NEXT_USING_HINT;
113    }
114  }
115
116  public static Filter createFilterFromArguments(ArrayList<byte[]> filterArguments) {
117    byte[][] prefixes = new byte[filterArguments.size()][];
118    for (int i = 0; i < filterArguments.size(); i++) {
119      byte[] columnPrefix = ParseFilter.removeQuotesFromByteArray(filterArguments.get(i));
120      prefixes[i] = columnPrefix;
121    }
122    return new MultipleColumnPrefixFilter(prefixes);
123  }
124
125  /** Returns The filter serialized using pb */
126  @Override
127  public byte[] toByteArray() {
128    FilterProtos.MultipleColumnPrefixFilter.Builder builder =
129      FilterProtos.MultipleColumnPrefixFilter.newBuilder();
130    for (byte[] element : sortedPrefixes) {
131      if (element != null) builder.addSortedPrefixes(UnsafeByteOperations.unsafeWrap(element));
132    }
133    return builder.build().toByteArray();
134  }
135
136  /**
137   * @param pbBytes A pb serialized {@link MultipleColumnPrefixFilter} instance
138   * @return An instance of {@link MultipleColumnPrefixFilter} made from <code>bytes</code>
139   * @see #toByteArray
140   */
141  public static MultipleColumnPrefixFilter parseFrom(final byte[] pbBytes)
142    throws DeserializationException {
143    FilterProtos.MultipleColumnPrefixFilter proto;
144    try {
145      proto = FilterProtos.MultipleColumnPrefixFilter.parseFrom(pbBytes);
146    } catch (InvalidProtocolBufferException e) {
147      throw new DeserializationException(e);
148    }
149    int numPrefixes = proto.getSortedPrefixesCount();
150    byte[][] prefixes = new byte[numPrefixes][];
151    for (int i = 0; i < numPrefixes; ++i) {
152      prefixes[i] = proto.getSortedPrefixes(i).toByteArray();
153    }
154
155    return new MultipleColumnPrefixFilter(prefixes);
156  }
157
158  /**
159   * @param o the other filter to compare with
160   * @return true if and only if the fields of the filter that are serialized are equal to the
161   *         corresponding fields in other. Used for testing.
162   */
163  @Override
164  boolean areSerializedFieldsEqual(Filter o) {
165    if (o == this) return true;
166    if (!(o instanceof MultipleColumnPrefixFilter)) return false;
167
168    MultipleColumnPrefixFilter other = (MultipleColumnPrefixFilter) o;
169    return this.sortedPrefixes.equals(other.sortedPrefixes);
170  }
171
172  @Override
173  public Cell getNextCellHint(Cell cell) {
174    return PrivateCellUtil.createFirstOnRowCol(cell, hint, 0, hint.length);
175  }
176
177  public TreeSet<byte[]> createTreeSet() {
178    return new TreeSet<>(new Comparator<Object>() {
179      @Override
180      public int compare(Object o1, Object o2) {
181        if (o1 == null || o2 == null) throw new IllegalArgumentException("prefixes can't be null");
182
183        byte[] b1 = (byte[]) o1;
184        byte[] b2 = (byte[]) o2;
185        return Bytes.compareTo(b1, 0, b1.length, b2, 0, b2.length);
186      }
187    });
188  }
189
190  @Override
191  public String toString() {
192    return toString(MAX_LOG_PREFIXES);
193  }
194
195  protected String toString(int maxPrefixes) {
196    StringBuilder prefixes = new StringBuilder();
197
198    int count = 0;
199    for (byte[] ba : this.sortedPrefixes) {
200      if (count >= maxPrefixes) {
201        break;
202      }
203      ++count;
204      prefixes.append(Bytes.toStringBinary(ba));
205      if (count < this.sortedPrefixes.size() && count < maxPrefixes) {
206        prefixes.append(", ");
207      }
208    }
209
210    return String.format("%s (%d/%d): [%s]", this.getClass().getSimpleName(), count,
211      this.sortedPrefixes.size(), prefixes.toString());
212  }
213
214  @Override
215  public boolean equals(Object obj) {
216    return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj);
217  }
218
219  @Override
220  public int hashCode() {
221    return Objects.hash(this.sortedPrefixes);
222  }
223}