001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *     https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.configuration2.io;
019
020import java.net.MalformedURLException;
021import java.net.URL;
022import java.util.Collections;
023import java.util.LinkedHashSet;
024import java.util.Objects;
025import java.util.Set;
026import java.util.function.Function;
027import java.util.regex.Pattern;
028import java.util.stream.Collectors;
029
030import org.apache.commons.configuration2.ex.ConfigurationDeniedException;
031import org.apache.commons.io.build.AbstractSupplier;
032import org.apache.commons.lang3.StringUtils;
033
034/**
035 * Abstracts services for FileLocationStrategy implementations.
036 * <p>
 * Note that some FileLocationStrategy implementations use URLs internally to encode file locations.
038 * </p>
039 * <p>
040 * As of version 2.15.0, by default, the only URL schemes allowed are {@code file} and {@code jar}. To override this default, you can either use the system
041 * property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} or build a subclass of {@link AbstractFileLocationStrategy}.
042 * </p>
043 * <strong>Using System Properties</strong>
044 * <p>
045 * The system property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} String value must be a comma-separated list of schemes,
046 * where the default is {@code "file,jar"}, and the complete list is {@code "file,http,https,jar"}.
047 * </p>
048 * <strong>Using a Builder</strong>
049 * <p>
050 * The root builder for {@link AbstractFileLocationStrategy} is {@link AbstractBuilder} where you define allowed schemes and hosts through its setter
051 * methods.
052 * </p>
053 * <p>
 * For example, to programmatically enable the schemes "file", "http", "https", and "jar" for all strategies, you write:
055 * </p>
056 * <pre>{@code
057 * final PropertiesConfiguration pc = new PropertiesConfiguration();
058 *      pc.setIncludeListener(PropertiesConfiguration.NOOP_INCLUDE_LISTENER);
059 *      final FileHandler handler = new FileHandler(pc);
060 *      final CombinedLocationStrategy.Builder builder = new CombinedLocationStrategy.Builder()
061 *              .setSchemes(new TreeSet<>(Arrays.asList("file", "http", "https", "jar")));
062 *      // @formatter:off
063 *      handler.setLocationStrategy(builder.setSubStrategies(Arrays.asList(
064 *              new ProvidedURLLocationStrategy(builder),
065 *              new FileSystemLocationStrategy(builder),
066 *              new AbsoluteNameLocationStrategy(builder),
067 *              new BasePathLocationStrategy(builder),
068 *              new HomeDirectoryLocationStrategy.Builder().setEvaluateBasePath(true).getUnchecked(),
069 *              new HomeDirectoryLocationStrategy.Builder().setEvaluateBasePath(false).getUnchecked(),
070 *              new ClasspathLocationStrategy(builder)))
071 *              .get());
072 *      // @formatter:on
073 *      handler.setBasePath(TEST_BASE_PATH);
074 *      handler.setFileName("include-load-url-host-unknown-exception.properties");
075 *      handler.load();
076 * }</pre>
077 *
078 *
079 * @since 2.15.0
080 * @see FileLocationStrategy
081 */
082public abstract class AbstractFileLocationStrategy implements FileLocationStrategy {
083
084    /**
085     * Builds new instances for subclasses.
086     * <p>
087     * As of version 2.15.0, by default, the only URL schemes allowed are {@code file} and {@code jar}. To override this default, you can either use the system
088     * property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} or build a subclass of {@link AbstractFileLocationStrategy}.
089     * </p>
090     * <strong>Using System Properties</strong>
091     * <p>
092     * The system property {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes} String value must be a comma-separated list of schemes,
093     * where the default is {@code "file,jar"}, and the complete list is {@code "file,http,https,jar"}.
094     * </p>
095     * <strong>Using a Builder</strong>
096     * <p>
097     * The root builder for {@link AbstractFileLocationStrategy} is {@link AbstractBuilder} where you define allowed schemes and hosts through its setter
098     * methods.
099     * </p>
100     * <p>
101     * See {@link AbstractFileLocationStrategy} learn how to grant an deny URL schemes and hosts.
102     * </p>
103     *
104     * @param <T> The type of {@link FileLocationStrategy} to build.
105     * @param <B> The builder type.
106     */
107    public abstract static class AbstractBuilder<T extends FileLocationStrategy, B extends AbstractBuilder<T, B>> extends AbstractSupplier<T, B> {
108
109        /**
110         * Enabled URL-based hosts, empty means all are enabled. Host are case-insensitive.
111         */
112        private Set<Pattern> hosts = Collections.emptySet();
113        /**
114         * Enabled URL-based schemes, empty means all are enabled. Schemes are case-insensitive.
115         */
116        private Set<String> schemes = Collections.emptySet();
117
118        /**
119         * Constructs a new instance for subclasses.
120         */
121        public AbstractBuilder() {
122            // empty
123        }
124
125        Set<Pattern> getHosts() {
126            return hosts;
127        }
128
129        Set<String> getSchemes() {
130            return schemes;
131        }
132
133        /**
134         * Sets enabled URL-based hosts, empty means all are enabled. URL hosts are case-insensitive.
135         *
136         * @param hosts enabled URL-based hosts.
137         * @return {@code this} instance.
138         */
139        public B setHosts(final Set<Pattern> hosts) {
140            this.hosts = hosts != null ? hosts : Collections.emptySet();
141            return asThis();
142        }
143
144        /**
145         * Sets enabled URL-based hosts, empty means all are enabled. URL hosts are case-insensitive.
146         *
147         * @param hosts Regular expressions enabled URL-based hosts.
148         * @return {@code this} instance.
149         */
150        public B setHostsRegEx(final Set<String> hosts) {
151            return setHosts(hosts.stream().map(e -> Pattern.compile(e, Pattern.CASE_INSENSITIVE)).collect(Collectors.toSet()));
152        }
153
154        /**
155         * Sets enabled URL-based schemes, empty means all are enabled. URL schemes are case-insensitive.
156         *
157         * @param schemes enabled URL-based schemes, the default null means all schemes are allowed.
158         * @return {@code this} instance.
159         */
160        public B setSchemes(final Set<String> schemes) {
161            this.schemes = schemes != null ? schemes : Collections.emptySet();
162            return asThis();
163        }
164    }
165
166    /**
167     * Builds new instances of T.
168     *
169     * @param <T> The type of {@link FileLocationStrategy} to build.
170     */
171    public static class StrategyBuilder<T extends FileLocationStrategy> extends AbstractBuilder<T, StrategyBuilder<T>> {
172
173        /**
174         * Either set this or implement get().
175         */
176        private final Function<StrategyBuilder<T>, T> function;
177
178        /**
179         * Constructs a new instance for subclasses.
180         *
181         * @param function Builds an instance of T.
182         */
183        public StrategyBuilder(final Function<StrategyBuilder<T>, T> function) {
184            this.function = Objects.requireNonNull(function, "function");
185        }
186
187        @Override
188        public T get() {
189            return function.apply(asThis());
190        }
191    }
192
193    /**
194     * Default schemes.
195     */
196    private static final String DEFAULT_SCHEMES = "file,jar";
197    /**
198     * The system property key {@code org.apache.commons.configuration2.io.FileLocationStrategy.schemes}.
199     * <p>
200     * If absent, defaults to {@code "file,jar"}.
201     * </p>
202     * <p>
203     * For complete functionality, use {@code "file,http,https,jar"}.
204     * </p>
205     */
206    private static final String KEY_SCHEMES = "org.apache.commons.configuration2.io.FileLocationStrategy.schemes";
207
208    private static void checkHost(final String value, final Set<Pattern> validSet) {
209        final String lowerCase = StringUtils.toRootLowerCase(value);
210        if (!validSet.isEmpty() && StringUtils.isNotEmpty(lowerCase) && validSet.stream().noneMatch(p -> p.matcher(lowerCase).matches())) {
211            throw new ConfigurationDeniedException("URL host is not enabled: %s; must be one of %s", value, validSet);
212        }
213    }
214
215    /**
216     * Checks if the scheme is allowed.
217     *
218     * @param value A URL scheme, never empty or {@code null}.
219     * @param validSet the scheme valid-set.
220     */
221    private static void checkScheme(final String value, final Set<String> validSet) {
222        if (!validSet.isEmpty() && !validSet.contains(StringUtils.toRootLowerCase(value))) {
223            throw new ConfigurationDeniedException("URL scheme \"%s\" is not enabled, must be one of %s, override defaults with the system property \"%s\", "
224                    + "complete set: \"file,http,https,jar\"", value, validSet, KEY_SCHEMES);
225        }
226    }
227
228    /**
229     * Validates {@code url} against the scheme and host allow-lists.
230     *
231     * @param url           the URL to check.
232     * @param validSchemes  the scheme valid-set.
233     * @param validHosts    the host valid-set.
234     * @throws ConfigurationDeniedException if the URL or any embedded URL fails the check, or a {@code jar:} URL is malformed.
235     */
236    static void checkUrl(final URL url, final Set<String> validSchemes, final Set<Pattern> validHosts) {
237        final String scheme = url.getProtocol();
238        checkScheme(scheme, validSchemes);
239        if ("jar".equalsIgnoreCase(scheme)) {
240            try {
241                // Follows the logic of JarURLConnection#parseSpecs without the cost of opening a connection.
242                final String spec = url.getFile();
243                final int sep = spec.lastIndexOf("!/");
244                if (sep < 0) {
245                    throw new MalformedURLException("no !/ found in url spec:" + spec);
246                }
247                final URL inner = new URL(spec.substring(0, sep));
248                checkUrl(inner, validSchemes, validHosts);
249            } catch (final MalformedURLException e) {
250                throw new ConfigurationDeniedException(e, "Malformed 'jar:' URL: %s", url);
251            }
252        } else {
253            checkHost(url.getHost(), validHosts);
254        }
255    }
256
257    private static Set<String> getSchemesProperty() {
258        final Set<String> set = new LinkedHashSet<>();
259        final String[] split = System.getProperty(KEY_SCHEMES, DEFAULT_SCHEMES).split(",");
260        Collections.addAll(set, split);
261        return set;
262    }
263
264    /**
265     * Enabled URL-based hosts, empty means all are enabled. Host are case-insensitive.
266     */
267    private final Set<Pattern> hosts;
268    /**
269     * Enabled URL-based schemes, empty means all are enabled. Schemes are case-insensitive.
270     */
271    private final Set<String> schemes;
272
273    /**
274     * Constructs a new instance where the enabled URL schemes are read the system property
275     * {@code "org.apache.commons.configuration2.io.FileLocationStrategy.schemes"}.
276     * <p>
277     * If absent, defaults to {@code "file,jar"}.
278     * </p>
279     * <p>
280     * For complete functionality, use {@code "file,http,https,jar"}.
281     * </p>
282     */
283    AbstractFileLocationStrategy() {
284        this(getSchemesProperty());
285    }
286
287    AbstractFileLocationStrategy(final AbstractBuilder<?, ?> builder) {
288        Objects.requireNonNull(builder, "builder");
289        this.schemes = builder.schemes;
290        this.hosts = builder.hosts != null ? builder.hosts : Collections.emptySet();
291    }
292
293    AbstractFileLocationStrategy(final Set<String> schemes) {
294        this.schemes = schemes;
295        this.hosts = Collections.emptySet();
296    }
297
298    URL check(final URL url) {
299        if (url != null) {
300            checkUrl(url, schemes, hosts);
301        }
302        return url;
303    }
304
305    /**
306     * Gets the enabled hosts.
307     *
308     * @return the enabled hosts.
309     */
310    Set<Pattern> getHosts() {
311        return hosts;
312    }
313
314    /**
315     * Gets the enabled schemes.
316     *
317     * @return the enabled schemes.
318     */
319    Set<String> getSchemes() {
320        return schemes;
321    }
322
323    @Override
324    public String toString() {
325        return getClass().getSimpleName() + " [schemes=" + schemes + ", hosts=" + hosts + "]";
326    }
327}