resources/config/mimetypenames-update.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62

#!/bin/bash

# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
# SPDX-License-Identifier: AGPL-3.0-or-later

# Abort the script as soon as a command exits with a non-zero status.
set -o errexit

# File names used by the steps below. They carry no directory component, so
# they are looked up in (and written to) the current working directory.
OUTPUT_JSON='mimetypenames.dist.json'
XML_FILE='freedesktop.org.xml'
MIME_JSON='mimetypemapping.dist.json'

printf '%s\n' "1/ Extracting MIME types from $MIME_JSON"

# The jq program drops every top-level key that starts with "_" (the comment
# keys), flattens the remaining value arrays into one list, de-duplicates it
# and prints one MIME type per line (-r: raw strings, no JSON quoting).
MIME_TYPES=$(
    jq -r \
        'with_entries(select(.key | startswith("_") | not)) | to_entries | map(.value[]) | unique | .[]' \
        "$MIME_JSON"
)

# Report how many lines the extracted list contains.
printf 'Found %s unique MIME types\n' "$(wc -l <<< "$MIME_TYPES")"

echo "2/ Downloading freedesktop.org XML file"
# Fetch the shared-mime-info database pinned to a fixed commit.
# --fail (-f) makes curl exit non-zero on an HTTP error response instead of
# saving the server's error page as if it were the XML file; combined with
# `set -e` this stops the script before a bogus file is parsed.
# -s silences the progress meter, -S still prints errors, -L follows redirects.
curl -fsSL "https://gitlab.freedesktop.org/xdg/shared-mime-info/-/raw/03cb97596e90feda547c9b6a2addd656b14d1598/data/freedesktop.org.xml.in" > "$XML_FILE"
echo "Downloaded XML file to $XML_FILE"

echo "3/ Creating or updating MIME name mapping"

# Scratch files used while the mapping is being built. The EXIT trap removes
# them on every exit path, so an aborted run leaves no stale copies behind.
TMP_JSON="$OUTPUT_JSON.tmp"
TMP_JSON_NEXT="$OUTPUT_JSON.tmp2"
trap 'rm -f -- "$TMP_JSON" "$TMP_JSON_NEXT"' EXIT

# Start from existing output if it exists, or create an empty one
if [ -f "$OUTPUT_JSON" ]; then
    cp "$OUTPUT_JSON" "$TMP_JSON"
else
    echo "{}" > "$TMP_JSON"
fi

# Track stats
MATCHED_COUNT=0
MISSING_COUNT=0

# Process each MIME type (one per line of $MIME_TYPES)
while read -r MIME; do
    # The here-string always delivers at least one line, even when
    # $MIME_TYPES is empty; skip blanks instead of writing an entry whose
    # key is the empty string.
    [ -n "$MIME" ] || continue

    echo "Processing: $MIME"

    # Extract comment with XML namespace handling; only the first line of
    # the xmlstarlet output is kept.
    COMMENT=$(xmlstarlet sel -N x="http://www.freedesktop.org/standards/shared-mime-info" \
        -t -m "//x:mime-type[@type='${MIME}']" -v "x:comment" -n "$XML_FILE" | head -n 1)

    if [ -n "$COMMENT" ]; then
        MATCHED_COUNT=$((MATCHED_COUNT + 1))
    else
        echo " > Warning: No description found for MIME type $MIME ⚠️"
        MISSING_COUNT=$((MISSING_COUNT + 1))
    fi

    # Values passed with --arg are treated by jq as plain strings and are
    # JSON-escaped by jq itself, so the raw comment is handed over unchanged
    # (escaping quotes beforehand would store literal backslashes).
    # A missing description is recorded as an empty string.
    # The two commands are separate statements on purpose: in `a && b` a
    # failure of `a` does not trigger `set -e`, which would hide jq errors.
    jq --arg key "$MIME" --arg value "$COMMENT" '. + {($key): $value}' "$TMP_JSON" > "$TMP_JSON_NEXT"
    mv "$TMP_JSON_NEXT" "$TMP_JSON"
done <<< "$MIME_TYPES"

# Final formatting and sorting by keys. The sorted result is written to a
# scratch file first and only moved into place once jq has succeeded, so a
# failure cannot leave a truncated $OUTPUT_JSON behind.
jq -S . "$TMP_JSON" > "$TMP_JSON_NEXT"
mv "$TMP_JSON_NEXT" "$OUTPUT_JSON"

# Final report: one line per argument, showing the counters gathered while
# building the mapping and the name of the file that was written.
printf '%s\n' \
    "✅ Done!" \
    "✔️  Descriptions found for $MATCHED_COUNT MIME types" \
    "⚠️  Descriptions missing for $MISSING_COUNT MIME types" \
    "📄 Output written to $OUTPUT_JSON"