blob: 278733396b94d094547bbba57b47e9914b0b59e0 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
#!/bin/bash
# SPDX-FileCopyrightText: 2025 Nextcloud GmbH and Nextcloud contributors
# SPDX-License-Identifier: AGPL-3.0-or-later
#
# Regenerates the MIME type -> human-readable name mapping.
#
# Steps:
#   1. Collect every unique MIME type listed in $MIME_JSON.
#   2. Download a pinned revision of the freedesktop.org shared-mime-info XML.
#   3. For each MIME type, store the first <comment> of the matching
#      <mime-type> element (or "" when none is found) under that key in
#      $OUTPUT_JSON. Keys already present in $OUTPUT_JSON that are not
#      processed in this run are kept as they are.
#
# All paths are relative to the current working directory.
# Requires: jq, curl, xmlstarlet.
set -eu

# Paths
MIME_JSON="mimetypemapping.dist.json"
XML_FILE="freedesktop.org.xml"
OUTPUT_JSON="mimetypenames.dist.json"

# Pinned upstream revision of the shared-mime-info database and its XML namespace
XML_URL="https://gitlab.freedesktop.org/xdg/shared-mime-info/-/raw/03cb97596e90feda547c9b6a2addd656b14d1598/data/freedesktop.org.xml.in"
XML_NS="http://www.freedesktop.org/standards/shared-mime-info"

# Prints an error message to stderr and aborts the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Fail early with a clear message instead of a "command not found" mid-run.
for TOOL in jq curl xmlstarlet; do
  command -v "$TOOL" >/dev/null 2>&1 || die "required tool '$TOOL' is not installed"
done

echo "1/ Extracting MIME types from $MIME_JSON"
# Extract all unique MIME types (keys starting with "_", e.g. _comment, are excluded)
MIME_TYPES=$(jq -r 'with_entries(select(.key | startswith("_") | not)) | to_entries | map(.value[]) | unique | .[]' "$MIME_JSON")
# An empty list would otherwise be counted as one (empty) MIME type below.
[ -n "$MIME_TYPES" ] || die "no MIME types found in $MIME_JSON"
echo "Found $(printf '%s\n' "$MIME_TYPES" | wc -l) unique MIME types"

echo "2/ Downloading freedesktop.org XML file"
# -f: treat HTTP errors as failures so an error page is never saved as the XML file
curl -fsSL "$XML_URL" > "$XML_FILE"
[ -s "$XML_FILE" ] || die "downloaded XML file $XML_FILE is empty"
echo "Downloaded XML file to $XML_FILE"

echo "3/ Creating or updating MIME name mapping"
# Work on private temporary copies; they are removed on every exit path.
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/mimetypenames.XXXXXX")
WORK_JSON="$TMP_DIR/work.json"
NEXT_JSON="$TMP_DIR/next.json"
cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT

# Start from existing output if it exists, or create an empty one
if [ -f "$OUTPUT_JSON" ]; then
  cp -- "$OUTPUT_JSON" "$WORK_JSON"
else
  echo "{}" > "$WORK_JSON"
fi

# Sets key $1 to the string $2 in the working JSON object.
# The value is passed via --arg, so jq performs all JSON escaping itself;
# escaping it beforehand would store literal backslashes.
# The two commands are separate statements so that a jq failure aborts the
# script (set -e) instead of being swallowed on the left side of "&&".
set_name() {
  jq --arg key "$1" --arg value "$2" '. + {($key): $value}' "$WORK_JSON" > "$NEXT_JSON"
  mv -- "$NEXT_JSON" "$WORK_JSON"
}

# Track stats
MATCHED_COUNT=0
MISSING_COUNT=0

# Process each MIME type
while read -r MIME; do
  # Skip blank lines so no "" key is ever written.
  [ -n "$MIME" ] || continue
  echo "Processing: $MIME"

  # Extract comment with XML namespace handling; only the first line of output is used.
  # NOTE: $MIME is spliced into the XPath string literal, so a value containing
  # a single quote would break the query.
  COMMENT=$(xmlstarlet sel -N x="$XML_NS" \
    -t -m "//x:mime-type[@type='${MIME}']" -v "x:comment" -n "$XML_FILE" | head -n 1)

  if [ -n "$COMMENT" ]; then
    set_name "$MIME" "$COMMENT"
    MATCHED_COUNT=$((MATCHED_COUNT + 1))
  else
    echo " > Warning: No description found for MIME type $MIME ⚠️"
    set_name "$MIME" ""
    MISSING_COUNT=$((MISSING_COUNT + 1))
  fi
done <<< "$MIME_TYPES"

# Final formatting and sorting by keys.
# Sort into a temporary file first: redirecting jq straight into $OUTPUT_JSON
# would truncate the real file before jq has succeeded.
jq -S . "$WORK_JSON" > "$NEXT_JSON"
cat "$NEXT_JSON" > "$OUTPUT_JSON"

echo "✅ Done!"
echo "✔️ Descriptions found for $MATCHED_COUNT MIME types"
echo "⚠️ Descriptions missing for $MISSING_COUNT MIME types"
echo "📄 Output written to $OUTPUT_JSON"
|