BigW Consortium Gitlab

po_linter.rb 6.83 KB
Newer Older
1 2 3
module Gitlab
  module I18n
    class PoLinter
4
      # `po_path` and `locale` are assigned in `initialize`;
      # `translation_entries` and `metadata_entry` are assigned by `parse_po`.
      attr_reader :po_path, :translation_entries, :metadata_entry, :locale
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25

      # Matches interpolation placeholders: named ones written as `%{name}`
      # (the name may be empty) and unnamed ones made of `%` followed by a
      # single lowercase letter, such as `%d` or `%s`.
      VARIABLE_REGEX = /%{\w*}|%[a-z]/.freeze

      # Remembers which PO file to lint and for which locale.
      #
      # @param po_path [String] path handed to the PO parser
      # @param locale [String] locale used when translating messages
      def initialize(po_path, locale = I18n.locale.to_s)
        @locale = locale
        @po_path = po_path
      end

      # Memoized result of `validate_po`.
      #
      # @return [Hash] whatever `validate_po` returned on the first call
      def errors
        return @errors if @errors

        @errors = validate_po
      end

      # Parses the PO file and, when parsing succeeded, validates every entry.
      #
      # @return [Hash] a single 'PO-syntax errors' key when parsing failed,
      #   otherwise the result of `validate_entries`
      def validate_po
        parse_error = parse_po
        return validate_entries unless parse_error

        { 'PO-syntax errors' => [parse_error] }
      end

      # Parses the file at `po_path` and stores the metadata entry and the
      # translation entries on the linter.
      #
      # @return [String, nil] a description of the problem when the file has
      #   no metadata entry or the parser raised, `nil` when parsing succeeded
      def parse_po
        entries = SimplePoParser.parse(po_path)

        # The metadata entry, when there is one, is the first entry and is
        # recognisable by its empty `msgid`. A file without any entries has no
        # metadata entry either.
        first_entry = entries.first
        return 'Missing metadata entry.' unless first_entry && first_entry[:msgid].empty?

        @metadata_entry = Gitlab::I18n::MetadataEntry.new(entries.shift)
        @translation_entries = entries.map do |entry_data|
          Gitlab::I18n::TranslationEntry.new(entry_data, metadata_entry.expected_plurals)
        end

        nil
      rescue SimplePoParser::ParserError => e
        @translation_entries = []
        e.message
      end

      # Validates every translation entry and collects the failures.
      #
      # @return [Hash{String => Array<String>}] error messages keyed by the
      #   joined `msgid` of the entry; entries without errors are left out
      def validate_entries
        translation_entries.each_with_object({}) do |entry, errors|
          errors_for_entry = validate_entry(entry)
          next unless errors_for_entry.any?

          errors[join_message(entry.msgid)] = errors_for_entry
        end
      end

      # Runs every per-entry check against a single translation entry.
      #
      # @param entry [Object] the translation entry to check
      # @return [Array<String>] the messages appended by the checks, in the
      #   order the checks ran
      def validate_entry(entry)
        errors = []

        validate_flags(errors, entry)
        validate_variables(errors, entry)
        validate_newlines(errors, entry)
        validate_number_of_plurals(errors, entry)
        validate_unescaped_chars(errors, entry)

        errors
      end

69 70 71 72 73 74 75 76 77 78 79 80 81 82
      # Reports unescaped `%` characters in the id, the plural id and the
      # translations of an entry.
      #
      # @param errors [Array<String>] collector the messages are appended to
      # @param entry [Object] the translation entry to check
      def validate_unescaped_chars(errors, entry)
        errors << 'contains unescaped `%`, escape it using `%%`' if entry.msgid_contains_unescaped_chars?
        errors << 'plural id contains unescaped `%`, escape it using `%%`' if entry.plural_id_contains_unescaped_chars?
        errors << 'translation contains unescaped `%`, escape it using `%%`' if entry.translations_contain_unescaped_chars?
      end

83
      # Reports a translated plural entry whose number of translations differs
      # from the number of plurals announced by the metadata entry.
      #
      # Nothing is checked when there is no metadata entry, when it announces
      # no plural count, or when the entry is not translated.
      #
      # @param errors [Array<String>] collector the message is appended to
      # @param entry [Object] the translation entry to check
      def validate_number_of_plurals(errors, entry)
        expected_plurals = metadata_entry&.expected_plurals
        return unless expected_plurals
        return unless entry.translated?
        return unless entry.has_plural?
        return if entry.all_translations.size == expected_plurals

        errors << "should have #{expected_plurals} "\
                  "#{'translations'.pluralize(expected_plurals)}"
      end
92

93
      # Reports ids, plural ids and translations that are defined over
      # multiple lines.
      #
      # @param errors [Array<String>] collector the messages are appended to
      # @param entry [Object] the translation entry to check
      def validate_newlines(errors, entry)
        if entry.msgid_contains_newlines?
          errors << 'is defined over multiple lines, this breaks some tooling.'
        end

        if entry.plural_id_contains_newlines?
          errors << 'plural is defined over multiple lines, this breaks some tooling.'
        end

        if entry.translations_contain_newlines?
          errors << 'has translations defined over multiple lines, this breaks some tooling.'
        end
      end

      # Checks the variables of the singular translation against `msgid` and
      # those of every plural translation against `plural_id`.
      #
      # @param errors [Array<String>] collector the messages are appended to
      # @param entry [Object] the translation entry to check
      def validate_variables(errors, entry)
        if entry.has_singular_translation?
          validate_variables_in_message(errors, entry.msgid, entry.singular_translation)
        end

        return unless entry.has_plural?

        entry.plural_translations.each do |translation|
          validate_variables_in_message(errors, entry.plural_id, translation)
        end
      end

      # Runs the three variable checks for one id / translation pair.
      #
      # @param errors [Array<String>] collector the messages are appended to
      # @param message_id [String, Array<String>] id, possibly split in parts
      # @param message_translation [String, Array<String>] translation to check
      def validate_variables_in_message(errors, message_id, message_translation)
        joined_id = join_message(message_id)
        # Every placeholder found in the id has to be usable in the translation.
        required_variables = joined_id.scan(VARIABLE_REGEX)

        validate_unnamed_variables(errors, required_variables)
        validate_translation(errors, joined_id, required_variables)
        validate_variable_usage(errors, message_translation, required_variables)
      end

      # Translates `message_id` in the linter's locale and interpolates
      # generated values into the result, reporting any failure.
      #
      # @param errors [Array<String>] collector the message is appended to
      # @param message_id [String] the joined id; ids containing `|` are
      #   translated with `s_`, all others with `_`
      # @param used_variables [Array<String>] placeholders found in the id
      def validate_translation(errors, message_id, used_variables)
        variables = fill_in_variables(used_variables)

        # `String#%` resolves named references (`%{name}`) by Symbol key, while
        # `fill_in_variables` builds a Hash keyed by String. Symbolize a copy
        # so the interpolation can actually find the values.
        format_arguments =
          if variables.is_a?(Hash)
            variables.each_with_object({}) do |(name, value), symbolized|
              symbolized[name.to_s.to_sym] = value
            end
          else
            variables
          end

        begin
          Gitlab::I18n.with_locale(locale) do
            translated = if message_id.include?('|')
                           FastGettext::Translation.s_(message_id)
                         else
                           FastGettext::Translation._(message_id)
                         end

            translated % format_arguments
          end

        # `sprintf` could raise an `ArgumentError` when passing something
        # other than a Hash when using named variables
        #
        # `sprintf` could raise `TypeError` when passing a wrong type when using
        # unnamed variables
        #
        # `sprintf` could raise `KeyError` when the translation uses a named
        # variable that was not filled in
        #
        # FastGettext::Translation could raise `RuntimeError` (raised as a string),
        # or as subclasses `NoTextDomainConfigured` & `InvalidFormat`
        #
        # `FastGettext::Translation` could raise `ArgumentError` as subclasses
        # `InvalidEncoding`, `IllegalSequence` & `InvalidCharacter`
        rescue ArgumentError, TypeError, RuntimeError, KeyError => e
          errors << "Failure translating to #{locale} with #{variables}: #{e.message}"
        end
      end

      # Generates values that can be interpolated for the given placeholders.
      #
      # @param variables [Array<String>] placeholders found in a message id
      # @return [Array, Hash] an empty Array when there are no placeholders;
      #   an Array with one value per placeholder (an Integer for `%d`) as
      #   soon as one of them is unnamed; otherwise a Hash keyed by the
      #   variable name as a String
      def fill_in_variables(variables)
        return [] if variables.empty?

        if variables.any? { |variable| unnamed_variable?(variable) }
          variables.map { |variable| variable == '%d' ? Random.rand(1000) : Gitlab::Utils.random_string }
        else
          variables.each_with_object({}) do |variable, values|
            values[variable[/\w+/]] = Gitlab::Utils.random_string
          end
        end
      end

      # Reports a message that has more than one variable while at least one
      # of them is unnamed.
      #
      # @param errors [Array<String>] collector the message is appended to
      # @param variables [Array<String>] placeholders found in the message id
      def validate_unnamed_variables(errors, variables)
        return unless variables.size > 1
        return unless variables.any? { |placeholder| unnamed_variable?(placeholder) }

        errors << 'is combining multiple unnamed variables'
      end

      # Compares the variables used in a translation with the ones required by
      # the message id and reports both missing and unknown variables.
      #
      # @param errors [Array<String>] collector the messages are appended to
      # @param translation [String, Array<String>] translation, possibly split
      #   in parts
      # @param required_variables [Array<String>] placeholders found in the id
      def validate_variable_usage(errors, translation, required_variables)
        translation = join_message(translation)

        # We don't need to validate when the message is empty.
        # In this case we fall back to the default, which has all the
        # required variables.
        return if translation.empty?

        found_variables = translation.scan(VARIABLE_REGEX)

        missing_variables = required_variables - found_variables
        if missing_variables.any?
          errors << "<#{translation}> is missing: [#{missing_variables.to_sentence}]"
        end

        unknown_variables = found_variables - required_variables
        if unknown_variables.any?
          errors << "<#{translation}> is using unknown variables: [#{unknown_variables.to_sentence}]"
        end
      end

      # A variable is unnamed when it does not start with `%{`.
      #
      # @param variable_name [String] a placeholder such as `%d` or `%{name}`
      # @return [Boolean]
      def unnamed_variable?(variable_name)
        named = variable_name.start_with?('%{')
        !named
      end

      # Reports an entry that carries a flag.
      #
      # @param errors [Array<String>] collector the message is appended to
      # @param entry [Object] the translation entry to check
      def validate_flags(errors, entry)
        flag = entry.flag
        errors << "is marked #{flag}" if flag
      end

      # Concatenates a message that may be given as a single value or as a
      # list of parts.
      #
      # @param message [String, Array<String>, nil]
      # @return [String]
      def join_message(message)
        parts = Array(message)
        parts.join
      end
    end
  end
end