Created
June 15, 2025 04:25
-
-
Save blackopsrepl/b35f504de9685aac6fb5121ffa3e69cf to your computer and use it in GitHub Desktop.
Removes excessive empty lines from a Markdown file while preserving necessary structure
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| import sys | |
# Opening/closing marker of a fenced code block: up to three spaces of
# indentation followed by a run of at least three backticks or tildes.
_FENCE_RE = re.compile(r'^ {0,3}(`{3,}|~{3,})')

# A bullet ("-", "*", "+") or numbered ("1.") list item, optionally indented.
# The alternatives are grouped so that both the leading-whitespace prefix and
# the trailing-whitespace requirement apply to either kind of marker.
_LIST_ITEM_RE = re.compile(r'^\s*(?:[-*+]|\d+\.)\s')


def _opening_fence(text):
    """Return the fence marker if *text* opens a fenced code block, else None."""
    match = _FENCE_RE.match(text)
    if match is None:
        return None
    marker = match.group(1)
    # A backtick fence cannot carry backticks in its info string; such a line
    # is inline code rather than the start of a block.
    if marker[0] == '`' and '`' in text[match.end():]:
        return None
    return marker


def _closes_fence(text, fence):
    """Return True if *text* is a closing fence for the open marker *fence*."""
    match = _FENCE_RE.match(text)
    if match is None:
        return False
    marker = match.group(1)
    return (
        marker[0] == fence[0]
        and len(marker) >= len(fence)
        and not text[match.end():].strip()
    )


def _starts_header_or_list(text):
    """Return True if *text* begins with '#' or looks like a list item."""
    return text.startswith('#') or _LIST_ITEM_RE.match(text) is not None


def _clean_lines(lines):
    """Return the cleaned lines for *lines*, ending with one empty string.

    Outside fenced code blocks, trailing whitespace is stripped, runs of
    blank lines collapse to one, and a blank line directly before a header
    or list item is dropped. Lines inside a fenced code block are copied
    through unchanged apart from their line terminator. The trailing empty
    string makes ``'\\n'.join(...)`` end the output with a single newline.
    """
    cleaned = []
    pending_blank = False  # a blank run was seen and not yet emitted
    fence = None           # marker of the currently open code fence, if any

    for raw in lines:
        if fence is not None:
            # Code is not prose: blank lines and '#' lines are significant.
            text = raw.rstrip('\r\n')
            cleaned.append(text)
            if _closes_fence(text, fence):
                fence = None
            continue

        text = raw.rstrip()
        if not text:
            pending_blank = True
            continue

        if pending_blank:
            # A blank at the very top of the file is always kept; elsewhere
            # it is kept unless the next line is a header or list item.
            if not cleaned or not _starts_header_or_list(text):
                cleaned.append("")
            pending_blank = False

        cleaned.append(text)
        fence = _opening_fence(text)

    # Blank lines can only remain at the end inside an unterminated fence.
    while cleaned and not cleaned[-1].strip():
        cleaned.pop()

    cleaned.append("")
    return cleaned


def clean_markdown(file_path, output_path=None):
    """
    Removes excessive empty lines from a Markdown file while preserving necessary structure.

    Trailing whitespace is stripped, consecutive blank lines are collapsed
    into one, blank lines directly before a header or list item are removed,
    and the output ends with exactly one newline. The contents of fenced
    code blocks are left untouched.

    Errors are reported by printing a message; nothing is raised and the
    function always returns None.

    Args:
        file_path (str): Path to the input Markdown file.
        output_path (str, optional): Path to save the cleaned Markdown. If None, overwrites input file.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    final_lines = _clean_lines(lines)

    # An empty or missing output path means "overwrite the input file".
    output_file = output_path if output_path else file_path

    # Handled separately from the read so that a bad output path is not
    # misreported as the input file being missing.
    try:
        with open(output_file, 'w', encoding='utf-8') as file:
            file.write('\n'.join(final_lines))
    except Exception as e:
        print(f"An error occurred: {e}")
        return

    print(f"Cleaned Markdown file saved to: {output_file}")
def main():
    """Command-line entry point.

    Expects the input file as the first argument and an optional output
    file as the second. With no arguments, prints a usage line and exits
    with status 1.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python clean_markdown.py <input_file> [output_file]")
        sys.exit(1)

    source_path = cli_args[0]
    # Without a second argument the input file is cleaned in place.
    target_path = cli_args[1] if len(cli_args) > 1 else None
    clean_markdown(source_path, target_path)


# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment