Skip to content

Instantly share code, notes, and snippets.

@blackopsrepl
Created June 15, 2025 04:25
Show Gist options
  • Select an option

  • Save blackopsrepl/b35f504de9685aac6fb5121ffa3e69cf to your computer and use it in GitHub Desktop.

Select an option

Save blackopsrepl/b35f504de9685aac6fb5121ffa3e69cf to your computer and use it in GitHub Desktop.
Removes excessive empty lines from a Markdown file while preserving necessary structure
import re
import sys
# Start of a fenced code block marker: three or more backticks or tildes,
# optionally indented (fences nested in list items are indented).
_FENCE_RE = re.compile(r'^\s*(`{3,}|~{3,})')

# Start of a bullet ("-", "*", "+") or ordered ("1.") list item, optionally
# indented. The alternation is grouped so both kinds accept leading whitespace.
_LIST_ITEM_RE = re.compile(r'^\s*(?:[-*+]|\d+\.)\s')


def _fence_marker(line):
    """Return the code-fence marker that *line* starts with, or None."""
    match = _FENCE_RE.match(line)
    if match is None:
        return None
    marker = match.group(1)
    # Backticks after a backtick marker mean an inline code span such as
    # ```code```, not a fence.
    if marker[0] == '`' and '`' in line[match.end():]:
        return None
    return marker


def _closes_fence(line, opener):
    """Return True if *line* closes the fence that was opened with *opener*."""
    marker = _fence_marker(line)
    return (
        marker is not None
        and marker[0] == opener[0]
        and len(marker) >= len(opener)
        # A closing fence carries nothing but the marker itself.
        and line.strip() == marker
    )


def _normalize_blank_lines(lines):
    """Return *lines* with trailing whitespace stripped and blank lines tidied.

    Outside fenced code blocks, runs of blank lines collapse to one, and a
    blank line directly before a header or list item is dropped (except at
    the very start of the document). Inside fenced code blocks blank lines
    are kept exactly as they are. Trailing blank lines are removed.
    """
    result = []
    open_fence = None      # marker of the fence currently open, if any
    blank_pending = False  # a run of blank lines was seen but not yet emitted

    for raw_line in lines:
        line = raw_line.rstrip()

        if open_fence is not None:
            # Code content: keep every line, including blank ones and lines
            # that merely look like headers ("# comment").
            result.append(line)
            if _closes_fence(line, open_fence):
                open_fence = None
            continue

        if not line:
            # Defer the decision until the following non-blank line is known.
            blank_pending = True
            continue

        if blank_pending:
            blank_pending = False
            starts_block = line.startswith('#') or _LIST_ITEM_RE.match(line)
            # A leading blank line (nothing emitted yet) is always kept.
            if not result or not starts_block:
                result.append("")

        result.append(line)
        open_fence = _fence_marker(line)

    # Only an unterminated fence can leave blank lines at the end.
    while result and not result[-1]:
        result.pop()
    return result


def clean_markdown(file_path, output_path=None):
    """
    Removes excessive empty lines from a Markdown file while preserving necessary structure.

    Trailing whitespace is stripped from every line (this includes the
    two-space Markdown hard line break). Consecutive blank lines collapse to
    one, blank lines directly before a header or list item are removed, and
    the content of fenced code blocks keeps its blank lines untouched. The
    output ends with exactly one newline (an empty input yields an empty file).

    Errors are reported with ``print`` rather than raised.

    Args:
        file_path (str): Path to the input Markdown file.
        output_path (str, optional): Path to save the cleaned Markdown. If None, overwrites input file.
    """
    output_file = output_path if output_path else file_path
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.readlines()

        cleaned = _normalize_blank_lines(lines)

        with open(output_file, 'w', encoding='utf-8') as file:
            file.writelines(f"{line}\n" for line in cleaned)
        print(f"Cleaned Markdown file saved to: {output_file}")
    except FileNotFoundError as e:
        # Either open() can raise this (e.g. a missing output directory), so
        # name the path that actually failed instead of assuming the input.
        missing = e.filename if e.filename is not None else file_path
        print(f"Error: File '{missing}' not found.")
    except Exception as e:
        print(f"An error occurred: {e}")
def main():
    """Command-line entry point.

    Expects ``<input_file> [output_file]`` after the script name. With no
    arguments it prints a usage line and exits with status 1. When the
    output file is omitted, ``None`` is passed to ``clean_markdown``.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python clean_markdown.py <input_file> [output_file]")
        sys.exit(1)

    source_path = cli_args[0]
    if len(cli_args) > 1:
        target_path = cli_args[1]
    else:
        target_path = None
    clean_markdown(source_path, target_path)


# Run the CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment